/ Check-in [d4cce2c7]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge the latest trunk changes, including the multi-threaded sorter, into the sessions branch.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | sessions
Files: files | file ages | folders
SHA1: d4cce2c71e64ab7b6a65a81b88b69445ed859351
User & Date: drh 2014-09-02 15:49:47
Context
2014-09-08
15:04
Merge support for large files on Android from trunk. check-in: c2885c6b user: drh tags: sessions
2014-09-02
15:49
Merge the latest trunk changes, including the multi-threaded sorter, into the sessions branch. check-in: d4cce2c7 user: drh tags: sessions
2014-09-01
23:06
Update comments in the ANALYZE command that describe how the Stat4Accum object is passed around within the VDBE. No changes to functional code. check-in: 9779c7a9 user: drh tags: trunk
2014-08-26
02:15
Merge recent performance enhancements and the CAST operator enhancements into the sessions branch. check-in: 08ae974a user: drh tags: sessions
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to Makefile.in.

   176    176            main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
   177    177            memjournal.lo \
   178    178            mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \
   179    179            notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \
   180    180            pager.lo parse.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \
   181    181            random.lo resolve.lo rowset.lo rtree.lo \
   182    182            sqlite3session.lo select.lo status.lo \
   183         -         table.lo tokenize.lo trigger.lo \
          183  +         table.lo threads.lo tokenize.lo trigger.lo \
   184    184            update.lo util.lo vacuum.lo \
   185    185            vdbe.lo vdbeapi.lo vdbeaux.lo vdbeblob.lo vdbemem.lo vdbesort.lo \
   186    186            vdbetrace.lo wal.lo walker.lo where.lo utf.lo vtab.lo
   187    187   
   188    188   # Object files for the amalgamation.
   189    189   #
   190    190   LIBOBJS1 = sqlite3.lo
................................................................................
   262    262     $(TOP)/src/status.c \
   263    263     $(TOP)/src/shell.c \
   264    264     $(TOP)/src/sqlite.h.in \
   265    265     $(TOP)/src/sqlite3ext.h \
   266    266     $(TOP)/src/sqliteInt.h \
   267    267     $(TOP)/src/sqliteLimit.h \
   268    268     $(TOP)/src/table.c \
          269  +  $(TOP)/src/threads.c \
   269    270     $(TOP)/src/tclsqlite.c \
   270    271     $(TOP)/src/tokenize.c \
   271    272     $(TOP)/src/trigger.c \
   272    273     $(TOP)/src/utf.c \
   273    274     $(TOP)/src/update.c \
   274    275     $(TOP)/src/util.c \
   275    276     $(TOP)/src/vacuum.c \
................................................................................
   743    744   
   744    745   status.lo:	$(TOP)/src/status.c $(HDR)
   745    746   	$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/status.c
   746    747   
   747    748   table.lo:	$(TOP)/src/table.c $(HDR)
   748    749   	$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/table.c
   749    750   
          751  +threads.lo:	$(TOP)/src/threads.c $(HDR)
          752  +	$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/threads.c
          753  +
   750    754   tokenize.lo:	$(TOP)/src/tokenize.c keywordhash.h $(HDR)
   751    755   	$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/tokenize.c
   752    756   
   753    757   trigger.lo:	$(TOP)/src/trigger.c $(HDR)
   754    758   	$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/trigger.c
   755    759   
   756    760   update.lo:	$(TOP)/src/update.c $(HDR)

Changes to Makefile.msc.

   643    643            main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
   644    644            memjournal.lo \
   645    645            mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \
   646    646            notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \
   647    647            pager.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \
   648    648            random.lo resolve.lo rowset.lo rtree.lo \
   649    649            sqlite3session.lo select.lo status.lo \
   650         -         table.lo tokenize.lo trigger.lo \
          650  +         table.lo threads.lo tokenize.lo trigger.lo \
   651    651            update.lo util.lo vacuum.lo \
   652    652            vdbeapi.lo vdbeaux.lo vdbeblob.lo vdbemem.lo vdbesort.lo \
   653    653            vdbetrace.lo wal.lo walker.lo where.lo utf.lo vtab.lo
   654    654   
   655    655   # Object files for the amalgamation.
   656    656   #
   657    657   LIBOBJS1 = sqlite3.lo
................................................................................
   740    740     $(TOP)\src\status.c \
   741    741     $(TOP)\src\shell.c \
   742    742     $(TOP)\src\sqlite.h.in \
   743    743     $(TOP)\src\sqlite3ext.h \
   744    744     $(TOP)\src\sqliteInt.h \
   745    745     $(TOP)\src\sqliteLimit.h \
   746    746     $(TOP)\src\table.c \
          747  +  $(TOP)\src\threads.c \
   747    748     $(TOP)\src\tclsqlite.c \
   748    749     $(TOP)\src\tokenize.c \
   749    750     $(TOP)\src\trigger.c \
   750    751     $(TOP)\src\utf.c \
   751    752     $(TOP)\src\update.c \
   752    753     $(TOP)\src\util.c \
   753    754     $(TOP)\src\vacuum.c \
................................................................................
  1237   1238   	$(LTCOMPILE) -c $(TOP)\src\select.c
  1238   1239   
  1239   1240   status.lo:	$(TOP)\src\status.c $(HDR)
  1240   1241   	$(LTCOMPILE) -c $(TOP)\src\status.c
  1241   1242   
  1242   1243   table.lo:	$(TOP)\src\table.c $(HDR)
  1243   1244   	$(LTCOMPILE) -c $(TOP)\src\table.c
         1245  +
         1246  +threads.lo:	$(TOP)\src\threads.c $(HDR)
         1247  +	$(LTCOMPILE) -c $(TOP)\src\threads.c
  1244   1248   
  1245   1249   tokenize.lo:	$(TOP)\src\tokenize.c keywordhash.h $(HDR)
  1246   1250   	$(LTCOMPILE) -c $(TOP)\src\tokenize.c
  1247   1251   
  1248   1252   trigger.lo:	$(TOP)\src\trigger.c $(HDR)
  1249   1253   	$(LTCOMPILE) -c $(TOP)\src\trigger.c
  1250   1254   

Changes to main.mk.

    63     63            icu.o insert.o journal.o legacy.o loadext.o \
    64     64            main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \
    65     65            memjournal.o \
    66     66            mutex.o mutex_noop.o mutex_unix.o mutex_w32.o \
    67     67            notify.o opcodes.o os.o os_unix.o os_win.o \
    68     68            pager.o pcache.o pcache1.o pragma.o prepare.o printf.o \
    69     69            random.o resolve.o rowset.o rtree.o select.o status.o \
    70         -         table.o tokenize.o trigger.o \
           70  +         table.o threads.o tokenize.o trigger.o \
    71     71            update.o util.o vacuum.o \
    72     72            vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \
    73     73   	 vdbetrace.o wal.o walker.o where.o utf.o vtab.o
    74     74   
    75     75   LIBOBJ += sqlite3session.o
    76     76   
    77     77   
................................................................................
   145    145     $(TOP)/src/shell.c \
   146    146     $(TOP)/src/sqlite.h.in \
   147    147     $(TOP)/src/sqlite3ext.h \
   148    148     $(TOP)/src/sqliteInt.h \
   149    149     $(TOP)/src/sqliteLimit.h \
   150    150     $(TOP)/src/table.c \
   151    151     $(TOP)/src/tclsqlite.c \
          152  +  $(TOP)/src/threads.c \
   152    153     $(TOP)/src/tokenize.c \
   153    154     $(TOP)/src/trigger.c \
   154    155     $(TOP)/src/utf.c \
   155    156     $(TOP)/src/update.c \
   156    157     $(TOP)/src/util.c \
   157    158     $(TOP)/src/vacuum.c \
   158    159     $(TOP)/src/vdbe.c \
................................................................................
   317    318     $(TOP)/src/pragma.c \
   318    319     $(TOP)/src/prepare.c \
   319    320     $(TOP)/src/printf.c \
   320    321     $(TOP)/src/random.c \
   321    322     $(TOP)/src/pcache.c \
   322    323     $(TOP)/src/pcache1.c \
   323    324     $(TOP)/src/select.c \
          325  +  $(TOP)/src/threads.c \
   324    326     $(TOP)/src/tokenize.c \
   325    327     $(TOP)/src/utf.c \
   326    328     $(TOP)/src/util.c \
   327    329     $(TOP)/src/vdbeapi.c \
   328    330     $(TOP)/src/vdbeaux.c \
   329    331     $(TOP)/src/vdbe.c \
   330    332     $(TOP)/src/vdbemem.c \

Changes to src/analyze.c.

   383    383   **
   384    384   ** For indexes on ordinary rowid tables, N==K+1.  But for indexes on
   385    385   ** WITHOUT ROWID tables, N=K+P where P is the number of columns in the
   386    386   ** PRIMARY KEY of the table.  The covering index that implements the
   387    387   ** original WITHOUT ROWID table has N==K as a special case.
   388    388   **
   389    389   ** This routine allocates the Stat4Accum object in heap memory. The return 
   390         -** value is a pointer to the the Stat4Accum object encoded as a blob (i.e. 
   391         -** the size of the blob is sizeof(void*) bytes). 
          390  +** value is a pointer to the Stat4Accum object.  The datatype of the
          391  +** return value is BLOB, but it is really just a pointer to the Stat4Accum
          392  +** object.
   392    393   */
   393    394   static void statInit(
   394    395     sqlite3_context *context,
   395    396     int argc,
   396    397     sqlite3_value **argv
   397    398   ){
   398    399     Stat4Accum *p;
................................................................................
   462    463     
   463    464       for(i=0; i<nCol; i++){
   464    465         p->aBest[i].iCol = i;
   465    466       }
   466    467     }
   467    468   #endif
   468    469   
   469         -  /* Return a pointer to the allocated object to the caller */
   470         -  sqlite3_result_blob(context, p, sizeof(p), stat4Destructor);
          470  +  /* Return a pointer to the allocated object to the caller.  Note that
          471  +  ** only the pointer (the 2nd parameter) matters.  The size of the object
          472  +  ** (given by the 3rd parameter) is never used and can be any positive
          473  +  ** value. */
          474  +  sqlite3_result_blob(context, p, sizeof(*p), stat4Destructor);
   471    475   }
   472    476   static const FuncDef statInitFuncdef = {
   473    477     2+IsStat34,      /* nArg */
   474    478     SQLITE_UTF8,     /* funcFlags */
   475    479     0,               /* pUserData */
   476    480     0,               /* pNext */
   477    481     statInit,        /* xFunc */
................................................................................
   789    793   #define STAT_GET_NLT   3          /* "nlt" column of stat[34] entry */
   790    794   #define STAT_GET_NDLT  4          /* "ndlt" column of stat[34] entry */
   791    795   
   792    796   /*
   793    797   ** Implementation of the stat_get(P,J) SQL function.  This routine is
   794    798   ** used to query statistical information that has been gathered into
   795    799   ** the Stat4Accum object by prior calls to stat_push().  The P parameter
   796         -** is a BLOB which is decoded into a pointer to the Stat4Accum objects.
          800  +** has type BLOB but it is really just a pointer to the Stat4Accum object.
   797    801   ** The content to be returned is determined by the parameter J
   798    802   ** which is one of the STAT_GET_xxxx values defined above.
   799    803   **
   800    804   ** If neither STAT3 nor STAT4 are enabled, then J is always
   801    805   ** STAT_GET_STAT1 and is hence omitted and this routine becomes
   802    806   ** a one-parameter function, stat_get(P), that always returns the
   803    807   ** stat1 table entry information.

Changes to src/btree.c.

  4509   4509   static int moveToRightmost(BtCursor *pCur){
  4510   4510     Pgno pgno;
  4511   4511     int rc = SQLITE_OK;
  4512   4512     MemPage *pPage = 0;
  4513   4513   
  4514   4514     assert( cursorHoldsMutex(pCur) );
  4515   4515     assert( pCur->eState==CURSOR_VALID );
  4516         -  while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){
         4516  +  while( !(pPage = pCur->apPage[pCur->iPage])->leaf ){
  4517   4517       pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
  4518   4518       pCur->aiIdx[pCur->iPage] = pPage->nCell;
  4519   4519       rc = moveToChild(pCur, pgno);
         4520  +    if( rc ) return rc;
  4520   4521     }
  4521         -  if( rc==SQLITE_OK ){
  4522         -    pCur->aiIdx[pCur->iPage] = pPage->nCell-1;
  4523         -    pCur->info.nSize = 0;
  4524         -    pCur->curFlags &= ~BTCF_ValidNKey;
  4525         -  }
  4526         -  return rc;
         4522  +  pCur->aiIdx[pCur->iPage] = pPage->nCell-1;
         4523  +  assert( pCur->info.nSize==0 );
         4524  +  assert( (pCur->curFlags & BTCF_ValidNKey)==0 );
         4525  +  return SQLITE_OK;
  4527   4526   }
  4528   4527   
  4529   4528   /* Move the cursor to the first entry in the table.  Return SQLITE_OK
  4530   4529   ** on success.  Set *pRes to 0 if the cursor actually points to something
  4531   4530   ** or set *pRes to 1 if the table is empty.
  4532   4531   */
  4533   4532   int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
................................................................................
  4650   4649         *pRes = -1;
  4651   4650         return SQLITE_OK;
  4652   4651       }
  4653   4652     }
  4654   4653   
  4655   4654     if( pIdxKey ){
  4656   4655       xRecordCompare = sqlite3VdbeFindCompare(pIdxKey);
  4657         -    pIdxKey->isCorrupt = 0;
         4656  +    pIdxKey->errCode = 0;
  4658   4657       assert( pIdxKey->default_rc==1 
  4659   4658            || pIdxKey->default_rc==0 
  4660   4659            || pIdxKey->default_rc==-1
  4661   4660       );
  4662   4661     }else{
  4663   4662       xRecordCompare = 0; /* All keys are integers */
  4664   4663     }
................................................................................
  4774   4773             if( rc ){
  4775   4774               sqlite3_free(pCellKey);
  4776   4775               goto moveto_finish;
  4777   4776             }
  4778   4777             c = xRecordCompare(nCell, pCellKey, pIdxKey, 0);
  4779   4778             sqlite3_free(pCellKey);
  4780   4779           }
  4781         -        assert( pIdxKey->isCorrupt==0 || c==0 );
         4780  +        assert( 
         4781  +            (pIdxKey->errCode!=SQLITE_CORRUPT || c==0)
         4782  +         && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed)
         4783  +        );
  4782   4784           if( c<0 ){
  4783   4785             lwr = idx+1;
  4784   4786           }else if( c>0 ){
  4785   4787             upr = idx-1;
  4786   4788           }else{
  4787   4789             assert( c==0 );
  4788   4790             *pRes = 0;
  4789   4791             rc = SQLITE_OK;
  4790   4792             pCur->aiIdx[pCur->iPage] = (u16)idx;
  4791         -          if( pIdxKey->isCorrupt ) rc = SQLITE_CORRUPT;
         4793  +          if( pIdxKey->errCode ) rc = SQLITE_CORRUPT;
  4792   4794             goto moveto_finish;
  4793   4795           }
  4794   4796           if( lwr>upr ) break;
  4795   4797           assert( lwr+upr>=0 );
  4796   4798           idx = (lwr+upr)>>1;  /* idx = (lwr+upr)/2 */
  4797   4799         }
  4798   4800       }
................................................................................
  4838   4840   }
  4839   4841   
  4840   4842   /*
  4841   4843   ** Advance the cursor to the next entry in the database.  If
  4842   4844   ** successful then set *pRes=0.  If the cursor
  4843   4845   ** was already pointing to the last entry in the database before
  4844   4846   ** this routine was called, then set *pRes=1.
         4847  +**
         4848  +** The main entry point is sqlite3BtreeNext().  That routine is optimized
         4849  +** for the common case of merely incrementing the cell counter BtCursor.aiIdx
         4850  +** to the next cell on the current page.  The (slower) btreeNext() helper
         4851  +** routine is called when it is necessary to move to a different page or
         4852  +** to restore the cursor.
  4845   4853   **
  4846   4854   ** The calling function will set *pRes to 0 or 1.  The initial *pRes value
  4847   4855   ** will be 1 if the cursor being stepped corresponds to an SQL index and
  4848   4856   ** if this routine could have been skipped if that SQL index had been
  4849   4857   ** a unique index.  Otherwise the caller will have set *pRes to zero.
  4850   4858   ** Zero is the common case. The btree implementation is free to use the
  4851   4859   ** initial *pRes value as a hint to improve performance, but the current
  4852   4860   ** SQLite btree implementation does not. (Note that the comdb2 btree
  4853   4861   ** implementation does use this hint, however.)
  4854   4862   */
  4855         -int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
         4863  +static SQLITE_NOINLINE int btreeNext(BtCursor *pCur, int *pRes){
  4856   4864     int rc;
  4857   4865     int idx;
  4858   4866     MemPage *pPage;
  4859   4867   
  4860   4868     assert( cursorHoldsMutex(pCur) );
  4861         -  assert( pRes!=0 );
  4862         -  assert( *pRes==0 || *pRes==1 );
  4863   4869     assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
         4870  +  assert( *pRes==0 );
  4864   4871     if( pCur->eState!=CURSOR_VALID ){
  4865         -    invalidateOverflowCache(pCur);
         4872  +    assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
  4866   4873       rc = restoreCursorPosition(pCur);
  4867   4874       if( rc!=SQLITE_OK ){
  4868         -      *pRes = 0;
  4869   4875         return rc;
  4870   4876       }
  4871   4877       if( CURSOR_INVALID==pCur->eState ){
  4872   4878         *pRes = 1;
  4873   4879         return SQLITE_OK;
  4874   4880       }
  4875   4881       if( pCur->skipNext ){
  4876   4882         assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT );
  4877   4883         pCur->eState = CURSOR_VALID;
  4878   4884         if( pCur->skipNext>0 ){
  4879   4885           pCur->skipNext = 0;
  4880         -        *pRes = 0;
  4881   4886           return SQLITE_OK;
  4882   4887         }
  4883   4888         pCur->skipNext = 0;
  4884   4889       }
  4885   4890     }
  4886   4891   
  4887   4892     pPage = pCur->apPage[pCur->iPage];
................................................................................
  4891   4896     /* If the database file is corrupt, it is possible for the value of idx 
  4892   4897     ** to be invalid here. This can only occur if a second cursor modifies
  4893   4898     ** the page while cursor pCur is holding a reference to it. Which can
  4894   4899     ** only happen if the database is corrupt in such a way as to link the
  4895   4900     ** page into more than one b-tree structure. */
  4896   4901     testcase( idx>pPage->nCell );
  4897   4902   
  4898         -  pCur->info.nSize = 0;
  4899         -  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
  4900   4903     if( idx>=pPage->nCell ){
  4901   4904       if( !pPage->leaf ){
  4902   4905         rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
  4903         -      if( rc ){
  4904         -        *pRes = 0;
  4905         -        return rc;
  4906         -      }
  4907         -      rc = moveToLeftmost(pCur);
  4908         -      *pRes = 0;
  4909         -      return rc;
         4906  +      if( rc ) return rc;
         4907  +      return moveToLeftmost(pCur);
  4910   4908       }
  4911   4909       do{
  4912   4910         if( pCur->iPage==0 ){
  4913   4911           *pRes = 1;
  4914   4912           pCur->eState = CURSOR_INVALID;
  4915   4913           return SQLITE_OK;
  4916   4914         }
  4917   4915         moveToParent(pCur);
  4918   4916         pPage = pCur->apPage[pCur->iPage];
  4919   4917       }while( pCur->aiIdx[pCur->iPage]>=pPage->nCell );
  4920         -    *pRes = 0;
  4921   4918       if( pPage->intKey ){
  4922         -      rc = sqlite3BtreeNext(pCur, pRes);
         4919  +      return sqlite3BtreeNext(pCur, pRes);
  4923   4920       }else{
  4924         -      rc = SQLITE_OK;
         4921  +      return SQLITE_OK;
  4925   4922       }
  4926         -    return rc;
  4927   4923     }
         4924  +  if( pPage->leaf ){
         4925  +    return SQLITE_OK;
         4926  +  }else{
         4927  +    return moveToLeftmost(pCur);
         4928  +  }
         4929  +}
         4930  +int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
         4931  +  MemPage *pPage;
         4932  +  assert( cursorHoldsMutex(pCur) );
         4933  +  assert( pRes!=0 );
         4934  +  assert( *pRes==0 || *pRes==1 );
         4935  +  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
         4936  +  pCur->info.nSize = 0;
         4937  +  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
  4928   4938     *pRes = 0;
         4939  +  if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur, pRes);
         4940  +  pPage = pCur->apPage[pCur->iPage];
         4941  +  if( (++pCur->aiIdx[pCur->iPage])>=pPage->nCell ){
         4942  +    pCur->aiIdx[pCur->iPage]--;
         4943  +    return btreeNext(pCur, pRes);
         4944  +  }
  4929   4945     if( pPage->leaf ){
  4930   4946       return SQLITE_OK;
         4947  +  }else{
         4948  +    return moveToLeftmost(pCur);
  4931   4949     }
  4932         -  rc = moveToLeftmost(pCur);
  4933         -  return rc;
  4934   4950   }
  4935         -
  4936   4951   
  4937   4952   /*
  4938   4953   ** Step the cursor back to the previous entry in the database.  If
  4939   4954   ** successful then set *pRes=0.  If the cursor
  4940   4955   ** was already pointing to the first entry in the database before
  4941   4956   ** this routine was called, then set *pRes=1.
         4957  +**
         4958  +** The main entry point is sqlite3BtreePrevious().  That routine is optimized
         4959  +** for the common case of merely decrementing the cell counter BtCursor.aiIdx
         4960  +** to the previous cell on the current page.  The (slower) btreePrevious() helper
         4961  +** routine is called when it is necessary to move to a different page or
         4962  +** to restore the cursor.
  4942   4963   **
  4943   4964   ** The calling function will set *pRes to 0 or 1.  The initial *pRes value
  4944   4965   ** will be 1 if the cursor being stepped corresponds to an SQL index and
  4945   4966   ** if this routine could have been skipped if that SQL index had been
  4946   4967   ** a unique index.  Otherwise the caller will have set *pRes to zero.
  4947   4968   ** Zero is the common case. The btree implementation is free to use the
  4948   4969   ** initial *pRes value as a hint to improve performance, but the current
  4949   4970   ** SQLite btree implementation does not. (Note that the comdb2 btree
  4950   4971   ** implementation does use this hint, however.)
  4951   4972   */
  4952         -int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
         4973  +static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur, int *pRes){
  4953   4974     int rc;
  4954   4975     MemPage *pPage;
  4955   4976   
  4956   4977     assert( cursorHoldsMutex(pCur) );
  4957   4978     assert( pRes!=0 );
  4958         -  assert( *pRes==0 || *pRes==1 );
         4979  +  assert( *pRes==0 );
  4959   4980     assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
  4960         -  pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl);
         4981  +  assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 );
         4982  +  assert( pCur->info.nSize==0 );
  4961   4983     if( pCur->eState!=CURSOR_VALID ){
  4962         -    if( ALWAYS(pCur->eState>=CURSOR_REQUIRESEEK) ){
  4963         -      rc = btreeRestoreCursorPosition(pCur);
  4964         -      if( rc!=SQLITE_OK ){
  4965         -        *pRes = 0;
  4966         -        return rc;
  4967         -      }
         4984  +    assert( pCur->eState>=CURSOR_REQUIRESEEK );
         4985  +    rc = btreeRestoreCursorPosition(pCur);
         4986  +    if( rc!=SQLITE_OK ){
         4987  +      return rc;
  4968   4988       }
  4969   4989       if( CURSOR_INVALID==pCur->eState ){
  4970   4990         *pRes = 1;
  4971   4991         return SQLITE_OK;
  4972   4992       }
  4973   4993       if( pCur->skipNext ){
  4974   4994         assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT );
  4975   4995         pCur->eState = CURSOR_VALID;
  4976   4996         if( pCur->skipNext<0 ){
  4977   4997           pCur->skipNext = 0;
  4978         -        *pRes = 0;
  4979   4998           return SQLITE_OK;
  4980   4999         }
  4981   5000         pCur->skipNext = 0;
  4982   5001       }
  4983   5002     }
  4984   5003   
  4985   5004     pPage = pCur->apPage[pCur->iPage];
  4986   5005     assert( pPage->isInit );
  4987   5006     if( !pPage->leaf ){
  4988   5007       int idx = pCur->aiIdx[pCur->iPage];
  4989   5008       rc = moveToChild(pCur, get4byte(findCell(pPage, idx)));
  4990         -    if( rc ){
  4991         -      *pRes = 0;
  4992         -      return rc;
  4993         -    }
         5009  +    if( rc ) return rc;
  4994   5010       rc = moveToRightmost(pCur);
  4995   5011     }else{
  4996   5012       while( pCur->aiIdx[pCur->iPage]==0 ){
  4997   5013         if( pCur->iPage==0 ){
  4998   5014           pCur->eState = CURSOR_INVALID;
  4999   5015           *pRes = 1;
  5000   5016           return SQLITE_OK;
  5001   5017         }
  5002   5018         moveToParent(pCur);
  5003   5019       }
  5004         -    pCur->info.nSize = 0;
  5005         -    pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
         5020  +    assert( pCur->info.nSize==0 );
         5021  +    assert( (pCur->curFlags & (BTCF_ValidNKey|BTCF_ValidOvfl))==0 );
  5006   5022   
  5007   5023       pCur->aiIdx[pCur->iPage]--;
  5008   5024       pPage = pCur->apPage[pCur->iPage];
  5009   5025       if( pPage->intKey && !pPage->leaf ){
  5010   5026         rc = sqlite3BtreePrevious(pCur, pRes);
  5011   5027       }else{
  5012   5028         rc = SQLITE_OK;
  5013   5029       }
  5014   5030     }
         5031  +  return rc;
         5032  +}
         5033  +int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
         5034  +  assert( cursorHoldsMutex(pCur) );
         5035  +  assert( pRes!=0 );
         5036  +  assert( *pRes==0 || *pRes==1 );
         5037  +  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
  5015   5038     *pRes = 0;
  5016         -  return rc;
         5039  +  pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey);
         5040  +  pCur->info.nSize = 0;
         5041  +  if( pCur->eState!=CURSOR_VALID
         5042  +   || pCur->aiIdx[pCur->iPage]==0
         5043  +   || pCur->apPage[pCur->iPage]->leaf==0
         5044  +  ){
         5045  +    return btreePrevious(pCur, pRes);
         5046  +  }
         5047  +  pCur->aiIdx[pCur->iPage]--;
         5048  +  return SQLITE_OK;
  5017   5049   }
  5018   5050   
  5019   5051   /*
  5020   5052   ** Allocate a new page from the database file.
  5021   5053   **
  5022   5054   ** The new page is marked as dirty.  (In other words, sqlite3PagerWrite()
  5023   5055   ** has already been called on the new page.)  The new page has also

Changes to src/build.c.

  2675   2675     }else{
  2676   2676       tnum = pIndex->tnum;
  2677   2677     }
  2678   2678     pKey = sqlite3KeyInfoOfIndex(pParse, pIndex);
  2679   2679   
  2680   2680     /* Open the sorter cursor if we are to use one. */
  2681   2681     iSorter = pParse->nTab++;
  2682         -  sqlite3VdbeAddOp4(v, OP_SorterOpen, iSorter, 0, 0, (char*)
         2682  +  sqlite3VdbeAddOp4(v, OP_SorterOpen, iSorter, 0, pIndex->nKeyCol, (char*)
  2683   2683                       sqlite3KeyInfoRef(pKey), P4_KEYINFO);
  2684   2684   
  2685   2685     /* Open the table. Loop through all rows of the table, inserting index
  2686   2686     ** records into the sorter. */
  2687   2687     sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead);
  2688   2688     addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0); VdbeCoverage(v);
  2689   2689     regRecord = sqlite3GetTempReg(pParse);
................................................................................
  3024   3024       }
  3025   3025       if( j>=pTab->nCol ){
  3026   3026         sqlite3ErrorMsg(pParse, "table %s has no column named %s",
  3027   3027           pTab->zName, zColName);
  3028   3028         pParse->checkSchema = 1;
  3029   3029         goto exit_create_index;
  3030   3030       }
  3031         -    assert( pTab->nCol<=0x7fff && j<=0x7fff );
         3031  +    assert( j<=0x7fff );
  3032   3032       pIndex->aiColumn[i] = (i16)j;
  3033   3033       if( pListItem->pExpr ){
  3034   3034         int nColl;
  3035   3035         assert( pListItem->pExpr->op==TK_COLLATE );
  3036   3036         zColl = pListItem->pExpr->u.zToken;
  3037   3037         nColl = sqlite3Strlen30(zColl) + 1;
  3038   3038         assert( nExtra>=nColl );

Changes to src/expr.c.

  1908   1908         assert( pExpr->op==TK_EXISTS || pExpr->op==TK_SELECT );
  1909   1909   
  1910   1910         assert( ExprHasProperty(pExpr, EP_xIsSelect) );
  1911   1911         pSel = pExpr->x.pSelect;
  1912   1912         sqlite3SelectDestInit(&dest, 0, ++pParse->nMem);
  1913   1913         if( pExpr->op==TK_SELECT ){
  1914   1914           dest.eDest = SRT_Mem;
         1915  +        dest.iSdst = dest.iSDParm;
  1915   1916           sqlite3VdbeAddOp2(v, OP_Null, 0, dest.iSDParm);
  1916   1917           VdbeComment((v, "Init subquery result"));
  1917   1918         }else{
  1918   1919           dest.eDest = SRT_Exists;
  1919   1920           sqlite3VdbeAddOp2(v, OP_Integer, 0, dest.iSDParm);
  1920   1921           VdbeComment((v, "Init EXISTS result"));
  1921   1922         }

Changes to src/main.c.

  2095   2095     SQLITE_MAX_COMPOUND_SELECT,
  2096   2096     SQLITE_MAX_VDBE_OP,
  2097   2097     SQLITE_MAX_FUNCTION_ARG,
  2098   2098     SQLITE_MAX_ATTACHED,
  2099   2099     SQLITE_MAX_LIKE_PATTERN_LENGTH,
  2100   2100     SQLITE_MAX_VARIABLE_NUMBER,      /* IMP: R-38091-32352 */
  2101   2101     SQLITE_MAX_TRIGGER_DEPTH,
         2102  +  SQLITE_MAX_WORKER_THREADS,
  2102   2103   };
  2103   2104   
  2104   2105   /*
  2105   2106   ** Make sure the hard limits are set to reasonable values
  2106   2107   */
  2107   2108   #if SQLITE_MAX_LENGTH<100
  2108   2109   # error SQLITE_MAX_LENGTH must be at least 100
................................................................................
  2130   2131   #endif
  2131   2132   #if SQLITE_MAX_COLUMN>32767
  2132   2133   # error SQLITE_MAX_COLUMN must not exceed 32767
  2133   2134   #endif
  2134   2135   #if SQLITE_MAX_TRIGGER_DEPTH<1
  2135   2136   # error SQLITE_MAX_TRIGGER_DEPTH must be at least 1
  2136   2137   #endif
         2138  +#if SQLITE_MAX_WORKER_THREADS<0 || SQLITE_MAX_WORKER_THREADS>50
         2139  +# error SQLITE_MAX_WORKER_THREADS must be between 0 and 50
         2140  +#endif
  2137   2141   
  2138   2142   
  2139   2143   /*
  2140   2144   ** Change the value of a limit.  Report the old value.
  2141   2145   ** If an invalid limit index is supplied, report -1.
  2142   2146   ** Make no changes but still report the old value if the
  2143   2147   ** new limit is negative.
................................................................................
  2163   2167     assert( aHardLimit[SQLITE_LIMIT_VDBE_OP]==SQLITE_MAX_VDBE_OP );
  2164   2168     assert( aHardLimit[SQLITE_LIMIT_FUNCTION_ARG]==SQLITE_MAX_FUNCTION_ARG );
  2165   2169     assert( aHardLimit[SQLITE_LIMIT_ATTACHED]==SQLITE_MAX_ATTACHED );
  2166   2170     assert( aHardLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]==
  2167   2171                                                  SQLITE_MAX_LIKE_PATTERN_LENGTH );
  2168   2172     assert( aHardLimit[SQLITE_LIMIT_VARIABLE_NUMBER]==SQLITE_MAX_VARIABLE_NUMBER);
  2169   2173     assert( aHardLimit[SQLITE_LIMIT_TRIGGER_DEPTH]==SQLITE_MAX_TRIGGER_DEPTH );
  2170         -  assert( SQLITE_LIMIT_TRIGGER_DEPTH==(SQLITE_N_LIMIT-1) );
         2174  +  assert( aHardLimit[SQLITE_LIMIT_WORKER_THREADS]==SQLITE_MAX_WORKER_THREADS );
         2175  +  assert( SQLITE_LIMIT_WORKER_THREADS==(SQLITE_N_LIMIT-1) );
  2171   2176   
  2172   2177   
  2173   2178     if( limitId<0 || limitId>=SQLITE_N_LIMIT ){
  2174   2179       return -1;
  2175   2180     }
  2176   2181     oldLimit = db->aLimit[limitId];
  2177   2182     if( newLimit>=0 ){                   /* IMP: R-52476-28732 */
................................................................................
  2510   2515     db->errMask = 0xff;
  2511   2516     db->nDb = 2;
  2512   2517     db->magic = SQLITE_MAGIC_BUSY;
  2513   2518     db->aDb = db->aDbStatic;
  2514   2519   
  2515   2520     assert( sizeof(db->aLimit)==sizeof(aHardLimit) );
  2516   2521     memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit));
         2522  +  db->aLimit[SQLITE_LIMIT_WORKER_THREADS] = SQLITE_DEFAULT_WORKER_THREADS;
  2517   2523     db->autoCommit = 1;
  2518   2524     db->nextAutovac = -1;
  2519   2525     db->szMmap = sqlite3GlobalConfig.szMmap;
  2520   2526     db->nextPagesize = 0;
         2527  +  db->nMaxSorterMmap = 0x7FFFFFFF;
  2521   2528     db->flags |= SQLITE_ShortColNames | SQLITE_EnableTrigger | SQLITE_CacheSpill
  2522   2529   #if !defined(SQLITE_DEFAULT_AUTOMATIC_INDEX) || SQLITE_DEFAULT_AUTOMATIC_INDEX
  2523   2530                    | SQLITE_AutoIndex
  2524   2531   #endif
  2525   2532   #if SQLITE_DEFAULT_FILE_FORMAT<4
  2526   2533                    | SQLITE_LegacyFileFmt
  2527   2534   #endif
................................................................................
  3377   3384   #ifdef SQLITE_VDBE_COVERAGE
  3378   3385         typedef void (*branch_callback)(void*,int,u8,u8);
  3379   3386         sqlite3GlobalConfig.xVdbeBranch = va_arg(ap,branch_callback);
  3380   3387         sqlite3GlobalConfig.pVdbeBranchArg = va_arg(ap,void*);
  3381   3388   #endif
  3382   3389         break;
  3383   3390       }
         3391  +
         3392  +    /*   sqlite3_test_control(SQLITE_TESTCTRL_SORTER_MMAP, db, nMax); */
         3393  +    case SQLITE_TESTCTRL_SORTER_MMAP: {
         3394  +      sqlite3 *db = va_arg(ap, sqlite3*);
         3395  +      db->nMaxSorterMmap = va_arg(ap, int);
         3396  +      break;
         3397  +    }
  3384   3398   
  3385   3399       /*   sqlite3_test_control(SQLITE_TESTCTRL_ISINIT);
  3386   3400       **
  3387   3401       ** Return SQLITE_OK if SQLite has been initialized and SQLITE_ERROR if
  3388   3402       ** not.
  3389   3403       */
  3390   3404       case SQLITE_TESTCTRL_ISINIT: {
  3391   3405         if( sqlite3GlobalConfig.isInit==0 ) rc = SQLITE_ERROR;
  3392   3406         break;
  3393   3407       }
  3394         -
  3395   3408     }
  3396   3409     va_end(ap);
  3397   3410   #endif /* SQLITE_OMIT_BUILTIN_TEST */
  3398   3411     return rc;
  3399   3412   }
  3400   3413   
  3401   3414   /*

Changes to src/os_unix.c.

  5881   5881   ){
  5882   5882     int rc = SQLITE_OK;
  5883   5883     UNUSED_PARAMETER(NotUsed);
  5884   5884     SimulateIOError(return SQLITE_IOERR_DELETE);
  5885   5885     if( osUnlink(zPath)==(-1) ){
  5886   5886       if( errno==ENOENT
  5887   5887   #if OS_VXWORKS
  5888         -        || errno==0x380003
         5888  +        || osAccess(zPath,0)!=0
  5889   5889   #endif
  5890   5890       ){
  5891   5891         rc = SQLITE_IOERR_DELETE_NOENT;
  5892   5892       }else{
  5893   5893         rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath);
  5894   5894       }
  5895   5895       return rc;

Changes to src/os_win.c.

   939    939   #else
   940    940     { "WaitForSingleObject",     (SYSCALL)0,                       0 },
   941    941   #endif
   942    942   
   943    943   #define osWaitForSingleObject ((DWORD(WINAPI*)(HANDLE, \
   944    944           DWORD))aSyscall[63].pCurrent)
   945    945   
   946         -#if SQLITE_OS_WINRT
   947    946     { "WaitForSingleObjectEx",   (SYSCALL)WaitForSingleObjectEx,   0 },
   948         -#else
   949         -  { "WaitForSingleObjectEx",   (SYSCALL)0,                       0 },
   950         -#endif
   951    947   
   952    948   #define osWaitForSingleObjectEx ((DWORD(WINAPI*)(HANDLE,DWORD, \
   953    949           BOOL))aSyscall[64].pCurrent)
   954    950   
   955    951   #if SQLITE_OS_WINRT
   956    952     { "SetFilePointerEx",        (SYSCALL)SetFilePointerEx,        0 },
   957    953   #else
................................................................................
  1285   1281     }
  1286   1282     assert( sleepObj!=NULL );
  1287   1283     osWaitForSingleObjectEx(sleepObj, milliseconds, FALSE);
  1288   1284   #else
  1289   1285     osSleep(milliseconds);
  1290   1286   #endif
  1291   1287   }
         1288  +
         1289  +DWORD sqlite3Win32Wait(HANDLE hObject){
         1290  +  DWORD rc;
         1291  +  while( (rc = osWaitForSingleObjectEx(hObject, INFINITE,
         1292  +                                       TRUE))==WAIT_IO_COMPLETION ){}
         1293  +  return rc;
         1294  +}
  1292   1295   
  1293   1296   /*
  1294   1297   ** Return true (non-zero) if we are running under WinNT, Win2K, WinXP,
  1295   1298   ** or WinCE.  Return false (zero) for Win95, Win98, or WinME.
  1296   1299   **
  1297   1300   ** Here is an interesting observation:  Win95, Win98, and WinME lack
  1298   1301   ** the LockFileEx() API.  But we can still statically link against that
................................................................................
  1313   1316   #endif
  1314   1317   
  1315   1318   /*
  1316   1319   ** This function determines if the machine is running a version of Windows
  1317   1320   ** based on the NT kernel.
  1318   1321   */
  1319   1322   int sqlite3_win32_is_nt(void){
  1320         -#if defined(SQLITE_WIN32_GETVERSIONEX) && SQLITE_WIN32_GETVERSIONEX
         1323  +#if SQLITE_OS_WINRT
         1324  +  /*
         1325  +  ** NOTE: The WinRT sub-platform is always assumed to be based on the NT
         1326  +  **       kernel.
         1327  +  */
         1328  +  return 1;
         1329  +#elif defined(SQLITE_WIN32_GETVERSIONEX) && SQLITE_WIN32_GETVERSIONEX
  1321   1330     if( osInterlockedCompareExchange(&sqlite3_os_type, 0, 0)==0 ){
  1322         -#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \
  1323         -        defined(NTDDI_VERSION) && NTDDI_VERSION >= NTDDI_WIN8
  1324         -    OSVERSIONINFOW sInfo;
  1325         -    sInfo.dwOSVersionInfoSize = sizeof(sInfo);
  1326         -    osGetVersionExW(&sInfo);
  1327         -    osInterlockedCompareExchange(&sqlite3_os_type,
  1328         -        (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0);
  1329         -#elif defined(SQLITE_WIN32_HAS_ANSI)
         1331  +#if defined(SQLITE_WIN32_HAS_ANSI)
  1330   1332       OSVERSIONINFOA sInfo;
  1331   1333       sInfo.dwOSVersionInfoSize = sizeof(sInfo);
  1332   1334       osGetVersionExA(&sInfo);
         1335  +    osInterlockedCompareExchange(&sqlite3_os_type,
         1336  +        (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0);
         1337  +#elif defined(SQLITE_WIN32_HAS_WIDE)
         1338  +    OSVERSIONINFOW sInfo;
         1339  +    sInfo.dwOSVersionInfoSize = sizeof(sInfo);
         1340  +    osGetVersionExW(&sInfo);
  1333   1341       osInterlockedCompareExchange(&sqlite3_os_type,
  1334   1342           (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0);
  1335   1343   #endif
  1336   1344     }
  1337   1345     return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2;
  1338   1346   #elif SQLITE_TEST
  1339   1347     return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2;
  1340   1348   #else
         1349  +  /*
         1350  +  ** NOTE: All sub-platforms where the GetVersionEx[AW] functions are
         1351  +  **       deprecated are always assumed to be based on the NT kernel.
         1352  +  */
  1341   1353     return 1;
  1342   1354   #endif
  1343   1355   }
  1344   1356   
  1345   1357   #ifdef SQLITE_WIN32_MALLOC
  1346   1358   /*
  1347   1359   ** Allocate nBytes of memory.

Changes to src/pager.c.

  3618   3618   
  3619   3619       if( rc==SQLITE_OK ){
  3620   3620         pager_reset(pPager);
  3621   3621         pPager->dbSize = (Pgno)((nByte+pageSize-1)/pageSize);
  3622   3622         pPager->pageSize = pageSize;
  3623   3623         sqlite3PageFree(pPager->pTmpSpace);
  3624   3624         pPager->pTmpSpace = pNew;
  3625         -      sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
         3625  +      rc = sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
  3626   3626       }
  3627   3627     }
  3628   3628   
  3629   3629     *pPageSize = pPager->pageSize;
  3630   3630     if( rc==SQLITE_OK ){
  3631   3631       if( nReserve<0 ) nReserve = pPager->nReserve;
  3632   3632       assert( nReserve>=0 && nReserve<1000 );
................................................................................
  4381   4381     **
  4382   4382     ** The doNotSpill ROLLBACK and OFF bits inhibits all cache spilling
  4383   4383     ** regardless of whether or not a sync is required.  This is set during
  4384   4384     ** a rollback or by user request, respectively.
  4385   4385     **
  4386   4386     ** Spilling is also prohibited when in an error state since that could
  4387   4387     ** lead to database corruption.   In the current implementation it 
  4388         -  ** is impossible for sqlite3PcacheFetch() to be called with createFlag==1
         4388  +  ** is impossible for sqlite3PcacheFetch() to be called with createFlag==3
  4389   4389     ** while in the error state, hence it is impossible for this routine to
  4390   4390     ** be called in the error state.  Nevertheless, we include a NEVER()
  4391   4391     ** test for the error state as a safeguard against future changes.
  4392   4392     */
  4393   4393     if( NEVER(pPager->errCode) ) return SQLITE_OK;
  4394   4394     testcase( pPager->doNotSpill & SPILLFLAG_ROLLBACK );
  4395   4395     testcase( pPager->doNotSpill & SPILLFLAG_OFF );
................................................................................
  4717   4717     */
  4718   4718     if( rc==SQLITE_OK ){
  4719   4719       assert( pPager->memDb==0 );
  4720   4720       rc = sqlite3PagerSetPagesize(pPager, &szPageDflt, -1);
  4721   4721       testcase( rc!=SQLITE_OK );
  4722   4722     }
  4723   4723   
  4724         -  /* If an error occurred in either of the blocks above, free the 
  4725         -  ** Pager structure and close the file.
         4724  +  /* Initialize the PCache object. */
         4725  +  if( rc==SQLITE_OK ){
         4726  +    assert( nExtra<1000 );
         4727  +    nExtra = ROUND8(nExtra);
         4728  +    rc = sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
         4729  +                           !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
         4730  +  }
         4731  +
         4732  +  /* If an error occurred above, free the Pager structure and close the file.
  4726   4733     */
  4727   4734     if( rc!=SQLITE_OK ){
  4728         -    assert( !pPager->pTmpSpace );
  4729   4735       sqlite3OsClose(pPager->fd);
         4736  +    sqlite3PageFree(pPager->pTmpSpace);
  4730   4737       sqlite3_free(pPager);
  4731   4738       return rc;
  4732   4739     }
  4733   4740   
  4734         -  /* Initialize the PCache object. */
  4735         -  assert( nExtra<1000 );
  4736         -  nExtra = ROUND8(nExtra);
  4737         -  sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
  4738         -                    !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
  4739         -
  4740   4741     PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename));
  4741   4742     IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
  4742   4743   
  4743   4744     pPager->useJournal = (u8)useJournal;
  4744   4745     /* pPager->stmtOpen = 0; */
  4745   4746     /* pPager->stmtInUse = 0; */
  4746   4747     /* pPager->nRef = 0; */
................................................................................
  5281   5282     }
  5282   5283   
  5283   5284     /* If the pager is in the error state, return an error immediately. 
  5284   5285     ** Otherwise, request the page from the PCache layer. */
  5285   5286     if( pPager->errCode!=SQLITE_OK ){
  5286   5287       rc = pPager->errCode;
  5287   5288     }else{
  5288         -
  5289   5289       if( bMmapOk && pagerUseWal(pPager) ){
  5290   5290         rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame);
  5291   5291         if( rc!=SQLITE_OK ) goto pager_acquire_err;
  5292   5292       }
  5293   5293   
  5294   5294       if( bMmapOk && iFrame==0 ){
  5295   5295         void *pData = 0;
................................................................................
  5296   5296   
  5297   5297         rc = sqlite3OsFetch(pPager->fd, 
  5298   5298             (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData
  5299   5299         );
  5300   5300   
  5301   5301         if( rc==SQLITE_OK && pData ){
  5302   5302           if( pPager->eState>PAGER_READER ){
  5303         -          (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
         5303  +          pPg = sqlite3PagerLookup(pPager, pgno);
  5304   5304           }
  5305   5305           if( pPg==0 ){
  5306   5306             rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg);
  5307   5307           }else{
  5308   5308             sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData);
  5309   5309           }
  5310   5310           if( pPg ){
................................................................................
  5314   5314           }
  5315   5315         }
  5316   5316         if( rc!=SQLITE_OK ){
  5317   5317           goto pager_acquire_err;
  5318   5318         }
  5319   5319       }
  5320   5320   
  5321         -    rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage);
         5321  +    {
         5322  +      sqlite3_pcache_page *pBase;
         5323  +      pBase = sqlite3PcacheFetch(pPager->pPCache, pgno, 3);
         5324  +      if( pBase==0 ){
         5325  +        rc = sqlite3PcacheFetchStress(pPager->pPCache, pgno, &pBase);
         5326  +        if( rc!=SQLITE_OK ) goto pager_acquire_err;
         5327  +      }
         5328  +      pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase);
         5329  +      if( pPg==0 ) rc = SQLITE_NOMEM;
         5330  +    }
  5322   5331     }
  5323   5332   
  5324   5333     if( rc!=SQLITE_OK ){
  5325   5334       /* Either the call to sqlite3PcacheFetch() returned an error or the
  5326   5335       ** pager was already in the error-state when this function was called.
  5327   5336       ** Set pPg to 0 and jump to the exception handler.  */
  5328   5337       pPg = 0;
................................................................................
  5411   5420   ** See also sqlite3PagerGet().  The difference between this routine
  5412   5421   ** and sqlite3PagerGet() is that _get() will go to the disk and read
  5413   5422   ** in the page if the page is not already in cache.  This routine
  5414   5423   ** returns NULL if the page is not in cache or if a disk I/O error 
  5415   5424   ** has ever happened.
  5416   5425   */
  5417   5426   DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
  5418         -  PgHdr *pPg = 0;
         5427  +  sqlite3_pcache_page *pPage;
  5419   5428     assert( pPager!=0 );
  5420   5429     assert( pgno!=0 );
  5421   5430     assert( pPager->pPCache!=0 );
  5422         -  sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
  5423         -  return pPg;
         5431  +  pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0);
         5432  +  return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage);
  5424   5433   }
  5425   5434   
  5426   5435   /*
  5427   5436   ** Release a page reference.
  5428   5437   **
  5429   5438   ** If the number of references to the page drop to zero, then the
  5430   5439   ** page is added to the LRU list.  When all references to all pages

Changes to src/pcache.c.

   139    139     if( p->pCache->bPurgeable ){
   140    140       if( p->pgno==1 ){
   141    141         p->pCache->pPage1 = 0;
   142    142       }
   143    143       sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 0);
   144    144     }
   145    145   }
          146  +
          147  +/*
          148  +** Compute the number of pages of cache requested.
          149  +*/
          150  +static int numberOfCachePages(PCache *p){
          151  +  if( p->szCache>=0 ){
          152  +    return p->szCache;
          153  +  }else{
          154  +    return (int)((-1024*(i64)p->szCache)/(p->szPage+p->szExtra));
          155  +  }
          156  +}
   146    157   
   147    158   /*************************************************** General Interfaces ******
   148    159   **
   149    160   ** Initialize and shutdown the page cache subsystem. Neither of these 
   150    161   ** functions are threadsafe.
   151    162   */
   152    163   int sqlite3PcacheInitialize(void){
................................................................................
   172    183   
   173    184   /*
   174    185   ** Create a new PCache object. Storage space to hold the object
   175    186   ** has already been allocated and is passed in as the p pointer. 
   176    187   ** The caller discovers how much space needs to be allocated by 
   177    188   ** calling sqlite3PcacheSize().
   178    189   */
   179         -void sqlite3PcacheOpen(
          190  +int sqlite3PcacheOpen(
   180    191     int szPage,                  /* Size of every page */
   181    192     int szExtra,                 /* Extra space associated with each page */
   182    193     int bPurgeable,              /* True if pages are on backing store */
   183    194     int (*xStress)(void*,PgHdr*),/* Call to try to make pages clean */
   184    195     void *pStress,               /* Argument to xStress */
   185    196     PCache *p                    /* Preallocated space for the PCache */
   186    197   ){
   187    198     memset(p, 0, sizeof(PCache));
   188         -  p->szPage = szPage;
          199  +  p->szPage = 1;
   189    200     p->szExtra = szExtra;
   190    201     p->bPurgeable = bPurgeable;
   191    202     p->eCreate = 2;
   192    203     p->xStress = xStress;
   193    204     p->pStress = pStress;
   194    205     p->szCache = 100;
          206  +  return sqlite3PcacheSetPageSize(p, szPage);
   195    207   }
   196    208   
   197    209   /*
   198    210   ** Change the page size for PCache object. The caller must ensure that there
   199    211   ** are no outstanding page references when this function is called.
   200    212   */
   201         -void sqlite3PcacheSetPageSize(PCache *pCache, int szPage){
          213  +int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){
   202    214     assert( pCache->nRef==0 && pCache->pDirty==0 );
   203         -  if( pCache->pCache ){
   204         -    sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
   205         -    pCache->pCache = 0;
          215  +  if( pCache->szPage ){
          216  +    sqlite3_pcache *pNew;
          217  +    pNew = sqlite3GlobalConfig.pcache2.xCreate(
          218  +                szPage, pCache->szExtra + sizeof(PgHdr), pCache->bPurgeable
          219  +    );
          220  +    if( pNew==0 ) return SQLITE_NOMEM;
          221  +    sqlite3GlobalConfig.pcache2.xCachesize(pNew, numberOfCachePages(pCache));
          222  +    if( pCache->pCache ){
          223  +      sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
          224  +    }
          225  +    pCache->pCache = pNew;
   206    226       pCache->pPage1 = 0;
          227  +    pCache->szPage = szPage;
   207    228     }
   208         -  pCache->szPage = szPage;
   209         -}
   210         -
   211         -/*
   212         -** Compute the number of pages of cache requested.
   213         -*/
   214         -static int numberOfCachePages(PCache *p){
   215         -  if( p->szCache>=0 ){
   216         -    return p->szCache;
   217         -  }else{
   218         -    return (int)((-1024*(i64)p->szCache)/(p->szPage+p->szExtra));
   219         -  }
          229  +  return SQLITE_OK;
   220    230   }
   221    231   
   222    232   /*
   223    233   ** Try to obtain a page from the cache.
          234  +**
          235  +** This routine returns a pointer to an sqlite3_pcache_page object if
          236  +** such an object is already in cache, or if a new one is created.
          237  +** This routine returns a NULL pointer if the object was not in cache
          238  +** and could not be created.
          239  +**
          240  +** The createFlag should be 0 to check for existing pages and should
          241  +** be 3 (not 1, but 3) to try to create a new page.
          242  +**
          243  +** If the createFlag is 0, then NULL is always returned if the page
          244  +** is not already in the cache.  If createFlag is 3 but the cache's
          245  +** eCreate setting is 1, then a new page is created only if that can be
          246  +** done without spilling dirty pages or exceeding the cache size limit.
          247  +**
          248  +** The caller needs to invoke sqlite3PcacheFetchFinish() to properly
          249  +** initialize the sqlite3_pcache_page object and convert it into a
          250  +** PgHdr object.  The sqlite3PcacheFetch() and sqlite3PcacheFetchFinish()
          251  +** routines are split this way for performance reasons. When separated
          252  +** they can both (usually) operate without having to push values to
          253  +** the stack on entry and pop them back off on exit, which saves a
          254  +** lot of pushing and popping.
   224    255   */
   225         -int sqlite3PcacheFetch(
          256  +sqlite3_pcache_page *sqlite3PcacheFetch(
   226    257     PCache *pCache,       /* Obtain the page from this cache */
   227    258     Pgno pgno,            /* Page number to obtain */
   228         -  int createFlag,       /* If true, create page if it does not exist already */
   229         -  PgHdr **ppPage        /* Write the page here */
          259  +  int createFlag        /* If true, create page if it does not exist already */
   230    260   ){
   231         -  sqlite3_pcache_page *pPage;
   232         -  PgHdr *pPgHdr = 0;
   233    261     int eCreate;
   234    262   
   235    263     assert( pCache!=0 );
   236         -  assert( createFlag==1 || createFlag==0 );
          264  +  assert( pCache->pCache!=0 );
          265  +  assert( createFlag==3 || createFlag==0 );
   237    266     assert( pgno>0 );
   238    267   
   239         -  /* If the pluggable cache (sqlite3_pcache*) has not been allocated,
   240         -  ** allocate it now.
   241         -  */
   242         -  if( !pCache->pCache ){
   243         -    sqlite3_pcache *p;
   244         -    if( !createFlag ){
   245         -      *ppPage = 0;
   246         -      return SQLITE_OK;
   247         -    }
   248         -    p = sqlite3GlobalConfig.pcache2.xCreate(
   249         -        pCache->szPage, pCache->szExtra + sizeof(PgHdr), pCache->bPurgeable
   250         -    );
   251         -    if( !p ){
   252         -      return SQLITE_NOMEM;
   253         -    }
   254         -    sqlite3GlobalConfig.pcache2.xCachesize(p, numberOfCachePages(pCache));
   255         -    pCache->pCache = p;
   256         -  }
   257         -
   258    268     /* eCreate defines what to do if the page does not exist.
   259    269     **    0     Do not allocate a new page.  (createFlag==0)
   260    270     **    1     Allocate a new page if doing so is inexpensive.
   261    271     **          (createFlag==3 AND bPurgeable AND pDirty)
   262    272     **    2     Allocate a new page even if doing so is difficult.
   263    273     **          (createFlag==3 AND !(bPurgeable AND pDirty))
   264    274     */
   265         -  eCreate = createFlag==0 ? 0 : pCache->eCreate;
   266         -  assert( (createFlag*(1+(!pCache->bPurgeable||!pCache->pDirty)))==eCreate );
   267         -  pPage = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, eCreate);
   268         -  if( !pPage && eCreate==1 ){
   269         -    PgHdr *pPg;
   270         -
   271         -    /* Find a dirty page to write-out and recycle. First try to find a 
   272         -    ** page that does not require a journal-sync (one with PGHDR_NEED_SYNC
   273         -    ** cleared), but if that is not possible settle for any other 
   274         -    ** unreferenced dirty page.
   275         -    */
   276         -    expensive_assert( pcacheCheckSynced(pCache) );
   277         -    for(pPg=pCache->pSynced; 
   278         -        pPg && (pPg->nRef || (pPg->flags&PGHDR_NEED_SYNC)); 
   279         -        pPg=pPg->pDirtyPrev
   280         -    );
   281         -    pCache->pSynced = pPg;
   282         -    if( !pPg ){
   283         -      for(pPg=pCache->pDirtyTail; pPg && pPg->nRef; pPg=pPg->pDirtyPrev);
   284         -    }
   285         -    if( pPg ){
   286         -      int rc;
          275  +  eCreate = createFlag & pCache->eCreate;
          276  +  assert( eCreate==0 || eCreate==1 || eCreate==2 );
          277  +  assert( createFlag==0 || pCache->eCreate==eCreate );
          278  +  assert( createFlag==0 || eCreate==1+(!pCache->bPurgeable||!pCache->pDirty) );
          279  +  return sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, eCreate);
          280  +}
          281  +
          282  +/*
          283  +** If the sqlite3PcacheFetch() routine is unable to allocate a new
          284  +** page because no clean pages are available for reuse and the cache
          285  +** size limit has been reached, then this routine can be invoked to 
          286  +** try harder to allocate a page.  This routine might invoke the stress
          287  +** callback to spill dirty pages to the journal.  It will then try to
          288  +** allocate the new page and will only fail to allocate a new page on
          289  +** an OOM error.
          290  +**
          291  +** This routine should be invoked only after sqlite3PcacheFetch() fails.
          292  +*/
          293  +int sqlite3PcacheFetchStress(
          294  +  PCache *pCache,                 /* Obtain the page from this cache */
          295  +  Pgno pgno,                      /* Page number to obtain */
          296  +  sqlite3_pcache_page **ppPage    /* Write result here */
          297  +){
          298  +  PgHdr *pPg;
          299  +  if( pCache->eCreate==2 ) return 0;
          300  +
          301  +
          302  +  /* Find a dirty page to write-out and recycle. First try to find a 
          303  +  ** page that does not require a journal-sync (one with PGHDR_NEED_SYNC
          304  +  ** cleared), but if that is not possible settle for any other 
          305  +  ** unreferenced dirty page.
          306  +  */
          307  +  expensive_assert( pcacheCheckSynced(pCache) );
          308  +  for(pPg=pCache->pSynced; 
          309  +      pPg && (pPg->nRef || (pPg->flags&PGHDR_NEED_SYNC)); 
          310  +      pPg=pPg->pDirtyPrev
          311  +  );
          312  +  pCache->pSynced = pPg;
          313  +  if( !pPg ){
          314  +    for(pPg=pCache->pDirtyTail; pPg && pPg->nRef; pPg=pPg->pDirtyPrev);
          315  +  }
          316  +  if( pPg ){
          317  +    int rc;
   287    318   #ifdef SQLITE_LOG_CACHE_SPILL
   288         -      sqlite3_log(SQLITE_FULL, 
   289         -                  "spill page %d making room for %d - cache used: %d/%d",
   290         -                  pPg->pgno, pgno,
   291         -                  sqlite3GlobalConfig.pcache.xPagecount(pCache->pCache),
   292         -                  numberOfCachePages(pCache));
          319  +    sqlite3_log(SQLITE_FULL, 
          320  +                "spill page %d making room for %d - cache used: %d/%d",
          321  +                pPg->pgno, pgno,
          322  +                sqlite3GlobalConfig.pcache.xPagecount(pCache->pCache),
          323  +                numberOfCachePages(pCache));
   293    324   #endif
   294         -      rc = pCache->xStress(pCache->pStress, pPg);
   295         -      if( rc!=SQLITE_OK && rc!=SQLITE_BUSY ){
   296         -        return rc;
   297         -      }
   298         -    }
   299         -
   300         -    pPage = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, 2);
   301         -  }
   302         -
   303         -  if( pPage ){
   304         -    pPgHdr = (PgHdr *)pPage->pExtra;
   305         -
   306         -    if( !pPgHdr->pPage ){
   307         -      memset(pPgHdr, 0, sizeof(PgHdr));
   308         -      pPgHdr->pPage = pPage;
   309         -      pPgHdr->pData = pPage->pBuf;
   310         -      pPgHdr->pExtra = (void *)&pPgHdr[1];
   311         -      memset(pPgHdr->pExtra, 0, pCache->szExtra);
   312         -      pPgHdr->pCache = pCache;
   313         -      pPgHdr->pgno = pgno;
   314         -    }
   315         -    assert( pPgHdr->pCache==pCache );
   316         -    assert( pPgHdr->pgno==pgno );
   317         -    assert( pPgHdr->pData==pPage->pBuf );
   318         -    assert( pPgHdr->pExtra==(void *)&pPgHdr[1] );
   319         -
   320         -    if( 0==pPgHdr->nRef ){
   321         -      pCache->nRef++;
   322         -    }
   323         -    pPgHdr->nRef++;
   324         -    if( pgno==1 ){
   325         -      pCache->pPage1 = pPgHdr;
          325  +    rc = pCache->xStress(pCache->pStress, pPg);
          326  +    if( rc!=SQLITE_OK && rc!=SQLITE_BUSY ){
          327  +      return rc;
   326    328       }
   327    329     }
   328         -  *ppPage = pPgHdr;
   329         -  return (pPgHdr==0 && eCreate) ? SQLITE_NOMEM : SQLITE_OK;
          330  +  *ppPage = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, 2);
          331  +  return *ppPage==0 ? SQLITE_NOMEM : SQLITE_OK;
          332  +}
          333  +
          334  +/*
          335  +** This is a helper routine for sqlite3PcacheFetchFinish()
          336  +**
          337  +** In the uncommon case where the page being fetched has not been
          338  +** initialized, this routine is invoked to do the initialization.
          339  +** This routine is broken out into a separate function since it
          340  +** requires extra stack manipulation that can be avoided in the common
          341  +** case.
          342  +*/
          343  +static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit(
          344  +  PCache *pCache,             /* Obtain the page from this cache */
          345  +  Pgno pgno,                  /* Page number obtained */
          346  +  sqlite3_pcache_page *pPage  /* Page obtained by prior PcacheFetch() call */
          347  +){
          348  +  PgHdr *pPgHdr;
          349  +  assert( pPage!=0 );
          350  +  pPgHdr = (PgHdr*)pPage->pExtra;
          351  +  assert( pPgHdr->pPage==0 );
          352  + memset(pPgHdr, 0, sizeof(PgHdr));
          353  +  pPgHdr->pPage = pPage;
          354  +  pPgHdr->pData = pPage->pBuf;
          355  +  pPgHdr->pExtra = (void *)&pPgHdr[1];
          356  +  memset(pPgHdr->pExtra, 0, pCache->szExtra);
          357  +  pPgHdr->pCache = pCache;
          358  +  pPgHdr->pgno = pgno;
          359  +  return sqlite3PcacheFetchFinish(pCache,pgno,pPage);
          360  +}
          361  +
          362  +/*
          363  +** This routine converts the sqlite3_pcache_page object returned by
          364  +** sqlite3PcacheFetch() into an initialized PgHdr object.  This routine
          365  +** must be called after sqlite3PcacheFetch() in order to get a usable
          366  +** result.
          367  +*/
          368  +PgHdr *sqlite3PcacheFetchFinish(
          369  +  PCache *pCache,             /* Obtain the page from this cache */
          370  +  Pgno pgno,                  /* Page number obtained */
          371  +  sqlite3_pcache_page *pPage  /* Page obtained by prior PcacheFetch() call */
          372  +){
          373  +  PgHdr *pPgHdr;
          374  +
          375  +  if( pPage==0 ) return 0;
          376  +  pPgHdr = (PgHdr *)pPage->pExtra;
          377  +
          378  +  if( !pPgHdr->pPage ){
          379  +    return pcacheFetchFinishWithInit(pCache, pgno, pPage);
          380  +  }
          381  +  if( 0==pPgHdr->nRef ){
          382  +    pCache->nRef++;
          383  +  }
          384  +  pPgHdr->nRef++;
          385  +  if( pgno==1 ){
          386  +    pCache->pPage1 = pPgHdr;
          387  +  }
          388  +  return pPgHdr;
   330    389   }
   331    390   
   332    391   /*
   333    392   ** Decrement the reference count on a page. If the page is clean and the
   334    393   ** reference count drops to 0, then it is made eligible for recycling.
   335    394   */
   336    395   void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){
................................................................................
   467    526     }
   468    527   }
   469    528   
   470    529   /*
   471    530   ** Close a cache.
   472    531   */
   473    532   void sqlite3PcacheClose(PCache *pCache){
   474         -  if( pCache->pCache ){
   475         -    sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
   476         -  }
          533  +  assert( pCache->pCache!=0 );
          534  +  sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
   477    535   }
   478    536   
   479    537   /* 
   480    538   ** Discard the contents of the cache.
   481    539   */
   482    540   void sqlite3PcacheClear(PCache *pCache){
   483    541     sqlite3PcacheTruncate(pCache, 0);
................................................................................
   578    636     return p->nRef;
   579    637   }
   580    638   
   581    639   /* 
   582    640   ** Return the total number of pages in the cache.
   583    641   */
   584    642   int sqlite3PcachePagecount(PCache *pCache){
   585         -  int nPage = 0;
   586         -  if( pCache->pCache ){
   587         -    nPage = sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache);
   588         -  }
   589         -  return nPage;
          643  +  assert( pCache->pCache!=0 );
          644  +  return sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache);
   590    645   }
   591    646   
   592    647   #ifdef SQLITE_TEST
   593    648   /*
   594    649   ** Get the suggested cache-size value.
   595    650   */
   596    651   int sqlite3PcacheGetCachesize(PCache *pCache){
................................................................................
   598    653   }
   599    654   #endif
   600    655   
   601    656   /*
   602    657   ** Set the suggested cache-size value.
   603    658   */
   604    659   void sqlite3PcacheSetCachesize(PCache *pCache, int mxPage){
          660  +  assert( pCache->pCache!=0 );
   605    661     pCache->szCache = mxPage;
   606         -  if( pCache->pCache ){
   607         -    sqlite3GlobalConfig.pcache2.xCachesize(pCache->pCache,
   608         -                                           numberOfCachePages(pCache));
   609         -  }
          662  +  sqlite3GlobalConfig.pcache2.xCachesize(pCache->pCache,
          663  +                                         numberOfCachePages(pCache));
   610    664   }
   611    665   
   612    666   /*
   613    667   ** Free up as much memory as possible from the page cache.
   614    668   */
   615    669   void sqlite3PcacheShrink(PCache *pCache){
   616         -  if( pCache->pCache ){
   617         -    sqlite3GlobalConfig.pcache2.xShrink(pCache->pCache);
   618         -  }
          670  +  assert( pCache->pCache!=0 );
          671  +  sqlite3GlobalConfig.pcache2.xShrink(pCache->pCache);
   619    672   }
   620    673   
   621    674   #if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG)
   622    675   /*
   623    676   ** For all dirty pages currently in the cache, invoke the specified
   624    677   ** callback. This is only available if the SQLITE_CHECK_PAGES or
   625    678   ** SQLITE_DEBUG macro is defined.

Changes to src/pcache.h.

    64     64   */
    65     65   void sqlite3PCacheBufferSetup(void *, int sz, int n);
    66     66   
    67     67   /* Create a new pager cache.
    68     68   ** Under memory stress, invoke xStress to try to make pages clean.
    69     69   ** Only clean and unpinned pages can be reclaimed.
    70     70   */
    71         -void sqlite3PcacheOpen(
           71  +int sqlite3PcacheOpen(
    72     72     int szPage,                    /* Size of every page */
    73     73     int szExtra,                   /* Extra space associated with each page */
    74     74     int bPurgeable,                /* True if pages are on backing store */
    75     75     int (*xStress)(void*, PgHdr*), /* Call to try to make pages clean */
    76     76     void *pStress,                 /* Argument to xStress */
    77     77     PCache *pToInit                /* Preallocated space for the PCache */
    78     78   );
    79     79   
    80     80   /* Modify the page-size after the cache has been created. */
    81         -void sqlite3PcacheSetPageSize(PCache *, int);
           81  +int sqlite3PcacheSetPageSize(PCache *, int);
    82     82   
    83     83   /* Return the size in bytes of a PCache object.  Used to preallocate
    84     84   ** storage space.
    85     85   */
    86     86   int sqlite3PcacheSize(void);
    87     87   
    88     88   /* One release per successful fetch.  Page is pinned until released.
    89     89   ** Reference counted. 
    90     90   */
    91         -int sqlite3PcacheFetch(PCache*, Pgno, int createFlag, PgHdr**);
           91  +sqlite3_pcache_page *sqlite3PcacheFetch(PCache*, Pgno, int createFlag);
           92  +int sqlite3PcacheFetchStress(PCache*, Pgno, sqlite3_pcache_page**);
           93  +PgHdr *sqlite3PcacheFetchFinish(PCache*, Pgno, sqlite3_pcache_page *pPage);
    92     94   void sqlite3PcacheRelease(PgHdr*);
    93     95   
    94     96   void sqlite3PcacheDrop(PgHdr*);         /* Remove page from cache */
    95     97   void sqlite3PcacheMakeDirty(PgHdr*);    /* Make sure page is marked dirty */
    96     98   void sqlite3PcacheMakeClean(PgHdr*);    /* Mark a single page as clean */
    97     99   void sqlite3PcacheCleanAll(PCache*);    /* Mark all dirty list pages as clean */
    98    100   

Changes to src/pragma.c.

    57     57   #define PragTyp_SHRINK_MEMORY                 26
    58     58   #define PragTyp_SOFT_HEAP_LIMIT               27
    59     59   #define PragTyp_STATS                         28
    60     60   #define PragTyp_SYNCHRONOUS                   29
    61     61   #define PragTyp_TABLE_INFO                    30
    62     62   #define PragTyp_TEMP_STORE                    31
    63     63   #define PragTyp_TEMP_STORE_DIRECTORY          32
    64         -#define PragTyp_WAL_AUTOCHECKPOINT            33
    65         -#define PragTyp_WAL_CHECKPOINT                34
    66         -#define PragTyp_ACTIVATE_EXTENSIONS           35
    67         -#define PragTyp_HEXKEY                        36
    68         -#define PragTyp_KEY                           37
    69         -#define PragTyp_REKEY                         38
    70         -#define PragTyp_LOCK_STATUS                   39
    71         -#define PragTyp_PARSER_TRACE                  40
           64  +#define PragTyp_THREADS                       33
           65  +#define PragTyp_WAL_AUTOCHECKPOINT            34
           66  +#define PragTyp_WAL_CHECKPOINT                35
           67  +#define PragTyp_ACTIVATE_EXTENSIONS           36
           68  +#define PragTyp_HEXKEY                        37
           69  +#define PragTyp_KEY                           38
           70  +#define PragTyp_REKEY                         39
           71  +#define PragTyp_LOCK_STATUS                   40
           72  +#define PragTyp_PARSER_TRACE                  41
    72     73   #define PragFlag_NeedSchema           0x01
    73     74   static const struct sPragmaNames {
    74     75     const char *const zName;  /* Name of pragma */
    75     76     u8 ePragTyp;              /* PragTyp_XXX value */
    76     77     u8 mPragFlag;             /* Zero or more PragFlag_XXX values */
    77     78     u32 iArg;                 /* Extra argument */
    78     79   } aPragmaNames[] = {
................................................................................
   414    415       /* ePragFlag: */ 0,
   415    416       /* iArg:      */ 0 },
   416    417     { /* zName:     */ "temp_store_directory",
   417    418       /* ePragTyp:  */ PragTyp_TEMP_STORE_DIRECTORY,
   418    419       /* ePragFlag: */ 0,
   419    420       /* iArg:      */ 0 },
   420    421   #endif
          422  +  { /* zName:     */ "threads",
          423  +    /* ePragTyp:  */ PragTyp_THREADS,
          424  +    /* ePragFlag: */ 0,
          425  +    /* iArg:      */ 0 },
   421    426   #if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS)
   422    427     { /* zName:     */ "user_version",
   423    428       /* ePragTyp:  */ PragTyp_HEADER_VALUE,
   424    429       /* ePragFlag: */ 0,
   425    430       /* iArg:      */ 0 },
   426    431   #endif
   427    432   #if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
................................................................................
   461    466   #if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
   462    467     { /* zName:     */ "writable_schema",
   463    468       /* ePragTyp:  */ PragTyp_FLAG,
   464    469       /* ePragFlag: */ 0,
   465    470       /* iArg:      */ SQLITE_WriteSchema|SQLITE_RecoveryMode },
   466    471   #endif
   467    472   };
   468         -/* Number of pragmas: 56 on by default, 69 total. */
          473  +/* Number of pragmas: 57 on by default, 70 total. */
   469    474   /* End of the automatically generated pragma table.
   470    475   ***************************************************************************/
   471    476   
   472    477   /*
   473    478   ** Interpret the given string as a safety level.  Return 0 for OFF,
   474    479   ** 1 for ON or NORMAL and 2 for FULL.  Return 1 for an empty or 
   475    480   ** unrecognized string argument.  The FULL option is disallowed
................................................................................
  2268   2273       sqlite3_int64 N;
  2269   2274       if( zRight && sqlite3DecOrHexToI64(zRight, &N)==SQLITE_OK ){
  2270   2275         sqlite3_soft_heap_limit64(N);
  2271   2276       }
  2272   2277       returnSingleInt(pParse, "soft_heap_limit",  sqlite3_soft_heap_limit64(-1));
  2273   2278       break;
  2274   2279     }
         2280  +
         2281  +  /*
         2282  +  **   PRAGMA threads
         2283  +  **   PRAGMA threads = N
         2284  +  **
         2285  +  ** Configure the maximum number of worker threads.  Return the new
         2286  +  ** maximum, which might be less than requested.
         2287  +  */
         2288  +  case PragTyp_THREADS: {
         2289  +    sqlite3_int64 N;
         2290  +    if( zRight
         2291  +     && sqlite3DecOrHexToI64(zRight, &N)==SQLITE_OK
         2292  +     && N>=0
         2293  +    ){
         2294  +      sqlite3_limit(db, SQLITE_LIMIT_WORKER_THREADS, (int)(N&0x7fffffff));
         2295  +    }
         2296  +    returnSingleInt(pParse, "threads",
         2297  +                    sqlite3_limit(db, SQLITE_LIMIT_WORKER_THREADS, -1));
         2298  +    break;
         2299  +  }
  2275   2300   
  2276   2301   #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
  2277   2302     /*
  2278   2303     ** Report the current state of file locks for all databases
  2279   2304     */
  2280   2305     case PragTyp_LOCK_STATUS: {
  2281   2306       static const char *const azLockName[] = {

Changes to src/select.c.

   451    451     Parse *pParse,       /* Parsing context */
   452    452     ExprList *pList,     /* Form the KeyInfo object from this ExprList */
   453    453     int iStart,          /* Begin with this column of pList */
   454    454     int nExtra           /* Add this many extra columns to the end */
   455    455   );
   456    456   
   457    457   /*
   458         -** Insert code into "v" that will push the record in register regData
   459         -** into the sorter.
          458  +** Generate code that will push the record in registers regData
          459  +** through regData+nData-1 onto the sorter.
   460    460   */
   461    461   static void pushOntoSorter(
   462    462     Parse *pParse,         /* Parser context */
   463    463     SortCtx *pSort,        /* Information about the ORDER BY clause */
   464    464     Select *pSelect,       /* The whole SELECT statement */
   465         -  int regData            /* Register holding data to be sorted */
   466         -){
   467         -  Vdbe *v = pParse->pVdbe;
   468         -  int nExpr = pSort->pOrderBy->nExpr;
   469         -  int regRecord = ++pParse->nMem;
   470         -  int regBase = pParse->nMem+1;
   471         -  int nOBSat = pSort->nOBSat;
   472         -  int op;
   473         -
   474         -  pParse->nMem += nExpr+2;        /* nExpr+2 registers allocated at regBase */
   475         -  sqlite3ExprCacheClear(pParse);
   476         -  sqlite3ExprCodeExprList(pParse, pSort->pOrderBy, regBase, 0);
   477         -  sqlite3VdbeAddOp2(v, OP_Sequence, pSort->iECursor, regBase+nExpr);
   478         -  sqlite3ExprCodeMove(pParse, regData, regBase+nExpr+1, 1);
   479         -  sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase+nOBSat, nExpr+2-nOBSat,regRecord);
          465  +  int regData,           /* First register holding data to be sorted */
          466  +  int nData,             /* Number of elements in the data array */
          467  +  int nPrefixReg         /* No. of reg prior to regData available for use */
          468  +){
          469  +  Vdbe *v = pParse->pVdbe;                         /* Stmt under construction */
          470  +  int bSeq = ((pSort->sortFlags & SORTFLAG_UseSorter)==0);
          471  +  int nExpr = pSort->pOrderBy->nExpr;              /* No. of ORDER BY terms */
          472  +  int nBase = nExpr + bSeq + nData;                /* Fields in sorter record */
          473  +  int regBase;                                     /* Regs for sorter record */
          474  +  int regRecord = ++pParse->nMem;                  /* Assembled sorter record */
          475  +  int nOBSat = pSort->nOBSat;                      /* ORDER BY terms to skip */
          476  +  int op;                            /* Opcode to add sorter record to sorter */
          477  +
          478  +  assert( bSeq==0 || bSeq==1 );
          479  +  if( nPrefixReg ){
          480  +    assert( nPrefixReg==nExpr+bSeq );
          481  +    regBase = regData - nExpr - bSeq;
          482  +  }else{
          483  +    regBase = pParse->nMem + 1;
          484  +    pParse->nMem += nBase;
          485  +  }
          486  +  sqlite3ExprCodeExprList(pParse, pSort->pOrderBy, regBase, SQLITE_ECEL_DUP);
          487  +  if( bSeq ){
          488  +    sqlite3VdbeAddOp2(v, OP_Sequence, pSort->iECursor, regBase+nExpr);
          489  +  }
          490  +  if( nPrefixReg==0 ){
          491  +    sqlite3VdbeAddOp3(v, OP_Move, regData, regBase+nExpr+bSeq, nData);
          492  +  }
          493  +
          494  +  sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase+nOBSat, nBase-nOBSat, regRecord);
   480    495     if( nOBSat>0 ){
   481    496       int regPrevKey;   /* The first nOBSat columns of the previous row */
   482    497       int addrFirst;    /* Address of the OP_IfNot opcode */
   483    498       int addrJmp;      /* Address of the OP_Jump opcode */
   484    499       VdbeOp *pOp;      /* Opcode that opens the sorter */
   485    500       int nKey;         /* Number of sorting key columns, including OP_Sequence */
   486    501       KeyInfo *pKI;     /* Original KeyInfo on the sorter table */
   487    502   
   488    503       regPrevKey = pParse->nMem+1;
   489    504       pParse->nMem += pSort->nOBSat;
   490         -    nKey = nExpr - pSort->nOBSat + 1;
   491         -    addrFirst = sqlite3VdbeAddOp1(v, OP_IfNot, regBase+nExpr); VdbeCoverage(v);
          505  +    nKey = nExpr - pSort->nOBSat + bSeq;
          506  +    if( bSeq ){
          507  +      addrFirst = sqlite3VdbeAddOp1(v, OP_IfNot, regBase+nExpr); 
          508  +    }else{
          509  +      addrFirst = sqlite3VdbeAddOp1(v, OP_SequenceTest, pSort->iECursor);
          510  +    }
          511  +    VdbeCoverage(v);
   492    512       sqlite3VdbeAddOp3(v, OP_Compare, regPrevKey, regBase, pSort->nOBSat);
   493    513       pOp = sqlite3VdbeGetOp(v, pSort->addrSortIndex);
   494    514       if( pParse->db->mallocFailed ) return;
   495         -    pOp->p2 = nKey + 1;
          515  +    pOp->p2 = nKey + nData;
   496    516       pKI = pOp->p4.pKeyInfo;
   497    517       memset(pKI->aSortOrder, 0, pKI->nField); /* Makes OP_Jump below testable */
   498    518       sqlite3VdbeChangeP4(v, -1, (char*)pKI, P4_KEYINFO);
   499    519       pOp->p4.pKeyInfo = keyInfoFromExprList(pParse, pSort->pOrderBy, nOBSat, 1);
   500    520       addrJmp = sqlite3VdbeCurrentAddr(v);
   501    521       sqlite3VdbeAddOp3(v, OP_Jump, addrJmp+1, 0, addrJmp+1); VdbeCoverage(v);
   502    522       pSort->labelBkOut = sqlite3VdbeMakeLabel(v);
................................................................................
   622    642     Vdbe *v = pParse->pVdbe;
   623    643     int i;
   624    644     int hasDistinct;        /* True if the DISTINCT keyword is present */
   625    645     int regResult;              /* Start of memory holding result set */
   626    646     int eDest = pDest->eDest;   /* How to dispose of results */
   627    647     int iParm = pDest->iSDParm; /* First argument to disposal method */
   628    648     int nResultCol;             /* Number of result columns */
          649  +  int nPrefixReg = 0;         /* Number of extra registers before regResult */
   629    650   
   630    651     assert( v );
   631    652     assert( pEList!=0 );
   632    653     hasDistinct = pDistinct ? pDistinct->eTnctType : WHERE_DISTINCT_NOOP;
   633    654     if( pSort && pSort->pOrderBy==0 ) pSort = 0;
   634    655     if( pSort==0 && !hasDistinct ){
   635    656       assert( iContinue!=0 );
................................................................................
   637    658     }
   638    659   
   639    660     /* Pull the requested columns.
   640    661     */
   641    662     nResultCol = pEList->nExpr;
   642    663   
   643    664     if( pDest->iSdst==0 ){
          665  +    if( pSort ){
          666  +      nPrefixReg = pSort->pOrderBy->nExpr;
          667  +      if( !(pSort->sortFlags & SORTFLAG_UseSorter) ) nPrefixReg++;
          668  +      pParse->nMem += nPrefixReg;
          669  +    }
   644    670       pDest->iSdst = pParse->nMem+1;
   645    671       pParse->nMem += nResultCol;
   646    672     }else if( pDest->iSdst+nResultCol > pParse->nMem ){
   647    673       /* This is an error condition that can result, for example, when a SELECT
   648    674       ** on the right-hand side of an INSERT contains more result columns than
   649    675       ** there are columns in the table on the left.  The error will be caught
   650    676       ** and reported later.  But we need to make sure enough memory is allocated
................................................................................
   753    779   
   754    780       /* Store the result as data using a unique key.
   755    781       */
   756    782       case SRT_Fifo:
   757    783       case SRT_DistFifo:
   758    784       case SRT_Table:
   759    785       case SRT_EphemTab: {
   760         -      int r1 = sqlite3GetTempReg(pParse);
          786  +      int r1 = sqlite3GetTempRange(pParse, nPrefixReg+1);
   761    787         testcase( eDest==SRT_Table );
   762    788         testcase( eDest==SRT_EphemTab );
   763         -      sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1);
          789  +      sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1+nPrefixReg);
   764    790   #ifndef SQLITE_OMIT_CTE
   765    791         if( eDest==SRT_DistFifo ){
   766    792           /* If the destination is DistFifo, then cursor (iParm+1) is open
   767    793           ** on an ephemeral index. If the current row is already present
   768    794           ** in the index, do not write it to the output. If not, add the
   769    795           ** current row to the index and proceed with writing it to the
   770    796           ** output table as well.  */
................................................................................
   771    797           int addr = sqlite3VdbeCurrentAddr(v) + 4;
   772    798           sqlite3VdbeAddOp4Int(v, OP_Found, iParm+1, addr, r1, 0); VdbeCoverage(v);
   773    799           sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm+1, r1);
   774    800           assert( pSort==0 );
   775    801         }
   776    802   #endif
   777    803         if( pSort ){
   778         -        pushOntoSorter(pParse, pSort, p, r1);
          804  +        pushOntoSorter(pParse, pSort, p, r1+nPrefixReg, 1, nPrefixReg);
   779    805         }else{
   780    806           int r2 = sqlite3GetTempReg(pParse);
   781    807           sqlite3VdbeAddOp2(v, OP_NewRowid, iParm, r2);
   782    808           sqlite3VdbeAddOp3(v, OP_Insert, iParm, r1, r2);
   783    809           sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
   784    810           sqlite3ReleaseTempReg(pParse, r2);
   785    811         }
   786         -      sqlite3ReleaseTempReg(pParse, r1);
          812  +      sqlite3ReleaseTempRange(pParse, r1, nPrefixReg+1);
   787    813         break;
   788    814       }
   789    815   
   790    816   #ifndef SQLITE_OMIT_SUBQUERY
   791    817       /* If we are creating a set for an "expr IN (SELECT ...)" construct,
   792    818       ** then there should be a single item on the stack.  Write this
   793    819       ** item into the set table with bogus data.
................................................................................
   797    823         pDest->affSdst =
   798    824                     sqlite3CompareAffinity(pEList->a[0].pExpr, pDest->affSdst);
   799    825         if( pSort ){
   800    826           /* At first glance you would think we could optimize out the
   801    827           ** ORDER BY in this case since the order of entries in the set
   802    828           ** does not matter.  But there might be a LIMIT clause, in which
   803    829           ** case the order does matter */
   804         -        pushOntoSorter(pParse, pSort, p, regResult);
          830  +        pushOntoSorter(pParse, pSort, p, regResult, 1, nPrefixReg);
   805    831         }else{
   806    832           int r1 = sqlite3GetTempReg(pParse);
   807    833           sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult,1,r1, &pDest->affSdst, 1);
   808    834           sqlite3ExprCacheAffinityChange(pParse, regResult, 1);
   809    835           sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm, r1);
   810    836           sqlite3ReleaseTempReg(pParse, r1);
   811    837         }
................................................................................
   823    849       /* If this is a scalar select that is part of an expression, then
   824    850       ** store the results in the appropriate memory cell and break out
   825    851       ** of the scan loop.
   826    852       */
   827    853       case SRT_Mem: {
   828    854         assert( nResultCol==1 );
   829    855         if( pSort ){
   830         -        pushOntoSorter(pParse, pSort, p, regResult);
          856  +        pushOntoSorter(pParse, pSort, p, regResult, 1, nPrefixReg);
   831    857         }else{
   832         -        sqlite3ExprCodeMove(pParse, regResult, iParm, 1);
          858  +        assert( regResult==iParm );
   833    859           /* The LIMIT clause will jump out of the loop for us */
   834    860         }
   835    861         break;
   836    862       }
   837    863   #endif /* #ifndef SQLITE_OMIT_SUBQUERY */
   838    864   
   839    865       case SRT_Coroutine:       /* Send data to a co-routine */
   840    866       case SRT_Output: {        /* Return the results */
   841    867         testcase( eDest==SRT_Coroutine );
   842    868         testcase( eDest==SRT_Output );
   843    869         if( pSort ){
   844         -        int r1 = sqlite3GetTempReg(pParse);
   845         -        sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1);
   846         -        pushOntoSorter(pParse, pSort, p, r1);
   847         -        sqlite3ReleaseTempReg(pParse, r1);
          870  +        pushOntoSorter(pParse, pSort, p, regResult, nResultCol, nPrefixReg);
   848    871         }else if( eDest==SRT_Coroutine ){
   849    872           sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm);
   850    873         }else{
   851    874           sqlite3VdbeAddOp2(v, OP_ResultRow, regResult, nResultCol);
   852    875           sqlite3ExprCacheAffinityChange(pParse, regResult, nResultCol);
   853    876         }
   854    877         break;
................................................................................
  1120   1143   ){
  1121   1144     Vdbe *v = pParse->pVdbe;                     /* The prepared statement */
  1122   1145     int addrBreak = sqlite3VdbeMakeLabel(v);     /* Jump here to exit loop */
  1123   1146     int addrContinue = sqlite3VdbeMakeLabel(v);  /* Jump here for next cycle */
  1124   1147     int addr;
  1125   1148     int addrOnce = 0;
  1126   1149     int iTab;
  1127         -  int pseudoTab = 0;
  1128   1150     ExprList *pOrderBy = pSort->pOrderBy;
  1129   1151     int eDest = pDest->eDest;
  1130   1152     int iParm = pDest->iSDParm;
  1131   1153     int regRow;
  1132   1154     int regRowid;
  1133   1155     int nKey;
         1156  +  int iSortTab;                   /* Sorter cursor to read from */
         1157  +  int nSortData;                  /* Trailing values to read from sorter */
         1158  +  u8 p5;                          /* p5 parameter for 1st OP_Column */
         1159  +  int i;
         1160  +  int bSeq;                       /* True if sorter record includes seq. no. */
         1161  +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
         1162  +  struct ExprList_item *aOutEx = p->pEList->a;
         1163  +#endif
  1134   1164   
  1135   1165     if( pSort->labelBkOut ){
  1136   1166       sqlite3VdbeAddOp2(v, OP_Gosub, pSort->regReturn, pSort->labelBkOut);
  1137   1167       sqlite3VdbeAddOp2(v, OP_Goto, 0, addrBreak);
  1138   1168       sqlite3VdbeResolveLabel(v, pSort->labelBkOut);
  1139         -    addrOnce = sqlite3CodeOnce(pParse); VdbeCoverage(v);
  1140   1169     }
  1141   1170     iTab = pSort->iECursor;
  1142         -  regRow = sqlite3GetTempReg(pParse);
  1143   1171     if( eDest==SRT_Output || eDest==SRT_Coroutine ){
  1144         -    pseudoTab = pParse->nTab++;
  1145         -    sqlite3VdbeAddOp3(v, OP_OpenPseudo, pseudoTab, regRow, nColumn);
  1146   1172       regRowid = 0;
         1173  +    regRow = pDest->iSdst;
         1174  +    nSortData = nColumn;
  1147   1175     }else{
  1148   1176       regRowid = sqlite3GetTempReg(pParse);
         1177  +    regRow = sqlite3GetTempReg(pParse);
         1178  +    nSortData = 1;
  1149   1179     }
  1150   1180     nKey = pOrderBy->nExpr - pSort->nOBSat;
  1151   1181     if( pSort->sortFlags & SORTFLAG_UseSorter ){
  1152   1182       int regSortOut = ++pParse->nMem;
  1153         -    int ptab2 = pParse->nTab++;
  1154         -    sqlite3VdbeAddOp3(v, OP_OpenPseudo, ptab2, regSortOut, nKey+2);
         1183  +    iSortTab = pParse->nTab++;
         1184  +    if( pSort->labelBkOut ){
         1185  +      addrOnce = sqlite3CodeOnce(pParse); VdbeCoverage(v);
         1186  +    }
         1187  +    sqlite3VdbeAddOp3(v, OP_OpenPseudo, iSortTab, regSortOut, nKey+1+nSortData);
  1155   1188       if( addrOnce ) sqlite3VdbeJumpHere(v, addrOnce);
  1156   1189       addr = 1 + sqlite3VdbeAddOp2(v, OP_SorterSort, iTab, addrBreak);
  1157   1190       VdbeCoverage(v);
  1158   1191       codeOffset(v, p->iOffset, addrContinue);
  1159   1192       sqlite3VdbeAddOp2(v, OP_SorterData, iTab, regSortOut);
  1160         -    sqlite3VdbeAddOp3(v, OP_Column, ptab2, nKey+1, regRow);
  1161         -    sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE);
         1193  +    p5 = OPFLAG_CLEARCACHE;
         1194  +    bSeq = 0;
  1162   1195     }else{
  1163         -    if( addrOnce ) sqlite3VdbeJumpHere(v, addrOnce);
  1164   1196       addr = 1 + sqlite3VdbeAddOp2(v, OP_Sort, iTab, addrBreak); VdbeCoverage(v);
  1165   1197       codeOffset(v, p->iOffset, addrContinue);
  1166         -    sqlite3VdbeAddOp3(v, OP_Column, iTab, nKey+1, regRow);
         1198  +    iSortTab = iTab;
         1199  +    p5 = 0;
         1200  +    bSeq = 1;
         1201  +  }
         1202  +  for(i=0; i<nSortData; i++){
         1203  +    sqlite3VdbeAddOp3(v, OP_Column, iSortTab, nKey+bSeq+i, regRow+i);
         1204  +    if( i==0 ) sqlite3VdbeChangeP5(v, p5);
         1205  +    VdbeComment((v, "%s", aOutEx[i].zName ? aOutEx[i].zName : aOutEx[i].zSpan));
  1167   1206     }
  1168   1207     switch( eDest ){
  1169   1208       case SRT_Table:
  1170   1209       case SRT_EphemTab: {
  1171   1210         testcase( eDest==SRT_Table );
  1172   1211         testcase( eDest==SRT_EphemTab );
  1173   1212         sqlite3VdbeAddOp2(v, OP_NewRowid, iParm, regRowid);
................................................................................
  1188   1227         assert( nColumn==1 );
  1189   1228         sqlite3ExprCodeMove(pParse, regRow, iParm, 1);
  1190   1229         /* The LIMIT clause will terminate the loop for us */
  1191   1230         break;
  1192   1231       }
  1193   1232   #endif
  1194   1233       default: {
  1195         -      int i;
  1196   1234         assert( eDest==SRT_Output || eDest==SRT_Coroutine ); 
  1197   1235         testcase( eDest==SRT_Output );
  1198   1236         testcase( eDest==SRT_Coroutine );
  1199         -      for(i=0; i<nColumn; i++){
  1200         -        assert( regRow!=pDest->iSdst+i );
  1201         -        sqlite3VdbeAddOp3(v, OP_Column, pseudoTab, i, pDest->iSdst+i);
  1202         -        if( i==0 ){
  1203         -          sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE);
  1204         -        }
  1205         -      }
  1206   1237         if( eDest==SRT_Output ){
  1207   1238           sqlite3VdbeAddOp2(v, OP_ResultRow, pDest->iSdst, nColumn);
  1208   1239           sqlite3ExprCacheAffinityChange(pParse, pDest->iSdst, nColumn);
  1209   1240         }else{
  1210   1241           sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm);
  1211   1242         }
  1212   1243         break;
  1213   1244       }
  1214   1245     }
  1215         -  sqlite3ReleaseTempReg(pParse, regRow);
  1216         -  sqlite3ReleaseTempReg(pParse, regRowid);
  1217         -
         1246  +  if( regRowid ){
         1247  +    sqlite3ReleaseTempReg(pParse, regRow);
         1248  +    sqlite3ReleaseTempReg(pParse, regRowid);
         1249  +  }
  1218   1250     /* The bottom of the loop
  1219   1251     */
  1220   1252     sqlite3VdbeResolveLabel(v, addrContinue);
  1221   1253     if( pSort->sortFlags & SORTFLAG_UseSorter ){
  1222   1254       sqlite3VdbeAddOp2(v, OP_SorterNext, iTab, addr); VdbeCoverage(v);
  1223   1255     }else{
  1224   1256       sqlite3VdbeAddOp2(v, OP_Next, iTab, addr); VdbeCoverage(v);
................................................................................
  4751   4783     */
  4752   4784     if( sSort.pOrderBy ){
  4753   4785       KeyInfo *pKeyInfo;
  4754   4786       pKeyInfo = keyInfoFromExprList(pParse, sSort.pOrderBy, 0, 0);
  4755   4787       sSort.iECursor = pParse->nTab++;
  4756   4788       sSort.addrSortIndex =
  4757   4789         sqlite3VdbeAddOp4(v, OP_OpenEphemeral,
  4758         -                           sSort.iECursor, sSort.pOrderBy->nExpr+2, 0,
  4759         -                           (char*)pKeyInfo, P4_KEYINFO);
         4790  +          sSort.iECursor, sSort.pOrderBy->nExpr+1+pEList->nExpr, 0,
         4791  +          (char*)pKeyInfo, P4_KEYINFO
         4792  +      );
  4760   4793     }else{
  4761   4794       sSort.addrSortIndex = -1;
  4762   4795     }
  4763   4796   
  4764   4797     /* If the output is destined for a temporary table, open that table.
  4765   4798     */
  4766   4799     if( pDest->eDest==SRT_EphemTab ){
................................................................................
  4883   4916       ** SELECT statement.
  4884   4917       */
  4885   4918       memset(&sNC, 0, sizeof(sNC));
  4886   4919       sNC.pParse = pParse;
  4887   4920       sNC.pSrcList = pTabList;
  4888   4921       sNC.pAggInfo = &sAggInfo;
  4889   4922       sAggInfo.mnReg = pParse->nMem+1;
  4890         -    sAggInfo.nSortingColumn = pGroupBy ? pGroupBy->nExpr+1 : 0;
         4923  +    sAggInfo.nSortingColumn = pGroupBy ? pGroupBy->nExpr : 0;
  4891   4924       sAggInfo.pGroupBy = pGroupBy;
  4892   4925       sqlite3ExprAnalyzeAggList(&sNC, pEList);
  4893   4926       sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy);
  4894   4927       if( pHaving ){
  4895   4928         sqlite3ExprAnalyzeAggregates(&sNC, pHaving);
  4896   4929       }
  4897   4930       sAggInfo.nAccumulator = sAggInfo.nColumn;
................................................................................
  4976   5009   
  4977   5010           explainTempTable(pParse, 
  4978   5011               (sDistinct.isTnct && (p->selFlags&SF_Distinct)==0) ?
  4979   5012                       "DISTINCT" : "GROUP BY");
  4980   5013   
  4981   5014           groupBySort = 1;
  4982   5015           nGroupBy = pGroupBy->nExpr;
  4983         -        nCol = nGroupBy + 1;
  4984         -        j = nGroupBy+1;
         5016  +        nCol = nGroupBy;
         5017  +        j = nGroupBy;
  4985   5018           for(i=0; i<sAggInfo.nColumn; i++){
  4986   5019             if( sAggInfo.aCol[i].iSorterColumn>=j ){
  4987   5020               nCol++;
  4988   5021               j++;
  4989   5022             }
  4990   5023           }
  4991   5024           regBase = sqlite3GetTempRange(pParse, nCol);
  4992   5025           sqlite3ExprCacheClear(pParse);
  4993   5026           sqlite3ExprCodeExprList(pParse, pGroupBy, regBase, 0);
  4994         -        sqlite3VdbeAddOp2(v, OP_Sequence, sAggInfo.sortingIdx,regBase+nGroupBy);
  4995         -        j = nGroupBy+1;
         5027  +        j = nGroupBy;
  4996   5028           for(i=0; i<sAggInfo.nColumn; i++){
  4997   5029             struct AggInfo_col *pCol = &sAggInfo.aCol[i];
  4998   5030             if( pCol->iSorterColumn>=j ){
  4999   5031               int r1 = j + regBase;
  5000   5032               int r2;
  5001   5033   
  5002   5034               r2 = sqlite3ExprCodeGetColumn(pParse, 

Changes to src/shell.c.

   471    471     int cnt;               /* Number of records displayed so far */
   472    472     FILE *out;             /* Write results here */
   473    473     FILE *traceOut;        /* Output for sqlite3_trace() */
   474    474     int nErr;              /* Number of errors seen */
   475    475     int mode;              /* An output mode setting */
   476    476     int writableSchema;    /* True if PRAGMA writable_schema=ON */
   477    477     int showHeader;        /* True to show column names in List or Column mode */
          478  +  unsigned shellFlgs;    /* Various flags */
   478    479     char *zDestTable;      /* Name of destination table when MODE_Insert */
   479    480     char separator[20];    /* Separator character for MODE_List */
   480    481     char newline[20];      /* Record separator in MODE_Csv */
   481    482     int colWidth[100];     /* Requested width of each column when in column mode*/
   482    483     int actualWidth[100];  /* Actual width of each column */
   483    484     char nullvalue[20];    /* The text to print when a NULL comes back from
   484    485                            ** the database */
................................................................................
   494    495     int iIndent;           /* Index of current op in aiIndent[] */
   495    496   #if defined(SQLITE_ENABLE_SESSION)
   496    497     int nSession;             /* Number of active sessions */
   497    498     OpenSession aSession[4];  /* Array of sessions.  [0] is in focus. */
   498    499   #endif
   499    500   };
   500    501   
          502  +/*
          503  +** These are the allowed shellFlgs values
          504  +*/
          505  +#define SHFLG_Scratch     0x00001     /* The --scratch option is used */
          506  +#define SHFLG_Pagecache   0x00002     /* The --pagecache option is used */
          507  +#define SHFLG_Lookaside   0x00004     /* Lookaside memory is used */
          508  +
   501    509   /*
   502    510   ** These are the allowed modes.
   503    511   */
   504    512   #define MODE_Line     0  /* One column per line.  Blank line between records */
   505    513   #define MODE_Column   1  /* One record per line in neat columns */
   506    514   #define MODE_List     2  /* One record per line with a separator */
   507    515   #define MODE_Semi     3  /* Same as MODE_List but append ";" to each line */
................................................................................
  1110   1118       
  1111   1119       iHiwtr = iCur = -1;
  1112   1120       sqlite3_status(SQLITE_STATUS_MEMORY_USED, &iCur, &iHiwtr, bReset);
  1113   1121       fprintf(pArg->out, "Memory Used:                         %d (max %d) bytes\n", iCur, iHiwtr);
  1114   1122       iHiwtr = iCur = -1;
  1115   1123       sqlite3_status(SQLITE_STATUS_MALLOC_COUNT, &iCur, &iHiwtr, bReset);
  1116   1124       fprintf(pArg->out, "Number of Outstanding Allocations:   %d (max %d)\n", iCur, iHiwtr);
  1117         -/*
  1118         -** Not currently used by the CLI.
  1119         -**    iHiwtr = iCur = -1;
  1120         -**    sqlite3_status(SQLITE_STATUS_PAGECACHE_USED, &iCur, &iHiwtr, bReset);
  1121         -**    fprintf(pArg->out, "Number of Pcache Pages Used:         %d (max %d) pages\n", iCur, iHiwtr);
  1122         -*/
         1125  +    if( pArg->shellFlgs & SHFLG_Pagecache ){
         1126  +      iHiwtr = iCur = -1;
         1127  +      sqlite3_status(SQLITE_STATUS_PAGECACHE_USED, &iCur, &iHiwtr, bReset);
         1128  +      fprintf(pArg->out, "Number of Pcache Pages Used:         %d (max %d) pages\n", iCur, iHiwtr);
         1129  +    }
  1123   1130       iHiwtr = iCur = -1;
  1124   1131       sqlite3_status(SQLITE_STATUS_PAGECACHE_OVERFLOW, &iCur, &iHiwtr, bReset);
  1125   1132       fprintf(pArg->out, "Number of Pcache Overflow Bytes:     %d (max %d) bytes\n", iCur, iHiwtr);
  1126         -/*
  1127         -** Not currently used by the CLI.
  1128         -**    iHiwtr = iCur = -1;
  1129         -**    sqlite3_status(SQLITE_STATUS_SCRATCH_USED, &iCur, &iHiwtr, bReset);
  1130         -**    fprintf(pArg->out, "Number of Scratch Allocations Used:  %d (max %d)\n", iCur, iHiwtr);
  1131         -*/
         1133  +    if( pArg->shellFlgs & SHFLG_Scratch ){
         1134  +      iHiwtr = iCur = -1;
         1135  +      sqlite3_status(SQLITE_STATUS_SCRATCH_USED, &iCur, &iHiwtr, bReset);
         1136  +      fprintf(pArg->out, "Number of Scratch Allocations Used:  %d (max %d)\n", iCur, iHiwtr);
         1137  +    }
  1132   1138       iHiwtr = iCur = -1;
  1133   1139       sqlite3_status(SQLITE_STATUS_SCRATCH_OVERFLOW, &iCur, &iHiwtr, bReset);
  1134   1140       fprintf(pArg->out, "Number of Scratch Overflow Bytes:    %d (max %d) bytes\n", iCur, iHiwtr);
  1135   1141       iHiwtr = iCur = -1;
  1136   1142       sqlite3_status(SQLITE_STATUS_MALLOC_SIZE, &iCur, &iHiwtr, bReset);
  1137   1143       fprintf(pArg->out, "Largest Allocation:                  %d bytes\n", iHiwtr);
  1138   1144       iHiwtr = iCur = -1;
................................................................................
  1145   1151       iHiwtr = iCur = -1;
  1146   1152       sqlite3_status(SQLITE_STATUS_PARSER_STACK, &iCur, &iHiwtr, bReset);
  1147   1153       fprintf(pArg->out, "Deepest Parser Stack:                %d (max %d)\n", iCur, iHiwtr);
  1148   1154   #endif
  1149   1155     }
  1150   1156   
  1151   1157     if( pArg && pArg->out && db ){
  1152         -    iHiwtr = iCur = -1;
  1153         -    sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_USED, &iCur, &iHiwtr, bReset);
  1154         -    fprintf(pArg->out, "Lookaside Slots Used:                %d (max %d)\n", iCur, iHiwtr);
  1155         -    sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_HIT, &iCur, &iHiwtr, bReset);
  1156         -    fprintf(pArg->out, "Successful lookaside attempts:       %d\n", iHiwtr);
  1157         -    sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE, &iCur, &iHiwtr, bReset);
  1158         -    fprintf(pArg->out, "Lookaside failures due to size:      %d\n", iHiwtr);
  1159         -    sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL, &iCur, &iHiwtr, bReset);
  1160         -    fprintf(pArg->out, "Lookaside failures due to OOM:       %d\n", iHiwtr);
         1158  +    if( pArg->shellFlgs & SHFLG_Lookaside ){
         1159  +      iHiwtr = iCur = -1;
         1160  +      sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_USED, &iCur, &iHiwtr, bReset);
         1161  +      fprintf(pArg->out, "Lookaside Slots Used:                %d (max %d)\n", iCur, iHiwtr);
         1162  +      sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_HIT, &iCur, &iHiwtr, bReset);
         1163  +      fprintf(pArg->out, "Successful lookaside attempts:       %d\n", iHiwtr);
         1164  +      sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE, &iCur, &iHiwtr, bReset);
         1165  +      fprintf(pArg->out, "Lookaside failures due to size:      %d\n", iHiwtr);
         1166  +      sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL, &iCur, &iHiwtr, bReset);
         1167  +      fprintf(pArg->out, "Lookaside failures due to OOM:       %d\n", iHiwtr);
         1168  +    }
  1161   1169       iHiwtr = iCur = -1;
  1162   1170       sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_USED, &iCur, &iHiwtr, bReset);
  1163   1171       fprintf(pArg->out, "Pager Heap Usage:                    %d bytes\n", iCur);    iHiwtr = iCur = -1;
  1164   1172       sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_HIT, &iCur, &iHiwtr, 1);
  1165   1173       fprintf(pArg->out, "Page cache hits:                     %d\n", iCur);
  1166   1174       iHiwtr = iCur = -1;
  1167   1175       sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_MISS, &iCur, &iHiwtr, 1);
................................................................................
  4058   4066     "   -heap SIZE           Size of heap for memsys3 or memsys5\n"
  4059   4067   #endif
  4060   4068     "   -help                show this message\n"
  4061   4069     "   -html                set output mode to HTML\n"
  4062   4070     "   -interactive         force interactive I/O\n"
  4063   4071     "   -line                set output mode to 'line'\n"
  4064   4072     "   -list                set output mode to 'list'\n"
         4073  +  "   -lookaside SIZE N    use N entries of SZ bytes for lookaside memory\n"
  4065   4074     "   -mmap N              default mmap size set to N\n"
  4066   4075   #ifdef SQLITE_ENABLE_MULTIPLEX
  4067   4076     "   -multiplex           enable the multiplexor VFS\n"
  4068   4077   #endif
  4069   4078     "   -newline SEP         set newline character(s) for CSV\n"
  4070   4079     "   -nullvalue TEXT      set text string for NULL values. Default ''\n"
         4080  +  "   -pagecache SIZE N    use N slots of SZ bytes each for page cache memory\n"
         4081  +  "   -scratch SIZE N      use N slots of SZ bytes each for scratch memory\n"
  4071   4082     "   -separator SEP       set output field separator. Default: '|'\n"
  4072   4083     "   -stats               print memory stats before each finalize\n"
  4073   4084     "   -version             show SQLite version\n"
  4074   4085     "   -vfs NAME            use NAME as the default VFS\n"
  4075   4086   #ifdef SQLITE_ENABLE_VFSTRACE
  4076   4087     "   -vfstrace            enable tracing of all VFS calls\n"
  4077   4088   #endif
................................................................................
  4094   4105   */
  4095   4106   static void main_init(ShellState *data) {
  4096   4107     memset(data, 0, sizeof(*data));
  4097   4108     data->mode = MODE_List;
  4098   4109     memcpy(data->separator,"|", 2);
  4099   4110     memcpy(data->newline,"\r\n", 3);
  4100   4111     data->showHeader = 0;
         4112  +  data->shellFlgs = SHFLG_Lookaside;
  4101   4113     sqlite3_config(SQLITE_CONFIG_URI, 1);
  4102   4114     sqlite3_config(SQLITE_CONFIG_LOG, shellLog, data);
         4115  +  sqlite3_config(SQLITE_CONFIG_MULTITHREAD);
  4103   4116     sqlite3_snprintf(sizeof(mainPrompt), mainPrompt,"sqlite> ");
  4104   4117     sqlite3_snprintf(sizeof(continuePrompt), continuePrompt,"   ...> ");
  4105         -  sqlite3_config(SQLITE_CONFIG_SINGLETHREAD);
  4106   4118   }
  4107   4119   
  4108   4120   /*
  4109   4121   ** Output text to the console in a font that attracts extra attention.
  4110   4122   */
  4111   4123   #ifdef _WIN32
  4112   4124   static void printBold(const char *zText){
................................................................................
  4207   4219         sqlite3_int64 szHeap;
  4208   4220   
  4209   4221         zSize = cmdline_option_value(argc, argv, ++i);
  4210   4222         szHeap = integerValue(zSize);
  4211   4223         if( szHeap>0x7fff0000 ) szHeap = 0x7fff0000;
  4212   4224         sqlite3_config(SQLITE_CONFIG_HEAP, malloc((int)szHeap), (int)szHeap, 64);
  4213   4225   #endif
         4226  +    }else if( strcmp(z,"-scratch")==0 ){
         4227  +      int n, sz;
         4228  +      sz = (int)integerValue(cmdline_option_value(argc,argv,++i));
         4229  +      if( sz>400000 ) sz = 400000;
         4230  +      if( sz<2500 ) sz = 2500;
         4231  +      n = (int)integerValue(cmdline_option_value(argc,argv,++i));
         4232  +      if( n>10 ) n = 10;
         4233  +      if( n<1 ) n = 1;
         4234  +      sqlite3_config(SQLITE_CONFIG_SCRATCH, malloc(n*sz+1), sz, n);
         4235  +      data.shellFlgs |= SHFLG_Scratch;
         4236  +    }else if( strcmp(z,"-pagecache")==0 ){
         4237  +      int n, sz;
         4238  +      sz = (int)integerValue(cmdline_option_value(argc,argv,++i));
         4239  +      if( sz>70000 ) sz = 70000;
         4240  +      if( sz<800 ) sz = 800;
         4241  +      n = (int)integerValue(cmdline_option_value(argc,argv,++i));
         4242  +      if( n<10 ) n = 10;
         4243  +      sqlite3_config(SQLITE_CONFIG_PAGECACHE, malloc(n*sz+1), sz, n);
         4244  +      data.shellFlgs |= SHFLG_Pagecache;
         4245  +    }else if( strcmp(z,"-lookaside")==0 ){
         4246  +      int n, sz;
         4247  +      sz = (int)integerValue(cmdline_option_value(argc,argv,++i));
         4248  +      if( sz<0 ) sz = 0;
         4249  +      n = (int)integerValue(cmdline_option_value(argc,argv,++i));
         4250  +      if( n<0 ) n = 0;
         4251  +      sqlite3_config(SQLITE_CONFIG_LOOKASIDE, sz, n);
         4252  +      if( sz*n==0 ) data.shellFlgs &= ~SHFLG_Lookaside;
  4214   4253   #ifdef SQLITE_ENABLE_VFSTRACE
  4215   4254       }else if( strcmp(z,"-vfstrace")==0 ){
  4216   4255         extern int vfstrace_register(
  4217   4256            const char *zTraceName,
  4218   4257            const char *zOldVfsName,
  4219   4258            int (*xOut)(const char*,void*),
  4220   4259            void *pOutArg,
................................................................................
  4322   4361         return 0;
  4323   4362       }else if( strcmp(z,"-interactive")==0 ){
  4324   4363         stdin_is_interactive = 1;
  4325   4364       }else if( strcmp(z,"-batch")==0 ){
  4326   4365         stdin_is_interactive = 0;
  4327   4366       }else if( strcmp(z,"-heap")==0 ){
  4328   4367         i++;
         4368  +    }else if( strcmp(z,"-scratch")==0 ){
         4369  +      i+=2;
         4370  +    }else if( strcmp(z,"-pagecache")==0 ){
         4371  +      i+=2;
         4372  +    }else if( strcmp(z,"-lookaside")==0 ){
         4373  +      i+=2;
  4329   4374       }else if( strcmp(z,"-mmap")==0 ){
  4330   4375         i++;
  4331   4376       }else if( strcmp(z,"-vfs")==0 ){
  4332   4377         i++;
  4333   4378   #ifdef SQLITE_ENABLE_VFSTRACE
  4334   4379       }else if( strcmp(z,"-vfstrace")==0 ){
  4335   4380         i++;

Changes to src/sqlite.h.in.

  3069   3069   **
  3070   3070   ** [[SQLITE_LIMIT_VARIABLE_NUMBER]]
  3071   3071   ** ^(<dt>SQLITE_LIMIT_VARIABLE_NUMBER</dt>
  3072   3072   ** <dd>The maximum index number of any [parameter] in an SQL statement.)^
  3073   3073   **
  3074   3074   ** [[SQLITE_LIMIT_TRIGGER_DEPTH]] ^(<dt>SQLITE_LIMIT_TRIGGER_DEPTH</dt>
  3075   3075   ** <dd>The maximum depth of recursion for triggers.</dd>)^
         3076  +**
         3077  +** [[SQLITE_LIMIT_WORKER_THREADS]] ^(<dt>SQLITE_LIMIT_WORKER_THREADS</dt>
         3078  +** <dd>The maximum number of auxiliary worker threads that a single
         3079  +** [prepared statement] may start.</dd>)^
  3076   3080   ** </dl>
  3077   3081   */
  3078   3082   #define SQLITE_LIMIT_LENGTH                    0
  3079   3083   #define SQLITE_LIMIT_SQL_LENGTH                1
  3080   3084   #define SQLITE_LIMIT_COLUMN                    2
  3081   3085   #define SQLITE_LIMIT_EXPR_DEPTH                3
  3082   3086   #define SQLITE_LIMIT_COMPOUND_SELECT           4
  3083   3087   #define SQLITE_LIMIT_VDBE_OP                   5
  3084   3088   #define SQLITE_LIMIT_FUNCTION_ARG              6
  3085   3089   #define SQLITE_LIMIT_ATTACHED                  7
  3086   3090   #define SQLITE_LIMIT_LIKE_PATTERN_LENGTH       8
  3087   3091   #define SQLITE_LIMIT_VARIABLE_NUMBER           9
  3088   3092   #define SQLITE_LIMIT_TRIGGER_DEPTH            10
         3093  +#define SQLITE_LIMIT_WORKER_THREADS           11
  3089   3094   
  3090   3095   /*
  3091   3096   ** CAPI3REF: Compiling An SQL Statement
  3092   3097   ** KEYWORDS: {SQL statement compiler}
  3093   3098   **
  3094   3099   ** To execute an SQL query, it must first be compiled into a byte-code
  3095   3100   ** program using one of these routines.
................................................................................
  6156   6161   #define SQLITE_TESTCTRL_SCRATCHMALLOC           17
  6157   6162   #define SQLITE_TESTCTRL_LOCALTIME_FAULT         18
  6158   6163   #define SQLITE_TESTCTRL_EXPLAIN_STMT            19
  6159   6164   #define SQLITE_TESTCTRL_NEVER_CORRUPT           20
  6160   6165   #define SQLITE_TESTCTRL_VDBE_COVERAGE           21
  6161   6166   #define SQLITE_TESTCTRL_BYTEORDER               22
  6162   6167   #define SQLITE_TESTCTRL_ISINIT                  23
  6163         -#define SQLITE_TESTCTRL_LAST                    23
         6168  +#define SQLITE_TESTCTRL_SORTER_MMAP             24
         6169  +#define SQLITE_TESTCTRL_LAST                    24
  6164   6170   
  6165   6171   /*
  6166   6172   ** CAPI3REF: SQLite Runtime Status
  6167   6173   **
  6168   6174   ** ^This interface is used to retrieve runtime status information
  6169   6175   ** about the performance of SQLite, and optionally to reset various
  6170   6176   ** highwater marks.  ^The first argument is an integer code for

Changes to src/sqliteInt.h.

   429    429   ** Provide a default value for SQLITE_TEMP_STORE in case it is not specified
   430    430   ** on the command-line
   431    431   */
   432    432   #ifndef SQLITE_TEMP_STORE
   433    433   # define SQLITE_TEMP_STORE 1
   434    434   # define SQLITE_TEMP_STORE_xc 1  /* Exclude from ctime.c */
   435    435   #endif
          436  +
          437  +/*
          438  +** Force SQLITE_MAX_WORKER_THREADS to zero if SQLITE_TEMP_STORE is 3 (never
          439  +** use temporary files) or SQLITE_THREADSAFE is 0; otherwise default it to 8.
          440  +** Then raise it to SQLITE_DEFAULT_WORKER_THREADS (default 0) if that is larger.
          441  +*/
          442  +#if SQLITE_TEMP_STORE==3 || SQLITE_THREADSAFE==0
          443  +# undef SQLITE_MAX_WORKER_THREADS
          444  +# define SQLITE_MAX_WORKER_THREADS 0
          445  +#endif
          446  +#ifndef SQLITE_MAX_WORKER_THREADS
          447  +# define SQLITE_MAX_WORKER_THREADS 8
          448  +#endif
          449  +#ifndef SQLITE_DEFAULT_WORKER_THREADS
          450  +# define SQLITE_DEFAULT_WORKER_THREADS 0
          451  +#endif
          452  +#if SQLITE_DEFAULT_WORKER_THREADS>SQLITE_MAX_WORKER_THREADS
          453  +# undef SQLITE_MAX_WORKER_THREADS
          454  +# define SQLITE_MAX_WORKER_THREADS SQLITE_DEFAULT_WORKER_THREADS
          455  +#endif
          456  +
   436    457   
   437    458   /*
   438    459   ** GCC does not define the offsetof() macro so we'll have to do it
   439    460   ** ourselves.
   440    461   */
   441    462   #ifndef offsetof
   442    463   #define offsetof(STRUCTURE,FIELD) ((int)((char*)&((STRUCTURE*)0)->FIELD))
................................................................................
   814    835   typedef struct NameContext NameContext;
   815    836   typedef struct Parse Parse;
   816    837   typedef struct PreUpdate PreUpdate;
   817    838   typedef struct PrintfArguments PrintfArguments;
   818    839   typedef struct RowSet RowSet;
   819    840   typedef struct Savepoint Savepoint;
   820    841   typedef struct Select Select;
          842  +typedef struct SQLiteThread SQLiteThread;
   821    843   typedef struct SelectDest SelectDest;
   822    844   typedef struct SrcList SrcList;
   823    845   typedef struct StrAccum StrAccum;
   824    846   typedef struct Table Table;
   825    847   typedef struct TableLock TableLock;
   826    848   typedef struct Token Token;
   827    849   typedef struct Trigger Trigger;
................................................................................
   916    938   #define DB_UnresetViews    0x0002  /* Some views have defined column names */
   917    939   #define DB_Empty           0x0004  /* The file is empty (length 0 bytes) */
   918    940   
   919    941   /*
   920    942   ** The number of different kinds of things that can be limited
   921    943   ** using the sqlite3_limit() interface.
   922    944   */
   923         -#define SQLITE_N_LIMIT (SQLITE_LIMIT_TRIGGER_DEPTH+1)
          945  +#define SQLITE_N_LIMIT (SQLITE_LIMIT_WORKER_THREADS+1)
   924    946   
   925    947   /*
   926    948   ** Lookaside malloc is a set of fixed-size buffers that can be used
   927    949   ** to satisfy small transient memory allocation requests for objects
   928    950   ** associated with a particular database connection.  The use of
   929    951   ** lookaside malloc provides a significant performance enhancement
   930    952   ** (approx 10%) by avoiding numerous malloc/free requests while parsing
................................................................................
   993   1015     u8 vtabOnConflict;            /* Value to return for s3_vtab_on_conflict() */
   994   1016     u8 isTransactionSavepoint;    /* True if the outermost savepoint is a TS */
   995   1017     int nextPagesize;             /* Pagesize after VACUUM if >0 */
   996   1018     u32 magic;                    /* Magic number for detect library misuse */
   997   1019     int nChange;                  /* Value returned by sqlite3_changes() */
   998   1020     int nTotalChange;             /* Value returned by sqlite3_total_changes() */
   999   1021     int aLimit[SQLITE_N_LIMIT];   /* Limits */
         1022  +  int nMaxSorterMmap;           /* Maximum size of regions mapped by sorter */
  1000   1023     struct sqlite3InitInfo {      /* Information used during initialization */
  1001   1024       int newTnum;                /* Rootpage of table being initialized */
  1002   1025       u8 iDb;                     /* Which db file is being initialized */
  1003   1026       u8 busy;                    /* TRUE if currently initializing */
  1004   1027       u8 orphanTrigger;           /* Last statement is orphaned TEMP trigger */
  1005   1028     } init;
  1006   1029     int nVdbeActive;              /* Number of VDBEs currently running */
................................................................................
  1663   1686   ** The r1 and r2 member variables are only used by the optimized comparison
  1664   1687   ** functions vdbeRecordCompareInt() and vdbeRecordCompareString().
  1665   1688   */
  1666   1689   struct UnpackedRecord {
  1667   1690     KeyInfo *pKeyInfo;  /* Collation and sort-order information */
  1668   1691     u16 nField;         /* Number of entries in apMem[] */
  1669   1692     i8 default_rc;      /* Comparison result if keys are equal */
  1670         -  u8 isCorrupt;       /* Corruption detected by xRecordCompare() */
         1693  +  u8 errCode;         /* Error detected by xRecordCompare (CORRUPT or NOMEM) */
  1671   1694     Mem *aMem;          /* Values */
  1672   1695     int r1;             /* Value to return if (lhs < rhs) */
  1673   1696     int r2;             /* Value to return if (lhs > rhs) */
  1674   1697   };
  1675   1698   
  1676   1699   
  1677   1700   /*
................................................................................
  3711   3734   #endif
  3712   3735   #define MEMTYPE_HEAP       0x01  /* General heap allocations */
  3713   3736   #define MEMTYPE_LOOKASIDE  0x02  /* Might have been lookaside memory */
  3714   3737   #define MEMTYPE_SCRATCH    0x04  /* Scratch allocations */
  3715   3738   #define MEMTYPE_PCACHE     0x08  /* Page cache allocations */
  3716   3739   #define MEMTYPE_DB         0x10  /* Uses sqlite3DbMalloc, not sqlite_malloc */
  3717   3740   
         3741  +/*
         3742  +** Threading interface
         3743  +*/
         3744  +#if SQLITE_MAX_WORKER_THREADS>0
         3745  +int sqlite3ThreadCreate(SQLiteThread**,void*(*)(void*),void*);
         3746  +int sqlite3ThreadJoin(SQLiteThread*, void**);
         3747  +#endif
         3748  +
  3718   3749   #endif /* _SQLITEINT_H_ */

Changes to src/test1.c.

  2712   2712     return TCL_OK;
  2713   2713   
  2714   2714   bad_args:
  2715   2715     Tcl_AppendResult(interp, "wrong # args: should be \"",
  2716   2716         Tcl_GetStringFromObj(objv[0], 0), " <DB> <utf8> <utf16le> <utf16be>", 0);
  2717   2717     return TCL_ERROR;
  2718   2718   }
         2719  +
         2720  +/*
         2721  +** Usage: add_test_utf16bin_collate <db ptr>
         2722  +**
         2723  +** Add a utf-16 collation sequence named "utf16bin" to the database
         2724  +** handle. This collation sequence compares arguments in the same way as the
         2725  +** built-in collation "binary".
         2726  +*/
         2727  +static int test_utf16bin_collate_func(
         2728  +  void *pCtx, 
         2729  +  int nA, const void *zA,
         2730  +  int nB, const void *zB
         2731  +){
         2732  +  int nCmp = (nA>nB ? nB : nA);
         2733  +  int res = memcmp(zA, zB, nCmp);
         2734  +  if( res==0 ) res = nA - nB;
         2735  +  return res;
         2736  +}
         2737  +static int test_utf16bin_collate(
         2738  +  void * clientData,
         2739  +  Tcl_Interp *interp,
         2740  +  int objc,
         2741  +  Tcl_Obj *CONST objv[]
         2742  +){
         2743  +  sqlite3 *db;
         2744  +  int rc;
         2745  +
         2746  +  if( objc!=2 ) goto bad_args;
         2747  +  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;
         2748  +
         2749  +  rc = sqlite3_create_collation(db, "utf16bin", SQLITE_UTF16, 0, 
         2750  +      test_utf16bin_collate_func
         2751  +  );
         2752  +  if( sqlite3TestErrCode(interp, db, rc) ) return TCL_ERROR;
         2753  +  return TCL_OK;
         2754  +
         2755  +bad_args:
         2756  +  Tcl_WrongNumArgs(interp, 1, objv, "DB");
         2757  +  return TCL_ERROR;
         2758  +}
  2719   2759   
  2720   2760   /*
  2721   2761   ** When the collation needed callback is invoked, record the name of 
  2722   2762   ** the requested collating function here.  The recorded name is linked
  2723   2763   ** to a TCL variable and used to make sure that the requested collation
  2724   2764   ** name is correct.
  2725   2765   */
................................................................................
  5891   5931     Tcl_Obj *CONST objv[]
  5892   5932   ){
  5893   5933     struct Verb {
  5894   5934       const char *zName;
  5895   5935       int i;
  5896   5936     } aVerb[] = {
  5897   5937       { "SQLITE_TESTCTRL_LOCALTIME_FAULT", SQLITE_TESTCTRL_LOCALTIME_FAULT }, 
         5938  +    { "SQLITE_TESTCTRL_SORTER_MMAP", SQLITE_TESTCTRL_SORTER_MMAP }, 
  5898   5939     };
  5899   5940     int iVerb;
  5900   5941     int iFlag;
  5901   5942     int rc;
  5902   5943   
  5903   5944     if( objc<2 ){
  5904   5945       Tcl_WrongNumArgs(interp, 1, objv, "VERB ARGS...");
................................................................................
  5918   5959           Tcl_WrongNumArgs(interp, 2, objv, "ONOFF");
  5919   5960           return TCL_ERROR;
  5920   5961         }
  5921   5962         if( Tcl_GetBooleanFromObj(interp, objv[2], &val) ) return TCL_ERROR;
  5922   5963         sqlite3_test_control(SQLITE_TESTCTRL_LOCALTIME_FAULT, val);
  5923   5964         break;
  5924   5965       }
         5966  +
         5967  +    case SQLITE_TESTCTRL_SORTER_MMAP: {
         5968  +      int val;
         5969  +      sqlite3 *db;
         5970  +      if( objc!=4 ){
         5971  +        Tcl_WrongNumArgs(interp, 2, objv, "DB LIMIT");
         5972  +        return TCL_ERROR;
         5973  +      }
         5974  +      if( getDbPointer(interp, Tcl_GetString(objv[2]), &db) ) return TCL_ERROR;
         5975  +      if( Tcl_GetIntFromObj(interp, objv[3], &val) ) return TCL_ERROR;
         5976  +      sqlite3_test_control(SQLITE_TESTCTRL_SORTER_MMAP, db, val);
         5977  +      break;
         5978  +    }
  5925   5979     }
  5926   5980   
  5927   5981     Tcl_ResetResult(interp);
  5928   5982     return TCL_OK;
  5929   5983   }
  5930   5984   
  5931   5985   #if SQLITE_OS_UNIX
................................................................................
  6331   6385         sqlite3_free(zErrMsg);
  6332   6386         return TCL_ERROR;
  6333   6387       }
  6334   6388     }
  6335   6389     return TCL_OK;
  6336   6390   }
  6337   6391   
         6392  +/*
         6393  +**     sorter_test_fakeheap BOOL
         6394  +**
         6395  +*/
         6396  +static int sorter_test_fakeheap(
         6397  +  void * clientData,
         6398  +  Tcl_Interp *interp,
         6399  +  int objc,
         6400  +  Tcl_Obj *CONST objv[]
         6401  +){
         6402  +  int bArg;
         6403  +  if( objc!=2 ){
         6404  +    Tcl_WrongNumArgs(interp, 1, objv, "BOOL");
         6405  +    return TCL_ERROR;
         6406  +  }
         6407  +
         6408  +  if( Tcl_GetBooleanFromObj(interp, objv[1], &bArg) ){
         6409  +    return TCL_ERROR;
         6410  +  }
         6411  +
         6412  +  if( bArg ){
         6413  +    if( sqlite3GlobalConfig.pHeap==0 ){
         6414  +      sqlite3GlobalConfig.pHeap = SQLITE_INT_TO_PTR(-1);
         6415  +    }
         6416  +  }else{
         6417  +    if( sqlite3GlobalConfig.pHeap==SQLITE_INT_TO_PTR(-1) ){
         6418  +      sqlite3GlobalConfig.pHeap = 0;
         6419  +    }
         6420  +  }
         6421  +
         6422  +  Tcl_ResetResult(interp);
         6423  +  return TCL_OK;
         6424  +}
         6425  +
         6426  +/*
         6427  +**     sorter_test_sort4_helper DB SQL1 NSTEP SQL2
         6428  +**
         6429  +** Compile SQL statement $SQL1 and step it $NSTEP times. For each row, 
         6430  +** check that the leftmost and rightmost columns returned are both integers,
         6431  +** and that both contain the same value.
         6432  +**
         6433  +** Then execute statement $SQL2. Check that the statement returns the same
         6434  +** set of integers in the same order as in the previous step (using $SQL1).
         6435  +*/
         6436  +static int sorter_test_sort4_helper(
         6437  +  void * clientData,
         6438  +  Tcl_Interp *interp,
         6439  +  int objc,
         6440  +  Tcl_Obj *CONST objv[]
         6441  +){
         6442  +  const char *zSql1;
         6443  +  const char *zSql2;
         6444  +  int nStep; 
         6445  +  int iStep; 
         6446  +  int iCksum1 = 0; 
         6447  +  int iCksum2 = 0; 
         6448  +  int rc;
         6449  +  int iB;
         6450  +  sqlite3 *db;
         6451  +  sqlite3_stmt *pStmt;
         6452  +  
         6453  +  if( objc!=5 ){
         6454  +    Tcl_WrongNumArgs(interp, 1, objv, "DB SQL1 NSTEP SQL2");
         6455  +    return TCL_ERROR;
         6456  +  }
         6457  +
         6458  +  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;
         6459  +  zSql1 = Tcl_GetString(objv[2]);
         6460  +  if( Tcl_GetIntFromObj(interp, objv[3], &nStep) ) return TCL_ERROR;
         6461  +  zSql2 = Tcl_GetString(objv[4]);
         6462  +
         6463  +  rc = sqlite3_prepare_v2(db, zSql1, -1, &pStmt, 0);
         6464  +  if( rc!=SQLITE_OK ) goto sql_error;
         6465  +
         6466  +  iB = sqlite3_column_count(pStmt)-1;
         6467  +  for(iStep=0; iStep<nStep && SQLITE_ROW==sqlite3_step(pStmt); iStep++){
         6468  +    int a = sqlite3_column_int(pStmt, 0);
         6469  +    if( a!=sqlite3_column_int(pStmt, iB) ){
         6470  +      Tcl_AppendResult(interp, "data error: (a!=b)", 0);
         6471  +      return TCL_ERROR;
         6472  +    }
         6473  +
         6474  +    iCksum1 += (iCksum1 << 3) + a;
         6475  +  }
         6476  +  rc = sqlite3_finalize(pStmt);
         6477  +  if( rc!=SQLITE_OK ) goto sql_error;
         6478  +
         6479  +  rc = sqlite3_prepare_v2(db, zSql2, -1, &pStmt, 0);
         6480  +  if( rc!=SQLITE_OK ) goto sql_error;
         6481  +  for(iStep=0; SQLITE_ROW==sqlite3_step(pStmt); iStep++){
         6482  +    int a = sqlite3_column_int(pStmt, 0);
         6483  +    iCksum2 += (iCksum2 << 3) + a;
         6484  +  }
         6485  +  rc = sqlite3_finalize(pStmt);
         6486  +  if( rc!=SQLITE_OK ) goto sql_error;
         6487  +
         6488  +  if( iCksum1!=iCksum2 ){
         6489  +    Tcl_AppendResult(interp, "checksum mismatch", 0);
         6490  +    return TCL_ERROR;
         6491  +  }
         6492  +
         6493  +  return TCL_OK;
         6494  + sql_error:
         6495  +  Tcl_AppendResult(interp, "sql error: ", sqlite3_errmsg(db), 0);
         6496  +  return TCL_ERROR;
         6497  +}
         6498  +
  6338   6499   
  6339   6500   /*
  6340   6501   ** Register commands with the TCL interpreter.
  6341   6502   */
  6342   6503   int Sqlitetest1_Init(Tcl_Interp *interp){
  6343   6504     extern int sqlite3_search_count;
  6344   6505     extern int sqlite3_found_count;
................................................................................
  6533   6694        { "sqlite3_create_function_v2", test_create_function_v2, 0 },
  6534   6695   
  6535   6696        /* Functions from os.h */
  6536   6697   #ifndef SQLITE_OMIT_UTF16
  6537   6698        { "add_test_collate",        test_collate, 0            },
  6538   6699        { "add_test_collate_needed", test_collate_needed, 0     },
  6539   6700        { "add_test_function",       test_function, 0           },
         6701  +     { "add_test_utf16bin_collate",    test_utf16bin_collate, 0        },
  6540   6702   #endif
  6541   6703        { "sqlite3_test_errstr",     test_errstr, 0             },
  6542   6704        { "tcl_variable_type",       tcl_variable_type, 0       },
  6543   6705   #ifndef SQLITE_OMIT_SHARED_CACHE
  6544   6706        { "sqlite3_enable_shared_cache", test_enable_shared, 0  },
  6545   6707        { "sqlite3_shared_cache_report", sqlite3BtreeSharedCacheReport, 0},
  6546   6708   #endif
................................................................................
  6566   6728        { "print_explain_query_plan", test_print_eqp, 0  },
  6567   6729   #endif
  6568   6730        { "sqlite3_test_control", test_test_control },
  6569   6731   #if SQLITE_OS_UNIX
  6570   6732        { "getrusage", test_getrusage },
  6571   6733   #endif
  6572   6734        { "load_static_extension", tclLoadStaticExtensionCmd },
         6735  +     { "sorter_test_fakeheap", sorter_test_fakeheap },
         6736  +     { "sorter_test_sort4_helper", sorter_test_sort4_helper },
  6573   6737     };
  6574   6738     static int bitmask_size = sizeof(Bitmask)*8;
  6575   6739     int i;
  6576   6740     extern int sqlite3_sync_count, sqlite3_fullsync_count;
  6577   6741     extern int sqlite3_opentemp_count;
  6578   6742     extern int sqlite3_like_count;
  6579   6743     extern int sqlite3_xferopt_count;

Changes to src/test_config.c.

    98     98   #endif
    99     99   
   100    100   #if SQLITE_MAX_MMAP_SIZE>0
   101    101     Tcl_SetVar2(interp, "sqlite_options", "mmap", "1", TCL_GLOBAL_ONLY);
   102    102   #else
   103    103     Tcl_SetVar2(interp, "sqlite_options", "mmap", "0", TCL_GLOBAL_ONLY);
   104    104   #endif
          105  +
          106  +  Tcl_SetVar2(interp, "sqlite_options", "worker_threads", 
          107  +      STRINGVALUE(SQLITE_MAX_WORKER_THREADS), TCL_GLOBAL_ONLY
          108  +  );
   105    109   
   106    110   #if 1 /* def SQLITE_MEMDEBUG */
   107    111     Tcl_SetVar2(interp, "sqlite_options", "memdebug", "1", TCL_GLOBAL_ONLY);
   108    112   #else
   109    113     Tcl_SetVar2(interp, "sqlite_options", "memdebug", "0", TCL_GLOBAL_ONLY);
   110    114   #endif
   111    115   

Changes to src/test_malloc.c.

  1248   1248     }
  1249   1249   
  1250   1250     rc = sqlite3_config(SQLITE_CONFIG_COVERING_INDEX_SCAN, bUseCis);
  1251   1251     Tcl_SetResult(interp, (char *)sqlite3ErrName(rc), TCL_VOLATILE);
  1252   1252   
  1253   1253     return TCL_OK;
  1254   1254   }
         1255  +
  1255   1256   
  1256   1257   /*
  1257   1258   ** Usage:    sqlite3_dump_memsys3  FILENAME
  1258   1259   **           sqlite3_dump_memsys5  FILENAME
  1259   1260   **
  1260   1261   ** Write a summary of unfreed memsys3 allocations to FILENAME.
  1261   1262   */

Added src/threads.c.

            1  +/*
            2  +** 2012 July 21
            3  +**
            4  +** The author disclaims copyright to this source code.  In place of
            5  +** a legal notice, here is a blessing:
            6  +**
            7  +**    May you do good and not evil.
            8  +**    May you find forgiveness for yourself and forgive others.
            9  +**    May you share freely, never taking more than you give.
           10  +**
           11  +******************************************************************************
           12  +**
           13  +** This file presents a simple cross-platform threading interface for
           14  +** use internally by SQLite.
           15  +**
           16  +** A "thread" can be created using sqlite3ThreadCreate().  This thread
           17  +** runs independently of its creator until it is joined using
           18  +** sqlite3ThreadJoin(), at which point it terminates.
           19  +**
           20  +** Threads do not have to be real.  It could be that the work of the
           21  +** "thread" is done by the main thread at either the sqlite3ThreadCreate()
           22  +** or sqlite3ThreadJoin() call.  This is, in fact, what happens in
           23  +** single threaded systems.  Nothing in SQLite requires multiple threads.
           24  +** This interface exists so that applications that want to take advantage
           25  +** of multiple cores can do so, while also allowing applications to stay
           26  +** single-threaded if desired.
           27  +*/
           28  +#include "sqliteInt.h"
           29  +
           30  +#if SQLITE_MAX_WORKER_THREADS>0
           31  +
           32  +/********************************* Unix Pthreads ****************************/
           33  +#if SQLITE_OS_UNIX && defined(SQLITE_MUTEX_PTHREADS) && SQLITE_THREADSAFE>0
           34  +
           35  +#define SQLITE_THREADS_IMPLEMENTED 1  /* Prevent the single-thread code below */
           36  +#include <pthread.h>
           37  +
           38  +/* A running thread */
           39  +struct SQLiteThread {
           40  +  pthread_t tid;                 /* Thread ID */
           41  +  int done;                      /* Set to true when thread finishes */
           42  +  void *pOut;                    /* Result returned by the thread */
           43  +  void *(*xTask)(void*);         /* The thread routine */
           44  +  void *pIn;                     /* Argument to the thread */
           45  +};
           46  +
           47  +/* Create a new thread */
           48  +int sqlite3ThreadCreate(
           49  +  SQLiteThread **ppThread,  /* OUT: Write the thread object here */
           50  +  void *(*xTask)(void*),    /* Routine to run in a separate thread */
           51  +  void *pIn                 /* Argument passed into xTask() */
           52  +){
           53  +  SQLiteThread *p;
           54  +  int rc;
           55  +
           56  +  assert( ppThread!=0 );
           57  +  assert( xTask!=0 );
           58  +  /* This routine is never used in single-threaded mode */
           59  +  assert( sqlite3GlobalConfig.bCoreMutex!=0 );
           60  +
           61  +  *ppThread = 0;
           62  +  p = sqlite3Malloc(sizeof(*p));
           63  +  if( p==0 ) return SQLITE_NOMEM;
           64  +  memset(p, 0, sizeof(*p));
           65  +  p->xTask = xTask;
           66  +  p->pIn = pIn;
           67  +  if( sqlite3FaultSim(200) ){
           68  +    rc = 1;
           69  +  }else{    
           70  +    rc = pthread_create(&p->tid, 0, xTask, pIn);
           71  +  }
           72  +  if( rc ){
           73  +    p->done = 1;
           74  +    p->pOut = xTask(pIn);
           75  +  }
           76  +  *ppThread = p;
           77  +  return SQLITE_OK;
           78  +}
           79  +
           80  +/* Get the results of the thread */
           81  +int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){
           82  +  int rc;
           83  +
           84  +  assert( ppOut!=0 );
           85  +  if( NEVER(p==0) ) return SQLITE_NOMEM;
           86  +  if( p->done ){
           87  +    *ppOut = p->pOut;
           88  +    rc = SQLITE_OK;
           89  +  }else{
           90  +    rc = pthread_join(p->tid, ppOut) ? SQLITE_ERROR : SQLITE_OK;
           91  +  }
           92  +  sqlite3_free(p);
           93  +  return rc;
           94  +}
           95  +
           96  +#endif /* SQLITE_OS_UNIX && defined(SQLITE_MUTEX_PTHREADS) */
           97  +/******************************** End Unix Pthreads *************************/
           98  +
           99  +
          100  +/********************************* Win32 Threads ****************************/
          101  +#if SQLITE_OS_WIN && !SQLITE_OS_WINRT && SQLITE_THREADSAFE>0
          102  +
          103  +#define SQLITE_THREADS_IMPLEMENTED 1  /* Prevent the single-thread code below */
          104  +#include <process.h>
          105  +
          106  +/* A running thread */
          107  +struct SQLiteThread {
          108  +  uintptr_t tid;           /* The thread handle */
          109  +  unsigned id;             /* The thread identifier */
          110  +  void *(*xTask)(void*);   /* The routine to run as a thread */
          111  +  void *pIn;               /* Argument to xTask */
          112  +  void *pResult;           /* Result of xTask */
          113  +};
          114  +
          115  +/* Thread procedure Win32 compatibility shim */
          116  +static unsigned __stdcall sqlite3ThreadProc(
          117  +  void *pArg  /* IN: Pointer to the SQLiteThread structure */
          118  +){
          119  +  SQLiteThread *p = (SQLiteThread *)pArg;
          120  +
          121  +  assert( p!=0 );
          122  +#if 0
          123  +  /*
          124  +  ** This assert appears to trigger spuriously on certain
          125  +  ** versions of Windows, possibly due to _beginthreadex()
          126  +  ** and/or CreateThread() not fully setting their thread
          127  +  ** ID parameter before starting the thread.
          128  +  */
          129  +  assert( p->id==GetCurrentThreadId() );
          130  +#endif
          131  +  assert( p->xTask!=0 );
          132  +  p->pResult = p->xTask(p->pIn);
          133  +
          134  +  _endthreadex(0);
          135  +  return 0; /* NOT REACHED */
          136  +}
          137  +
          138  +/* Create a new thread */
          139  +int sqlite3ThreadCreate(
          140  +  SQLiteThread **ppThread,  /* OUT: Write the thread object here */
          141  +  void *(*xTask)(void*),    /* Routine to run in a separate thread */
          142  +  void *pIn                 /* Argument passed into xTask() */
          143  +){
          144  +  SQLiteThread *p;
          145  +
          146  +  assert( ppThread!=0 );
          147  +  assert( xTask!=0 );
          148  +  *ppThread = 0;
          149  +  p = sqlite3Malloc(sizeof(*p));
          150  +  if( p==0 ) return SQLITE_NOMEM;
          151  +  if( sqlite3GlobalConfig.bCoreMutex==0 ){
          152  +    memset(p, 0, sizeof(*p));
          153  +  }else{
          154  +    p->xTask = xTask;
          155  +    p->pIn = pIn;
          156  +    p->tid = _beginthreadex(0, 0, sqlite3ThreadProc, p, 0, &p->id);
          157  +    if( p->tid==0 ){
          158  +      memset(p, 0, sizeof(*p));
          159  +    }
          160  +  }
          161  +  if( p->xTask==0 ){
          162  +    p->id = GetCurrentThreadId();
          163  +    p->pResult = xTask(pIn);
          164  +  }
          165  +  *ppThread = p;
          166  +  return SQLITE_OK;
          167  +}
          168  +
          169  +DWORD sqlite3Win32Wait(HANDLE hObject); /* os_win.c */
          170  +
          171  +/* Get the results of the thread */
          172  +int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){
          173  +  DWORD rc;
          174  +  BOOL bRc;
          175  +
          176  +  assert( ppOut!=0 );
          177  +  if( NEVER(p==0) ) return SQLITE_NOMEM;
          178  +  if( p->xTask==0 ){
          179  +    assert( p->id==GetCurrentThreadId() );
          180  +    rc = WAIT_OBJECT_0;
          181  +    assert( p->tid==0 );
          182  +  }else{
          183  +    assert( p->id!=0 && p->id!=GetCurrentThreadId() );
          184  +    rc = sqlite3Win32Wait((HANDLE)p->tid);
          185  +    assert( rc!=WAIT_IO_COMPLETION );
          186  +    bRc = CloseHandle((HANDLE)p->tid);
          187  +    assert( bRc );
          188  +  }
          189  +  if( rc==WAIT_OBJECT_0 ) *ppOut = p->pResult;
          190  +  sqlite3_free(p);
          191  +  return (rc==WAIT_OBJECT_0) ? SQLITE_OK : SQLITE_ERROR;
          192  +}
          193  +
          194  +#endif /* SQLITE_OS_WIN && !SQLITE_OS_WINRT */
          195  +/******************************** End Win32 Threads *************************/
          196  +
          197  +
          198  +/********************************* Single-Threaded **************************/
          199  +#ifndef SQLITE_THREADS_IMPLEMENTED
          200  +/*
          201  +** This implementation does not actually create a new thread.  It does the
          202  +** work of the thread in the main thread, when either the thread is created
          203  +** or when it is joined.
          204  +*/
          205  +
          206  +/* A running thread */
          207  +struct SQLiteThread {
          208  +  void *(*xTask)(void*);   /* The routine to run as a thread */
          209  +  void *pIn;               /* Argument to xTask */
          210  +  void *pResult;           /* Result of xTask */
          211  +};
          212  +
          213  +/* Create a new thread */
          214  +int sqlite3ThreadCreate(
          215  +  SQLiteThread **ppThread,  /* OUT: Write the thread object here */
          216  +  void *(*xTask)(void*),    /* Routine to run in a separate thread */
          217  +  void *pIn                 /* Argument passed into xTask() */
          218  +){
          219  +  SQLiteThread *p;
          220  +
          221  +  assert( ppThread!=0 );
          222  +  assert( xTask!=0 );
          223  +  *ppThread = 0;
          224  +  p = sqlite3Malloc(sizeof(*p));
          225  +  if( p==0 ) return SQLITE_NOMEM;
          226  +  if( (SQLITE_PTR_TO_INT(p)/17)&1 ){
          227  +    p->xTask = xTask;
          228  +    p->pIn = pIn;
          229  +  }else{
          230  +    p->xTask = 0;
          231  +    p->pResult = xTask(pIn);
          232  +  }
          233  +  *ppThread = p;
          234  +  return SQLITE_OK;
          235  +}
          236  +
          237  +/* Get the results of the thread */
          238  +int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){
          239  +
          240  +  assert( ppOut!=0 );
          241  +  if( NEVER(p==0) ) return SQLITE_NOMEM;
          242  +  if( p->xTask ){
          243  +    *ppOut = p->xTask(p->pIn);
          244  +  }else{
          245  +    *ppOut = p->pResult;
          246  +  }
          247  +  sqlite3_free(p);
          248  +
          249  +#if defined(SQLITE_TEST)
          250  +  {
          251  +    void *pTstAlloc = sqlite3Malloc(10);
          252  +    if (!pTstAlloc) return SQLITE_NOMEM;
          253  +    sqlite3_free(pTstAlloc);
          254  +  }
          255  +#endif
          256  +
          257  +  return SQLITE_OK;
          258  +}
          259  +
          260  +#endif /* !defined(SQLITE_THREADS_IMPLEMENTED) */
          261  +/****************************** End Single-Threaded *************************/
          262  +#endif /* SQLITE_MAX_WORKER_THREADS>0 */

Changes to src/vdbe.c.

  1171   1171     pIn1 = &aMem[p1];
  1172   1172     pOut = &aMem[p2];
  1173   1173     do{
  1174   1174       assert( pOut<=&aMem[(p->nMem-p->nCursor)] );
  1175   1175       assert( pIn1<=&aMem[(p->nMem-p->nCursor)] );
  1176   1176       assert( memIsValid(pIn1) );
  1177   1177       memAboutToChange(p, pOut);
  1178         -    VdbeMemReleaseExtern(pOut);
         1178  +    sqlite3VdbeMemRelease(pOut);
  1179   1179       zMalloc = pOut->zMalloc;
  1180   1180       memcpy(pOut, pIn1, sizeof(Mem));
  1181   1181   #ifdef SQLITE_DEBUG
  1182   1182       if( pOut->pScopyFrom>=&aMem[p1] && pOut->pScopyFrom<&aMem[p1+pOp->p3] ){
  1183   1183         pOut->pScopyFrom += p1 - pOp->p2;
  1184   1184       }
  1185   1185   #endif
................................................................................
  1551   1551     sqlite3_value **apVal;
  1552   1552     int n;
  1553   1553   
  1554   1554     n = pOp->p5;
  1555   1555     apVal = p->apArg;
  1556   1556     assert( apVal || n==0 );
  1557   1557     assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) );
  1558         -  pOut = &aMem[pOp->p3];
  1559         -  memAboutToChange(p, pOut);
         1558  +  ctx.pOut = &aMem[pOp->p3];
         1559  +  memAboutToChange(p, ctx.pOut);
  1560   1560   
  1561   1561     assert( n==0 || (pOp->p2>0 && pOp->p2+n<=(p->nMem-p->nCursor)+1) );
  1562   1562     assert( pOp->p3<pOp->p2 || pOp->p3>=pOp->p2+n );
  1563   1563     pArg = &aMem[pOp->p2];
  1564   1564     for(i=0; i<n; i++, pArg++){
  1565   1565       assert( memIsValid(pArg) );
  1566   1566       apVal[i] = pArg;
................................................................................
  1568   1568       REGISTER_TRACE(pOp->p2+i, pArg);
  1569   1569     }
  1570   1570   
  1571   1571     assert( pOp->p4type==P4_FUNCDEF );
  1572   1572     ctx.pFunc = pOp->p4.pFunc;
  1573   1573     ctx.iOp = pc;
  1574   1574     ctx.pVdbe = p;
  1575         -
  1576         -  /* The output cell may already have a buffer allocated. Move
  1577         -  ** the pointer to ctx.s so in case the user-function can use
  1578         -  ** the already allocated buffer instead of allocating a new one.
  1579         -  */
  1580         -  memcpy(&ctx.s, pOut, sizeof(Mem));
  1581         -  pOut->flags = MEM_Null;
  1582         -  pOut->xDel = 0;
  1583         -  pOut->zMalloc = 0;
  1584         -  MemSetTypeFlag(&ctx.s, MEM_Null);
         1575  +  MemSetTypeFlag(ctx.pOut, MEM_Null);
  1585   1576   
  1586   1577     ctx.fErrorOrAux = 0;
  1587   1578     if( ctx.pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){
  1588   1579       assert( pOp>aOp );
  1589   1580       assert( pOp[-1].p4type==P4_COLLSEQ );
  1590   1581       assert( pOp[-1].opcode==OP_CollSeq );
  1591   1582       ctx.pColl = pOp[-1].p4.pColl;
  1592   1583     }
  1593   1584     db->lastRowid = lastRowid;
  1594   1585     (*ctx.pFunc->xFunc)(&ctx, n, apVal); /* IMP: R-24505-23230 */
  1595   1586     lastRowid = db->lastRowid;
  1596   1587   
  1597         -  if( db->mallocFailed ){
  1598         -    /* Even though a malloc() has failed, the implementation of the
  1599         -    ** user function may have called an sqlite3_result_XXX() function
  1600         -    ** to return a value. The following call releases any resources
  1601         -    ** associated with such a value.
  1602         -    */
  1603         -    sqlite3VdbeMemRelease(&ctx.s);
  1604         -    goto no_mem;
  1605         -  }
  1606         -
  1607   1588     /* If the function returned an error, throw an exception */
  1608   1589     if( ctx.fErrorOrAux ){
  1609   1590       if( ctx.isError ){
  1610         -      sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(&ctx.s));
         1591  +      sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(ctx.pOut));
  1611   1592         rc = ctx.isError;
  1612   1593       }
  1613   1594       sqlite3VdbeDeleteAuxData(p, pc, pOp->p1);
  1614   1595     }
  1615   1596   
  1616   1597     /* Copy the result of the function into register P3 */
  1617         -  sqlite3VdbeChangeEncoding(&ctx.s, encoding);
  1618         -  assert( pOut->flags==MEM_Null );
  1619         -  memcpy(pOut, &ctx.s, sizeof(Mem));
  1620         -  if( sqlite3VdbeMemTooBig(pOut) ){
         1598  +  sqlite3VdbeChangeEncoding(ctx.pOut, encoding);
         1599  +  if( sqlite3VdbeMemTooBig(ctx.pOut) ){
  1621   1600       goto too_big;
  1622   1601     }
  1623   1602   
  1624         -#if 0
  1625         -  /* The app-defined function has done something that as caused this
  1626         -  ** statement to expire.  (Perhaps the function called sqlite3_exec()
  1627         -  ** with a CREATE TABLE statement.)
  1628         -  */
  1629         -  if( p->expired ) rc = SQLITE_ABORT;
  1630         -#endif
  1631         -
  1632         -  REGISTER_TRACE(pOp->p3, pOut);
  1633         -  UPDATE_MAX_BLOBSIZE(pOut);
         1603  +  REGISTER_TRACE(pOp->p3, ctx.pOut);
         1604  +  UPDATE_MAX_BLOBSIZE(ctx.pOut);
  1634   1605     break;
  1635   1606   }
  1636   1607   
  1637   1608   /* Opcode: BitAnd P1 P2 P3 * *
  1638   1609   ** Synopsis:  r[P3]=r[P1]&r[P2]
  1639   1610   **
  1640   1611   ** Take the bit-wise AND of the values in register P1 and P2 and
................................................................................
  1775   1746     }
  1776   1747     break;
  1777   1748   }
  1778   1749   #endif
  1779   1750   
  1780   1751   #ifndef SQLITE_OMIT_CAST
  1781   1752   /* Opcode: Cast P1 P2 * * *
         1753  +** Synopsis: affinity(r[P1])
  1782   1754   **
  1783   1755   ** Force the value in register P1 to be the type defined by P2.
  1784   1756   ** 
  1785   1757   ** <ul>
  1786   1758   ** <li value="97"> TEXT
  1787   1759   ** <li value="98"> BLOB
  1788   1760   ** <li value="99"> NUMERIC
................................................................................
  3396   3368         pCx->isTable = 1;
  3397   3369       }
  3398   3370     }
  3399   3371     pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED);
  3400   3372     break;
  3401   3373   }
  3402   3374   
  3403         -/* Opcode: SorterOpen P1 P2 * P4 *
         3375  +/* Opcode: SorterOpen P1 P2 P3 P4 *
  3404   3376   **
  3405   3377   ** This opcode works like OP_OpenEphemeral except that it opens
  3406   3378   ** a transient index that is specifically designed to sort large
  3407   3379   ** tables using an external merge-sort algorithm.
         3380  +**
         3381  +** If argument P3 is non-zero, then it indicates that the sorter may
         3382  +** assume that a stable sort considering the first P3 fields of each
         3383  +** key is sufficient to produce the required results.
  3408   3384   */
  3409   3385   case OP_SorterOpen: {
  3410   3386     VdbeCursor *pCx;
  3411   3387   
  3412   3388     assert( pOp->p1>=0 );
  3413   3389     assert( pOp->p2>=0 );
  3414   3390     pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1);
  3415   3391     if( pCx==0 ) goto no_mem;
  3416   3392     pCx->pKeyInfo = pOp->p4.pKeyInfo;
  3417   3393     assert( pCx->pKeyInfo->db==db );
  3418   3394     assert( pCx->pKeyInfo->enc==ENC(db) );
  3419         -  rc = sqlite3VdbeSorterInit(db, pCx);
         3395  +  rc = sqlite3VdbeSorterInit(db, pOp->p3, pCx);
         3396  +  break;
         3397  +}
         3398  +
         3399  +/* Opcode: SequenceTest P1 P2 * * *
         3400  +** Synopsis: if( (cursor[P1].ctr++)==0 ) pc = P2
         3401  +**
         3402  +** P1 is a sorter cursor. If the sequence counter is currently zero, jump
         3403  +** to P2. Regardless of whether or not the jump is taken, increment the
         3404  +** sequence value.
         3405  +*/
         3406  +case OP_SequenceTest: {
         3407  +  VdbeCursor *pC;
         3408  +  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
         3409  +  pC = p->apCsr[pOp->p1];
         3410  +  assert( pC->pSorter );
         3411  +  if( (pC->seqCount++)==0 ){
         3412  +    pc = pOp->p2 - 1;
         3413  +  }
  3420   3414     break;
  3421   3415   }
  3422   3416   
  3423   3417   /* Opcode: OpenPseudo P1 P2 P3 * *
  3424   3418   ** Synopsis: P3 columns in r[P2]
  3425   3419   **
  3426   3420   ** Open a new cursor that points to a fake table that contains a single
................................................................................
  4312   4306     int nKeyCol;
  4313   4307   
  4314   4308     pC = p->apCsr[pOp->p1];
  4315   4309     assert( isSorter(pC) );
  4316   4310     assert( pOp->p4type==P4_INT32 );
  4317   4311     pIn3 = &aMem[pOp->p3];
  4318   4312     nKeyCol = pOp->p4.i;
         4313  +  res = 0;
  4319   4314     rc = sqlite3VdbeSorterCompare(pC, pIn3, nKeyCol, &res);
  4320   4315     VdbeBranchTaken(res!=0,2);
  4321   4316     if( res ){
  4322   4317       pc = pOp->p2-1;
  4323   4318     }
  4324   4319     break;
  4325   4320   };
................................................................................
  4576   4571     assert( pC!=0 );
  4577   4572     assert( isSorter(pC)==(pOp->opcode==OP_SorterSort) );
  4578   4573     res = 1;
  4579   4574   #ifdef SQLITE_DEBUG
  4580   4575     pC->seekOp = OP_Rewind;
  4581   4576   #endif
  4582   4577     if( isSorter(pC) ){
  4583         -    rc = sqlite3VdbeSorterRewind(db, pC, &res);
         4578  +    rc = sqlite3VdbeSorterRewind(pC, &res);
  4584   4579     }else{
  4585   4580       pCrsr = pC->pCursor;
  4586   4581       assert( pCrsr );
  4587   4582       rc = sqlite3BtreeFirst(pCrsr, &res);
  4588   4583       pC->deferredMoveto = 0;
  4589   4584       pC->cacheStatus = CACHE_STALE;
  4590   4585       pC->rowidIsValid = 0;
................................................................................
  4754   4749     pCrsr = pC->pCursor;
  4755   4750     if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++;
  4756   4751     assert( pCrsr!=0 );
  4757   4752     assert( pC->isTable==0 );
  4758   4753     rc = ExpandBlob(pIn2);
  4759   4754     if( rc==SQLITE_OK ){
  4760   4755       if( isSorter(pC) ){
  4761         -      rc = sqlite3VdbeSorterWrite(db, pC, pIn2);
         4756  +      rc = sqlite3VdbeSorterWrite(pC, pIn2);
  4762   4757       }else{
  4763   4758         nKey = pIn2->n;
  4764   4759         zKey = pIn2->z;
  4765   4760         rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, 
  4766   4761             ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0)
  4767   4762             );
  4768   4763         assert( pC->deferredMoveto==0 );
................................................................................
  5667   5662   ** successors.
  5668   5663   */
  5669   5664   case OP_AggStep: {
  5670   5665     int n;
  5671   5666     int i;
  5672   5667     Mem *pMem;
  5673   5668     Mem *pRec;
         5669  +  Mem t;
  5674   5670     sqlite3_context ctx;
  5675   5671     sqlite3_value **apVal;
  5676   5672   
  5677   5673     n = pOp->p5;
  5678   5674     assert( n>=0 );
  5679   5675     pRec = &aMem[pOp->p2];
  5680   5676     apVal = p->apArg;
................................................................................
  5684   5680       apVal[i] = pRec;
  5685   5681       memAboutToChange(p, pRec);
  5686   5682     }
  5687   5683     ctx.pFunc = pOp->p4.pFunc;
  5688   5684     assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) );
  5689   5685     ctx.pMem = pMem = &aMem[pOp->p3];
  5690   5686     pMem->n++;
  5691         -  ctx.s.flags = MEM_Null;
  5692         -  ctx.s.z = 0;
  5693         -  ctx.s.zMalloc = 0;
  5694         -  ctx.s.xDel = 0;
  5695         -  ctx.s.db = db;
         5687  +  t.flags = MEM_Null;
         5688  +  t.z = 0;
         5689  +  t.zMalloc = 0;
         5690  +  t.xDel = 0;
         5691  +  t.db = db;
         5692  +  ctx.pOut = &t;
  5696   5693     ctx.isError = 0;
  5697   5694     ctx.pColl = 0;
  5698   5695     ctx.skipFlag = 0;
  5699   5696     if( ctx.pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){
  5700   5697       assert( pOp>p->aOp );
  5701   5698       assert( pOp[-1].p4type==P4_COLLSEQ );
  5702   5699       assert( pOp[-1].opcode==OP_CollSeq );
  5703   5700       ctx.pColl = pOp[-1].p4.pColl;
  5704   5701     }
  5705   5702     (ctx.pFunc->xStep)(&ctx, n, apVal); /* IMP: R-24505-23230 */
  5706   5703     if( ctx.isError ){
  5707         -    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(&ctx.s));
         5704  +    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(&t));
  5708   5705       rc = ctx.isError;
  5709   5706     }
  5710   5707     if( ctx.skipFlag ){
  5711   5708       assert( pOp[-1].opcode==OP_CollSeq );
  5712   5709       i = pOp[-1].p1;
  5713   5710       if( i ) sqlite3VdbeMemSetInt64(&aMem[i], 1);
  5714   5711     }
  5715         -
  5716         -  sqlite3VdbeMemRelease(&ctx.s);
  5717         -
         5712  +  sqlite3VdbeMemRelease(&t);
  5718   5713     break;
  5719   5714   }
  5720   5715   
  5721   5716   /* Opcode: AggFinal P1 P2 * P4 *
  5722   5717   ** Synopsis: accum=r[P1] N=P2
  5723   5718   **
  5724   5719   ** Execute the finalizer function for an aggregate.  P1 is
................................................................................
  6160   6155       sqlite3VdbeMemSetNull(pDest);
  6161   6156       break;
  6162   6157     }
  6163   6158     pVtab = pCur->pVtabCursor->pVtab;
  6164   6159     pModule = pVtab->pModule;
  6165   6160     assert( pModule->xColumn );
  6166   6161     memset(&sContext, 0, sizeof(sContext));
  6167         -
  6168         -  /* The output cell may already have a buffer allocated. Move
  6169         -  ** the current contents to sContext.s so in case the user-function 
  6170         -  ** can use the already allocated buffer instead of allocating a 
  6171         -  ** new one.
  6172         -  */
  6173         -  sqlite3VdbeMemMove(&sContext.s, pDest);
  6174         -  MemSetTypeFlag(&sContext.s, MEM_Null);
  6175         -
         6162  +  sContext.pOut = pDest;
         6163  +  MemSetTypeFlag(pDest, MEM_Null);
  6176   6164     rc = pModule->xColumn(pCur->pVtabCursor, &sContext, pOp->p2);
  6177   6165     sqlite3VtabImportErrmsg(p, pVtab);
  6178   6166     if( sContext.isError ){
  6179   6167       rc = sContext.isError;
  6180   6168     }
  6181         -
  6182         -  /* Copy the result of the function to the P3 register. We
  6183         -  ** do this regardless of whether or not an error occurred to ensure any
  6184         -  ** dynamic allocation in sContext.s (a Mem struct) is  released.
  6185         -  */
  6186         -  sqlite3VdbeChangeEncoding(&sContext.s, encoding);
  6187         -  sqlite3VdbeMemMove(pDest, &sContext.s);
         6169  +  sqlite3VdbeChangeEncoding(pDest, encoding);
  6188   6170     REGISTER_TRACE(pOp->p3, pDest);
  6189   6171     UPDATE_MAX_BLOBSIZE(pDest);
  6190   6172   
  6191   6173     if( sqlite3VdbeMemTooBig(pDest) ){
  6192   6174       goto too_big;
  6193   6175     }
  6194   6176     break;

Changes to src/vdbeInt.h.

   262    262   ** But this file is the only place where the internal details of this
   263    263   ** structure are known.
   264    264   **
   265    265   ** This structure is defined inside of vdbeInt.h because it uses substructures
   266    266   ** (Mem) which are only defined there.
   267    267   */
   268    268   struct sqlite3_context {
          269  +  Mem *pOut;            /* The return value is stored here */
   269    270     FuncDef *pFunc;       /* Pointer to function information */
   270         -  Mem s;                /* The return value is stored here */
   271    271     Mem *pMem;            /* Memory cell used to store aggregate context */
   272    272     CollSeq *pColl;       /* Collating sequence */
   273    273     Vdbe *pVdbe;          /* The VM that owns this context */
   274    274     int iOp;              /* Instruction number of OP_Function */
   275    275     int isError;          /* Error code returned by the function. */
   276    276     u8 skipFlag;          /* Skip accumulator loading if true */
   277    277     u8 fErrorOrAux;       /* isError!=0 or pVdbe->pAuxData modified */
................................................................................
   458    458   int sqlite3VdbeCloseStatement(Vdbe *, int);
   459    459   void sqlite3VdbeFrameDelete(VdbeFrame*);
   460    460   int sqlite3VdbeFrameRestore(VdbeFrame *);
   461    461   void sqlite3VdbePreUpdateHook(
   462    462       Vdbe *, VdbeCursor *, int, const char*, Table *, i64, int);
   463    463   int sqlite3VdbeTransferError(Vdbe *p);
   464    464   
   465         -int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *);
          465  +int sqlite3VdbeSorterInit(sqlite3 *, int, VdbeCursor *);
   466    466   void sqlite3VdbeSorterReset(sqlite3 *, VdbeSorter *);
   467    467   void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *);
   468    468   int sqlite3VdbeSorterRowkey(const VdbeCursor *, Mem *);
   469    469   int sqlite3VdbeSorterNext(sqlite3 *, const VdbeCursor *, int *);
   470         -int sqlite3VdbeSorterRewind(sqlite3 *, const VdbeCursor *, int *);
   471         -int sqlite3VdbeSorterWrite(sqlite3 *, const VdbeCursor *, Mem *);
          470  +int sqlite3VdbeSorterRewind(const VdbeCursor *, int *);
          471  +int sqlite3VdbeSorterWrite(const VdbeCursor *, Mem *);
   472    472   int sqlite3VdbeSorterCompare(const VdbeCursor *, Mem *, int, int *);
   473    473   
   474    474   #if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0
   475    475     void sqlite3VdbeEnter(Vdbe*);
   476    476     void sqlite3VdbeLeave(Vdbe*);
   477    477   #else
   478    478   # define sqlite3VdbeEnter(X)

Changes to src/vdbeapi.c.

   219    219   static void setResultStrOrError(
   220    220     sqlite3_context *pCtx,  /* Function context */
   221    221     const char *z,          /* String pointer */
   222    222     int n,                  /* Bytes in string, or negative */
   223    223     u8 enc,                 /* Encoding of z.  0 for BLOBs */
   224    224     void (*xDel)(void*)     /* Destructor function */
   225    225   ){
   226         -  if( sqlite3VdbeMemSetStr(&pCtx->s, z, n, enc, xDel)==SQLITE_TOOBIG ){
          226  +  if( sqlite3VdbeMemSetStr(pCtx->pOut, z, n, enc, xDel)==SQLITE_TOOBIG ){
   227    227       sqlite3_result_error_toobig(pCtx);
   228    228     }
   229    229   }
   230    230   void sqlite3_result_blob(
   231    231     sqlite3_context *pCtx, 
   232    232     const void *z, 
   233    233     int n, 
   234    234     void (*xDel)(void *)
   235    235   ){
   236    236     assert( n>=0 );
   237         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          237  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   238    238     setResultStrOrError(pCtx, z, n, 0, xDel);
   239    239   }
   240    240   void sqlite3_result_double(sqlite3_context *pCtx, double rVal){
   241         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   242         -  sqlite3VdbeMemSetDouble(&pCtx->s, rVal);
          241  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          242  +  sqlite3VdbeMemSetDouble(pCtx->pOut, rVal);
   243    243   }
   244    244   void sqlite3_result_error(sqlite3_context *pCtx, const char *z, int n){
   245         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          245  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   246    246     pCtx->isError = SQLITE_ERROR;
   247    247     pCtx->fErrorOrAux = 1;
   248         -  sqlite3VdbeMemSetStr(&pCtx->s, z, n, SQLITE_UTF8, SQLITE_TRANSIENT);
          248  +  sqlite3VdbeMemSetStr(pCtx->pOut, z, n, SQLITE_UTF8, SQLITE_TRANSIENT);
   249    249   }
   250    250   #ifndef SQLITE_OMIT_UTF16
   251    251   void sqlite3_result_error16(sqlite3_context *pCtx, const void *z, int n){
   252         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          252  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   253    253     pCtx->isError = SQLITE_ERROR;
   254    254     pCtx->fErrorOrAux = 1;
   255         -  sqlite3VdbeMemSetStr(&pCtx->s, z, n, SQLITE_UTF16NATIVE, SQLITE_TRANSIENT);
          255  +  sqlite3VdbeMemSetStr(pCtx->pOut, z, n, SQLITE_UTF16NATIVE, SQLITE_TRANSIENT);
   256    256   }
   257    257   #endif
   258    258   void sqlite3_result_int(sqlite3_context *pCtx, int iVal){
   259         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   260         -  sqlite3VdbeMemSetInt64(&pCtx->s, (i64)iVal);
          259  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          260  +  sqlite3VdbeMemSetInt64(pCtx->pOut, (i64)iVal);
   261    261   }
   262    262   void sqlite3_result_int64(sqlite3_context *pCtx, i64 iVal){
   263         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   264         -  sqlite3VdbeMemSetInt64(&pCtx->s, iVal);
          263  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          264  +  sqlite3VdbeMemSetInt64(pCtx->pOut, iVal);
   265    265   }
   266    266   void sqlite3_result_null(sqlite3_context *pCtx){
   267         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   268         -  sqlite3VdbeMemSetNull(&pCtx->s);
          267  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          268  +  sqlite3VdbeMemSetNull(pCtx->pOut);
   269    269   }
   270    270   void sqlite3_result_text(
   271    271     sqlite3_context *pCtx, 
   272    272     const char *z, 
   273    273     int n,
   274    274     void (*xDel)(void *)
   275    275   ){
   276         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          276  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   277    277     setResultStrOrError(pCtx, z, n, SQLITE_UTF8, xDel);
   278    278   }
   279    279   #ifndef SQLITE_OMIT_UTF16
   280    280   void sqlite3_result_text16(
   281    281     sqlite3_context *pCtx, 
   282    282     const void *z, 
   283    283     int n, 
   284    284     void (*xDel)(void *)
   285    285   ){
   286         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          286  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   287    287     setResultStrOrError(pCtx, z, n, SQLITE_UTF16NATIVE, xDel);
   288    288   }
   289    289   void sqlite3_result_text16be(
   290    290     sqlite3_context *pCtx, 
   291    291     const void *z, 
   292    292     int n, 
   293    293     void (*xDel)(void *)
   294    294   ){
   295         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          295  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   296    296     setResultStrOrError(pCtx, z, n, SQLITE_UTF16BE, xDel);
   297    297   }
   298    298   void sqlite3_result_text16le(
   299    299     sqlite3_context *pCtx, 
   300    300     const void *z, 
   301    301     int n, 
   302    302     void (*xDel)(void *)
   303    303   ){
   304         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          304  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   305    305     setResultStrOrError(pCtx, z, n, SQLITE_UTF16LE, xDel);
   306    306   }
   307    307   #endif /* SQLITE_OMIT_UTF16 */
   308    308   void sqlite3_result_value(sqlite3_context *pCtx, sqlite3_value *pValue){
   309         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   310         -  sqlite3VdbeMemCopy(&pCtx->s, pValue);
          309  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          310  +  sqlite3VdbeMemCopy(pCtx->pOut, pValue);
   311    311   }
   312    312   void sqlite3_result_zeroblob(sqlite3_context *pCtx, int n){
   313         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   314         -  sqlite3VdbeMemSetZeroBlob(&pCtx->s, n);
          313  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          314  +  sqlite3VdbeMemSetZeroBlob(pCtx->pOut, n);
   315    315   }
   316    316   void sqlite3_result_error_code(sqlite3_context *pCtx, int errCode){
   317    317     pCtx->isError = errCode;
   318    318     pCtx->fErrorOrAux = 1;
   319         -  if( pCtx->s.flags & MEM_Null ){
   320         -    sqlite3VdbeMemSetStr(&pCtx->s, sqlite3ErrStr(errCode), -1, 
          319  +  if( pCtx->pOut->flags & MEM_Null ){
          320  +    sqlite3VdbeMemSetStr(pCtx->pOut, sqlite3ErrStr(errCode), -1, 
   321    321                            SQLITE_UTF8, SQLITE_STATIC);
   322    322     }
   323    323   }
   324    324   
   325    325   /* Force an SQLITE_TOOBIG error. */
   326    326   void sqlite3_result_error_toobig(sqlite3_context *pCtx){
   327         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          327  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   328    328     pCtx->isError = SQLITE_TOOBIG;
   329    329     pCtx->fErrorOrAux = 1;
   330         -  sqlite3VdbeMemSetStr(&pCtx->s, "string or blob too big", -1, 
          330  +  sqlite3VdbeMemSetStr(pCtx->pOut, "string or blob too big", -1, 
   331    331                          SQLITE_UTF8, SQLITE_STATIC);
   332    332   }
   333    333   
   334    334   /* An SQLITE_NOMEM error. */
   335    335   void sqlite3_result_error_nomem(sqlite3_context *pCtx){
   336         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   337         -  sqlite3VdbeMemSetNull(&pCtx->s);
          336  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          337  +  sqlite3VdbeMemSetNull(pCtx->pOut);
   338    338     pCtx->isError = SQLITE_NOMEM;
   339    339     pCtx->fErrorOrAux = 1;
   340         -  pCtx->s.db->mallocFailed = 1;
          340  +  pCtx->pOut->db->mallocFailed = 1;
   341    341   }
   342    342   
   343    343   /*
   344    344   ** This function is called after a transaction has been committed. It 
   345    345   ** invokes callbacks registered with sqlite3_wal_hook() as required.
   346    346   */
   347    347   static int doWalCallbacks(sqlite3 *db){
................................................................................
   564    564   ** returns a copy of the pointer to the database connection (the 1st
   565    565   ** parameter) of the sqlite3_create_function() and
   566    566   ** sqlite3_create_function16() routines that originally registered the
   567    567   ** application defined function.
   568    568   */
   569    569   sqlite3 *sqlite3_context_db_handle(sqlite3_context *p){
   570    570     assert( p && p->pFunc );
   571         -  return p->s.db;
          571  +  return p->pOut->db;
   572    572   }
   573    573   
   574    574   /*
   575    575   ** Return the current time for a statement
   576    576   */
   577    577   sqlite3_int64 sqlite3StmtCurrentTime(sqlite3_context *p){
   578    578     Vdbe *v = p->pVdbe;
   579    579     int rc;
   580    580     if( v->iCurrentTime==0 ){
   581         -    rc = sqlite3OsCurrentTimeInt64(p->s.db->pVfs, &v->iCurrentTime);
          581  +    rc = sqlite3OsCurrentTimeInt64(p->pOut->db->pVfs, &v->iCurrentTime);
   582    582       if( rc ) v->iCurrentTime = 0;
   583    583     }
   584    584     return v->iCurrentTime;
   585    585   }
   586    586   
   587    587   /*
   588    588   ** The following is the implementation of an SQL function that always
................................................................................
   631    631   /*
   632    632   ** Allocate or return the aggregate context for a user function.  A new
   633    633   ** context is allocated on the first call.  Subsequent calls return the
   634    634   ** same context that was returned on prior calls.
   635    635   */
   636    636   void *sqlite3_aggregate_context(sqlite3_context *p, int nByte){
   637    637     assert( p && p->pFunc && p->pFunc->xStep );
   638         -  assert( sqlite3_mutex_held(p->s.db->mutex) );
          638  +  assert( sqlite3_mutex_held(p->pOut->db->mutex) );
   639    639     testcase( nByte<0 );
   640    640     if( (p->pMem->flags & MEM_Agg)==0 ){
   641    641       return createAggContext(p, nByte);
   642    642     }else{
   643    643       return (void*)p->pMem->z;
   644    644     }
   645    645   }
................................................................................
   647    647   /*
   648    648   ** Return the auxiliary data pointer, if any, for the iArg'th argument to
   649    649   ** the user-function defined by pCtx.
   650    650   */
   651    651   void *sqlite3_get_auxdata(sqlite3_context *pCtx, int iArg){
   652    652     AuxData *pAuxData;
   653    653   
   654         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          654  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   655    655     for(pAuxData=pCtx->pVdbe->pAuxData; pAuxData; pAuxData=pAuxData->pNext){
   656    656       if( pAuxData->iOp==pCtx->iOp && pAuxData->iArg==iArg ) break;
   657    657     }
   658    658   
   659    659     return (pAuxData ? pAuxData->pAux : 0);
   660    660   }
   661    661   
................................................................................
   669    669     int iArg, 
   670    670     void *pAux, 
   671    671     void (*xDelete)(void*)
   672    672   ){
   673    673     AuxData *pAuxData;
   674    674     Vdbe *pVdbe = pCtx->pVdbe;
   675    675   
   676         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          676  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   677    677     if( iArg<0 ) goto failed;
   678    678   
   679    679     for(pAuxData=pVdbe->pAuxData; pAuxData; pAuxData=pAuxData->pNext){
   680    680       if( pAuxData->iOp==pCtx->iOp && pAuxData->iArg==iArg ) break;
   681    681     }
   682    682     if( pAuxData==0 ){
   683    683       pAuxData = sqlite3DbMallocZero(pVdbe->db, sizeof(AuxData));

Changes to src/vdbeaux.c.

  3183   3183   /*
  3184   3184   ** This function compares two index or table record keys in the same way
  3185   3185   ** as the sqlite3VdbeRecordCompare() routine. Unlike VdbeRecordCompare(),
  3186   3186   ** this function deserializes and compares values using the
  3187   3187   ** sqlite3VdbeSerialGet() and sqlite3MemCompare() functions. It is used
  3188   3188   ** in assert() statements to ensure that the optimized code in
  3189   3189   ** sqlite3VdbeRecordCompare() returns results with these two primitives.
         3190  +**
         3191  +** Return true if the result of comparison is equivalent to desiredResult.
         3192  +** Return false if there is a disagreement.
  3190   3193   */
  3191   3194   static int vdbeRecordCompareDebug(
  3192   3195     int nKey1, const void *pKey1, /* Left key */
  3193         -  const UnpackedRecord *pPKey2  /* Right key */
         3196  +  const UnpackedRecord *pPKey2, /* Right key */
         3197  +  int desiredResult             /* Correct answer */
  3194   3198   ){
  3195   3199     u32 d1;            /* Offset into aKey[] of next data element */
  3196   3200     u32 idx1;          /* Offset into aKey[] of next header element */
  3197   3201     u32 szHdr1;        /* Number of bytes in header */
  3198   3202     int i = 0;
  3199   3203     int rc = 0;
  3200   3204     const unsigned char *aKey1 = (const unsigned char *)pKey1;
  3201   3205     KeyInfo *pKeyInfo;
  3202   3206     Mem mem1;
  3203   3207   
  3204   3208     pKeyInfo = pPKey2->pKeyInfo;
         3209  +  if( pKeyInfo->db==0 ) return 1;
  3205   3210     mem1.enc = pKeyInfo->enc;
  3206   3211     mem1.db = pKeyInfo->db;
  3207   3212     /* mem1.flags = 0;  // Will be initialized by sqlite3VdbeSerialGet() */
  3208   3213     VVA_ONLY( mem1.zMalloc = 0; ) /* Only needed by assert() statements */
  3209   3214   
  3210   3215     /* Compilers may complain that mem1.u.i is potentially uninitialized.
  3211   3216     ** We could initialize it, as shown here, to silence those complaints.
................................................................................
  3248   3253       */
  3249   3254       rc = sqlite3MemCompare(&mem1, &pPKey2->aMem[i], pKeyInfo->aColl[i]);
  3250   3255       if( rc!=0 ){
  3251   3256         assert( mem1.zMalloc==0 );  /* See comment below */
  3252   3257         if( pKeyInfo->aSortOrder[i] ){
  3253   3258           rc = -rc;  /* Invert the result for DESC sort order. */
  3254   3259         }
  3255         -      return rc;
         3260  +      goto debugCompareEnd;
  3256   3261       }
  3257   3262       i++;
  3258   3263     }while( idx1<szHdr1 && i<pPKey2->nField );
  3259   3264   
  3260   3265     /* No memory allocation is ever used on mem1.  Prove this using
  3261   3266     ** the following assert().  If the assert() fails, it indicates a
  3262   3267     ** memory leak and a need to call sqlite3VdbeMemRelease(&mem1).
  3263   3268     */
  3264   3269     assert( mem1.zMalloc==0 );
  3265   3270   
  3266   3271     /* rc==0 here means that one of the keys ran out of fields and
  3267   3272     ** all the fields up to that point were equal. Return the default_rc
  3268   3273     ** value.  */
  3269         -  return pPKey2->default_rc;
         3274  +  rc = pPKey2->default_rc;
         3275  +
         3276  +debugCompareEnd:
         3277  +  if( desiredResult==0 && rc==0 ) return 1;
         3278  +  if( desiredResult<0 && rc<0 ) return 1;
         3279  +  if( desiredResult>0 && rc>0 ) return 1;
         3280  +  if( CORRUPT_DB ) return 1;
         3281  +  if( pKeyInfo->db->mallocFailed ) return 1;
         3282  +  return 0;
  3270   3283   }
  3271   3284   #endif
  3272   3285   
  3273   3286   /*
  3274   3287   ** Both *pMem1 and *pMem2 contain string values. Compare the two values
  3275   3288   ** using the collation sequence pColl. As usual, return a negative, zero
  3276   3289   ** or positive value if *pMem1 is less than, equal to or greater than 
  3277   3290   ** *pMem2, respectively. Similar in spirit to "rc = (*pMem1) - (*pMem2);".
  3278   3291   */
  3279   3292   static int vdbeCompareMemString(
  3280   3293     const Mem *pMem1,
  3281   3294     const Mem *pMem2,
  3282         -  const CollSeq *pColl
         3295  +  const CollSeq *pColl,
         3296  +  u8 *prcErr                      /* If an OOM occurs, set to SQLITE_NOMEM */
  3283   3297   ){
  3284   3298     if( pMem1->enc==pColl->enc ){
  3285   3299       /* The strings are already in the correct encoding.  Call the
  3286   3300        ** comparison function directly */
  3287   3301       return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z);
  3288   3302     }else{
  3289   3303       int rc;
................................................................................
  3298   3312       v1 = sqlite3ValueText((sqlite3_value*)&c1, pColl->enc);
  3299   3313       n1 = v1==0 ? 0 : c1.n;
  3300   3314       v2 = sqlite3ValueText((sqlite3_value*)&c2, pColl->enc);
  3301   3315       n2 = v2==0 ? 0 : c2.n;
  3302   3316       rc = pColl->xCmp(pColl->pUser, n1, v1, n2, v2);
  3303   3317       sqlite3VdbeMemRelease(&c1);
  3304   3318       sqlite3VdbeMemRelease(&c2);
         3319  +    if( (v1==0 || v2==0) && prcErr ) *prcErr = SQLITE_NOMEM;
  3305   3320       return rc;
  3306   3321     }
  3307   3322   }
  3308   3323   
  3309   3324   /*
  3310   3325   ** Compare the values contained by the two memory cells, returning
  3311   3326   ** negative, zero or positive if pMem1 is less than, equal to, or greater
................................................................................
  3380   3395       /* The collation sequence must be defined at this point, even if
  3381   3396       ** the user deletes the collation sequence after the vdbe program is
  3382   3397       ** compiled (this was not always the case).
  3383   3398       */
  3384   3399       assert( !pColl || pColl->xCmp );
  3385   3400   
  3386   3401       if( pColl ){
  3387         -      return vdbeCompareMemString(pMem1, pMem2, pColl);
         3402  +      return vdbeCompareMemString(pMem1, pMem2, pColl, 0);
  3388   3403       }
  3389   3404       /* If a NULL pointer was passed as the collate function, fall through
  3390   3405       ** to the blob case and use memcmp().  */
  3391   3406     }
  3392   3407    
  3393   3408     /* Both values must be blobs.  Compare using memcmp().  */
  3394   3409     rc = memcmp(pMem1->z, pMem2->z, (pMem1->n>pMem2->n)?pMem2->n:pMem1->n);
................................................................................
  3452   3467   ** If argument bSkip is non-zero, it is assumed that the caller has already
  3453   3468   ** determined that the first fields of the keys are equal.
  3454   3469   **
  3455   3470   ** Key1 and Key2 do not have to contain the same number of fields. If all 
  3456   3471   ** fields that appear in both keys are equal, then pPKey2->default_rc is 
  3457   3472   ** returned.
  3458   3473   **
  3459         -** If database corruption is discovered, set pPKey2->isCorrupt to non-zero
  3460         -** and return 0.
         3474  +** If database corruption is discovered, set pPKey2->errCode to 
         3475  +** SQLITE_CORRUPT and return 0. If an OOM error is encountered, 
         3476  +** pPKey2->errCode is set to SQLITE_NOMEM and, if it is not NULL, the
         3477  +** malloc-failed flag set on database handle (pPKey2->pKeyInfo->db).
  3461   3478   */
  3462   3479   int sqlite3VdbeRecordCompare(
  3463   3480     int nKey1, const void *pKey1,   /* Left key */
  3464   3481     UnpackedRecord *pPKey2,         /* Right key */
  3465   3482     int bSkip                       /* If true, skip the first field */
  3466   3483   ){
  3467   3484     u32 d1;                         /* Offset into aKey[] of next data element */
................................................................................
  3484   3501       d1 = szHdr1 + sqlite3VdbeSerialTypeLen(s1);
  3485   3502       i = 1;
  3486   3503       pRhs++;
  3487   3504     }else{
  3488   3505       idx1 = getVarint32(aKey1, szHdr1);
  3489   3506       d1 = szHdr1;
  3490   3507       if( d1>(unsigned)nKey1 ){ 
  3491         -      pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT;
         3508  +      pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
  3492   3509         return 0;  /* Corruption */
  3493   3510       }
  3494   3511       i = 0;
  3495   3512     }
  3496   3513   
  3497   3514     VVA_ONLY( mem1.zMalloc = 0; ) /* Only needed by assert() statements */
  3498   3515     assert( pPKey2->pKeyInfo->nField+pPKey2->pKeyInfo->nXField>=pPKey2->nField 
................................................................................
  3563   3580         }else if( !(serial_type & 0x01) ){
  3564   3581           rc = +1;
  3565   3582         }else{
  3566   3583           mem1.n = (serial_type - 12) / 2;
  3567   3584           testcase( (d1+mem1.n)==(unsigned)nKey1 );
  3568   3585           testcase( (d1+mem1.n+1)==(unsigned)nKey1 );
  3569   3586           if( (d1+mem1.n) > (unsigned)nKey1 ){
  3570         -          pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT;
         3587  +          pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
  3571   3588             return 0;                /* Corruption */
  3572   3589           }else if( pKeyInfo->aColl[i] ){
  3573   3590             mem1.enc = pKeyInfo->enc;
  3574   3591             mem1.db = pKeyInfo->db;
  3575   3592             mem1.flags = MEM_Str;
  3576   3593             mem1.z = (char*)&aKey1[d1];
  3577         -          rc = vdbeCompareMemString(&mem1, pRhs, pKeyInfo->aColl[i]);
         3594  +          rc = vdbeCompareMemString(
         3595  +              &mem1, pRhs, pKeyInfo->aColl[i], &pPKey2->errCode
         3596  +          );
  3578   3597           }else{
  3579   3598             int nCmp = MIN(mem1.n, pRhs->n);
  3580   3599             rc = memcmp(&aKey1[d1], pRhs->z, nCmp);
  3581   3600             if( rc==0 ) rc = mem1.n - pRhs->n; 
  3582   3601           }
  3583   3602         }
  3584   3603       }
................................................................................
  3590   3609         if( serial_type<12 || (serial_type & 0x01) ){
  3591   3610           rc = -1;
  3592   3611         }else{
  3593   3612           int nStr = (serial_type - 12) / 2;
  3594   3613           testcase( (d1+nStr)==(unsigned)nKey1 );
  3595   3614           testcase( (d1+nStr+1)==(unsigned)nKey1 );
  3596   3615           if( (d1+nStr) > (unsigned)nKey1 ){
  3597         -          pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT;
         3616  +          pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
  3598   3617             return 0;                /* Corruption */
  3599   3618           }else{
  3600   3619             int nCmp = MIN(nStr, pRhs->n);
  3601   3620             rc = memcmp(&aKey1[d1], pRhs->z, nCmp);
  3602   3621             if( rc==0 ) rc = nStr - pRhs->n;
  3603   3622           }
  3604   3623         }
................................................................................
  3610   3629         rc = (serial_type!=0);
  3611   3630       }
  3612   3631   
  3613   3632       if( rc!=0 ){
  3614   3633         if( pKeyInfo->aSortOrder[i] ){
  3615   3634           rc = -rc;
  3616   3635         }
  3617         -      assert( CORRUPT_DB
  3618         -          || (rc<0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)<0)
  3619         -          || (rc>0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)>0)
  3620         -          || pKeyInfo->db->mallocFailed
  3621         -      );
         3636  +      assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, rc) );
  3622   3637         assert( mem1.zMalloc==0 );  /* See comment below */
  3623   3638         return rc;
  3624   3639       }
  3625   3640   
  3626   3641       i++;
  3627   3642       pRhs++;
  3628   3643       d1 += sqlite3VdbeSerialTypeLen(serial_type);
................................................................................
  3634   3649     ** memory leak and a need to call sqlite3VdbeMemRelease(&mem1).  */
  3635   3650     assert( mem1.zMalloc==0 );
  3636   3651   
  3637   3652     /* rc==0 here means that one or both of the keys ran out of fields and
  3638   3653     ** all the fields up to that point were equal. Return the default_rc
  3639   3654     ** value.  */
  3640   3655     assert( CORRUPT_DB 
  3641         -       || pPKey2->default_rc==vdbeRecordCompareDebug(nKey1, pKey1, pPKey2) 
         3656  +       || vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, pPKey2->default_rc) 
  3642   3657          || pKeyInfo->db->mallocFailed
  3643   3658     );
  3644   3659     return pPKey2->default_rc;
  3645   3660   }
  3646   3661   
  3647   3662   /*
  3648   3663   ** This function is an optimized version of sqlite3VdbeRecordCompare() 
................................................................................
  3733   3748       res = sqlite3VdbeRecordCompare(nKey1, pKey1, pPKey2, 1);
  3734   3749     }else{
  3735   3750       /* The first fields of the two keys are equal and there are no trailing
  3736   3751       ** fields. Return pPKey2->default_rc in this case. */
  3737   3752       res = pPKey2->default_rc;
  3738   3753     }
  3739   3754   
  3740         -  assert( (res==0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)==0)
  3741         -       || (res<0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)<0)
  3742         -       || (res>0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)>0)
  3743         -       || CORRUPT_DB
  3744         -  );
         3755  +  assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res) );
  3745   3756     return res;
  3746   3757   }
  3747   3758   
  3748   3759   /*
  3749   3760   ** This function is an optimized version of sqlite3VdbeRecordCompare() 
  3750   3761   ** that (a) the first field of pPKey2 is a string, that (b) the first field
  3751   3762   ** uses the collation sequence BINARY and (c) that the size-of-header varint 
................................................................................
  3771   3782     }else{
  3772   3783       int nCmp;
  3773   3784       int nStr;
  3774   3785       int szHdr = aKey1[0];
  3775   3786   
  3776   3787       nStr = (serial_type-12) / 2;
  3777   3788       if( (szHdr + nStr) > nKey1 ){
  3778         -      pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT;
         3789  +      pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
  3779   3790         return 0;    /* Corruption */
  3780   3791       }
  3781   3792       nCmp = MIN( pPKey2->aMem[0].n, nStr );
  3782   3793       res = memcmp(&aKey1[szHdr], pPKey2->aMem[0].z, nCmp);
  3783   3794   
  3784   3795       if( res==0 ){
  3785   3796         res = nStr - pPKey2->aMem[0].n;
................................................................................
  3797   3808       }else if( res>0 ){
  3798   3809         res = pPKey2->r2;
  3799   3810       }else{
  3800   3811         res = pPKey2->r1;
  3801   3812       }
  3802   3813     }
  3803   3814   
  3804         -  assert( (res==0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)==0)
  3805         -       || (res<0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)<0)
  3806         -       || (res>0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)>0)
         3815  +  assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res)
  3807   3816          || CORRUPT_DB
  3808   3817          || pPKey2->pKeyInfo->db->mallocFailed
  3809   3818     );
  3810   3819     return res;
  3811   3820   }
  3812   3821   
  3813   3822   /*

Changes to src/vdbemem.c.

   196    196       pMem->n += pMem->u.nZero;
   197    197       pMem->flags &= ~(MEM_Zero|MEM_Term);
   198    198     }
   199    199     return SQLITE_OK;
   200    200   }
   201    201   #endif
   202    202   
   203         -
   204    203   /*
   205         -** Make sure the given Mem is \u0000 terminated.
          204  +** It is already known that pMem contains an unterminated string.
          205  +** Add the zero terminator.
   206    206   */
   207         -int sqlite3VdbeMemNulTerminate(Mem *pMem){
   208         -  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
   209         -  if( (pMem->flags & MEM_Term)!=0 || (pMem->flags & MEM_Str)==0 ){
   210         -    return SQLITE_OK;   /* Nothing to do */
   211         -  }
          207  +static SQLITE_NOINLINE int vdbeMemAddTerminator(Mem *pMem){
   212    208     if( sqlite3VdbeMemGrow(pMem, pMem->n+2, 1) ){
   213    209       return SQLITE_NOMEM;
   214    210     }
   215    211     pMem->z[pMem->n] = 0;
   216    212     pMem->z[pMem->n+1] = 0;
   217    213     pMem->flags |= MEM_Term;
   218    214     return SQLITE_OK;
   219    215   }
          216  +
          217  +/*
          218  +** Make sure the given Mem is \u0000 terminated.
          219  +*/
          220  +int sqlite3VdbeMemNulTerminate(Mem *pMem){
          221  +  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
          222  +  testcase( (pMem->flags & (MEM_Term|MEM_Str))==(MEM_Term|MEM_Str) );
          223  +  testcase( (pMem->flags & (MEM_Term|MEM_Str))==0 );
          224  +  if( (pMem->flags & (MEM_Term|MEM_Str))!=MEM_Str ){
          225  +    return SQLITE_OK;   /* Nothing to do */
          226  +  }else{
          227  +    return vdbeMemAddTerminator(pMem);
          228  +  }
          229  +}
   220    230   
   221    231   /*
   222    232   ** Add MEM_Str to the set of representations for the given Mem.  Numbers
   223    233   ** are converted using sqlite3_snprintf().  Converting a BLOB to a string
   224    234   ** is a no-op.
   225    235   **
   226    236   ** Existing representations MEM_Int and MEM_Real are invalidated if
................................................................................
   276    286   ** Return SQLITE_ERROR if the finalizer reports an error.  SQLITE_OK
   277    287   ** otherwise.
   278    288   */
   279    289   int sqlite3VdbeMemFinalize(Mem *pMem, FuncDef *pFunc){
   280    290     int rc = SQLITE_OK;
   281    291     if( ALWAYS(pFunc && pFunc->xFinalize) ){
   282    292       sqlite3_context ctx;
          293  +    Mem t;
   283    294       assert( (pMem->flags & MEM_Null)!=0 || pFunc==pMem->u.pDef );
   284    295       assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
   285    296       memset(&ctx, 0, sizeof(ctx));
   286         -    ctx.s.flags = MEM_Null;
   287         -    ctx.s.db = pMem->db;
          297  +    memset(&t, 0, sizeof(t));
          298  +    t.flags = MEM_Null;
          299  +    t.db = pMem->db;
          300  +    ctx.pOut = &t;
   288    301       ctx.pMem = pMem;
   289    302       ctx.pFunc = pFunc;
   290    303       pFunc->xFinalize(&ctx); /* IMP: R-24505-23230 */
   291    304       assert( 0==(pMem->flags&MEM_Dyn) && !pMem->xDel );
   292    305       sqlite3DbFree(pMem->db, pMem->zMalloc);
   293         -    memcpy(pMem, &ctx.s, sizeof(ctx.s));
          306  +    memcpy(pMem, &t, sizeof(t));
   294    307       rc = ctx.isError;
   295    308     }
   296    309     return rc;
   297    310   }
   298    311   
   299    312   /*
   300    313   ** If the memory cell contains a string value that must be freed by
................................................................................
   600    613     sqlite3VdbeMemGrow(pMem, n, 0);
   601    614     if( pMem->z ){
   602    615       pMem->n = n;
   603    616       memset(pMem->z, 0, n);
   604    617     }
   605    618   #endif
   606    619   }
          620  +
          621  +/*
          622  +** The pMem is known to contain content that needs to be destroyed prior
          623  +** to a value change.  So invoke the destructor, then set the value to
          624  +** a 64-bit integer.
          625  +*/
          626  +static SQLITE_NOINLINE void vdbeReleaseAndSetInt64(Mem *pMem, i64 val){
          627  +  sqlite3VdbeMemReleaseExternal(pMem);
          628  +  pMem->u.i = val;
          629  +  pMem->flags = MEM_Int;
          630  +}
   607    631   
   608    632   /*
   609    633   ** Delete any previous value and set the value stored in *pMem to val,
   610    634   ** manifest type INTEGER.
   611    635   */
   612    636   void sqlite3VdbeMemSetInt64(Mem *pMem, i64 val){
   613         -  sqlite3VdbeMemRelease(pMem);
   614         -  pMem->u.i = val;
   615         -  pMem->flags = MEM_Int;
          637  +  if( VdbeMemDynamic(pMem) ){
          638  +    vdbeReleaseAndSetInt64(pMem, val);
          639  +  }else{
          640  +    pMem->u.i = val;
          641  +    pMem->flags = MEM_Int;
          642  +  }
   616    643   }
   617    644   
   618    645   #ifndef SQLITE_OMIT_FLOATING_POINT
   619    646   /*
   620    647   ** Delete any previous value and set the value stored in *pMem to val,
   621    648   ** manifest type REAL.
   622    649   */
................................................................................
   904    931       }else{
   905    932         sqlite3VdbeMemRelease(pMem);
   906    933       }
   907    934     }
   908    935   
   909    936     return rc;
   910    937   }
          938  +
          939  +/*
          940  +** The pVal argument is known to be a value other than NULL.
          941  +** Convert it into a string with encoding enc and return a pointer
          942  +** to a zero-terminated version of that string.
          943  +*/
          944  +SQLITE_NOINLINE const void *valueToText(sqlite3_value* pVal, u8 enc){
          945  +  assert( pVal!=0 );
          946  +  assert( pVal->db==0 || sqlite3_mutex_held(pVal->db->mutex) );
          947  +  assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) );
          948  +  assert( (pVal->flags & MEM_RowSet)==0 );
          949  +  assert( (pVal->flags & (MEM_Null))==0 );
          950  +  if( pVal->flags & (MEM_Blob|MEM_Str) ){
          951  +    pVal->flags |= MEM_Str;
          952  +    if( pVal->flags & MEM_Zero ){
          953  +      sqlite3VdbeMemExpandBlob(pVal);
          954  +    }
          955  +    if( pVal->enc != (enc & ~SQLITE_UTF16_ALIGNED) ){
          956  +      sqlite3VdbeChangeEncoding(pVal, enc & ~SQLITE_UTF16_ALIGNED);
          957  +    }
          958  +    if( (enc & SQLITE_UTF16_ALIGNED)!=0 && 1==(1&SQLITE_PTR_TO_INT(pVal->z)) ){
          959  +      assert( (pVal->flags & (MEM_Ephem|MEM_Static))!=0 );
          960  +      if( sqlite3VdbeMemMakeWriteable(pVal)!=SQLITE_OK ){
          961  +        return 0;
          962  +      }
          963  +    }
          964  +    sqlite3VdbeMemNulTerminate(pVal); /* IMP: R-31275-44060 */
          965  +  }else{
          966  +    sqlite3VdbeMemStringify(pVal, enc, 0);
          967  +    assert( 0==(1&SQLITE_PTR_TO_INT(pVal->z)) );
          968  +  }
          969  +  assert(pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) || pVal->db==0
          970  +              || pVal->db->mallocFailed );
          971  +  if( pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) ){
          972  +    return pVal->z;
          973  +  }else{
          974  +    return 0;
          975  +  }
          976  +}
   911    977   
   912    978   /* This function is only available internally, it is not part of the
   913    979   ** external API. It works in a similar way to sqlite3_value_text(),
   914    980   ** except the data returned is in the encoding specified by the second
   915    981   ** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or
   916    982   ** SQLITE_UTF8.
   917    983   **
   918    984   ** (2006-02-16:)  The enc value can be or-ed with SQLITE_UTF16_ALIGNED.
   919    985   ** If that is the case, then the result must be aligned on an even byte
   920    986   ** boundary.
   921    987   */
   922    988   const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){
   923    989     if( !pVal ) return 0;
   924         -
   925    990     assert( pVal->db==0 || sqlite3_mutex_held(pVal->db->mutex) );
   926    991     assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) );
   927    992     assert( (pVal->flags & MEM_RowSet)==0 );
   928         -
          993  +  if( (pVal->flags&(MEM_Str|MEM_Term))==(MEM_Str|MEM_Term) && pVal->enc==enc ){
          994  +    return pVal->z;
          995  +  }
   929    996     if( pVal->flags&MEM_Null ){
   930    997       return 0;
   931    998     }
   932         -  assert( (MEM_Blob>>3) == MEM_Str );
   933         -  pVal->flags |= (pVal->flags & MEM_Blob)>>3;
   934         -  ExpandBlob(pVal);
   935         -  if( pVal->flags&MEM_Str ){
   936         -    sqlite3VdbeChangeEncoding(pVal, enc & ~SQLITE_UTF16_ALIGNED);
   937         -    if( (enc & SQLITE_UTF16_ALIGNED)!=0 && 1==(1&SQLITE_PTR_TO_INT(pVal->z)) ){
   938         -      assert( (pVal->flags & (MEM_Ephem|MEM_Static))!=0 );
   939         -      if( sqlite3VdbeMemMakeWriteable(pVal)!=SQLITE_OK ){
   940         -        return 0;
   941         -      }
   942         -    }
   943         -    sqlite3VdbeMemNulTerminate(pVal); /* IMP: R-31275-44060 */
   944         -  }else{
   945         -    assert( (pVal->flags&MEM_Blob)==0 );
   946         -    sqlite3VdbeMemStringify(pVal, enc, 0);
   947         -    assert( 0==(1&SQLITE_PTR_TO_INT(pVal->z)) );
   948         -  }
   949         -  assert(pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) || pVal->db==0
   950         -              || pVal->db->mallocFailed );
   951         -  if( pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) ){
   952         -    return pVal->z;
   953         -  }else{
   954         -    return 0;
   955         -  }
          999  +  return valueToText(pVal, enc);
   956   1000   }
   957   1001   
   958   1002   /*
   959   1003   ** Create a new sqlite3_value object.
   960   1004   */
   961   1005   sqlite3_value *sqlite3ValueNew(sqlite3 *db){
   962   1006     Mem *p = sqlite3DbMallocZero(db, sizeof(*p));

Changes to src/vdbesort.c.

     1      1   /*
     2         -** 2011 July 9
            2  +** 2011-07-09
     3      3   **
     4      4   ** The author disclaims copyright to this source code.  In place of
     5      5   ** a legal notice, here is a blessing:
     6      6   **
     7      7   **    May you do good and not evil.
     8      8   **    May you find forgiveness for yourself and forgive others.
     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   *************************************************************************
    12     12   ** This file contains code for the VdbeSorter object, used in concert with
    13         -** a VdbeCursor to sort large numbers of keys (as may be required, for
    14         -** example, by CREATE INDEX statements on tables too large to fit in main
    15         -** memory).
    16         -*/
    17         -
           13  +** a VdbeCursor to sort large numbers of keys for CREATE INDEX statements
           14  +** or by SELECT statements with ORDER BY clauses that cannot be satisfied
           15  +** using indexes and without LIMIT clauses.
           16  +**
           17  +** The VdbeSorter object implements a multi-threaded external merge sort
           18  +** algorithm that is efficient even if the number of elements being sorted
           19  +** exceeds the available memory.
           20  +**
           21  +** Here is the (internal, non-API) interface between this module and the
           22  +** rest of the SQLite system:
           23  +**
           24  +**    sqlite3VdbeSorterInit()       Create a new VdbeSorter object.
           25  +**
           26  +**    sqlite3VdbeSorterWrite()      Add a single new row to the VdbeSorter
           27  +**                                  object.  The row is a binary blob in the
           28  +**                                  OP_MakeRecord format that contains both
           29  +**                                  the ORDER BY key columns and result columns
           30  +**                                  in the case of a SELECT w/ ORDER BY, or
           31  +**                                  the complete record for an index entry
           32  +**                                  in the case of a CREATE INDEX.
           33  +**
           34  +**    sqlite3VdbeSorterRewind()     Sort all content previously added.
           35  +**                                  Position the read cursor on the
           36  +**                                  first sorted element.
           37  +**
           38  +**    sqlite3VdbeSorterNext()       Advance the read cursor to the next sorted
           39  +**                                  element.
           40  +**
           41  +**    sqlite3VdbeSorterRowkey()     Return the complete binary blob for the
           42  +**                                  row currently under the read cursor.
           43  +**
           44  +**    sqlite3VdbeSorterCompare()    Compare the binary blob for the row
           45  +**                                  currently under the read cursor against
           46  +**                                  another binary blob X and report if
           47  +**                                  X is strictly less than the read cursor.
           48  +**                                  Used to enforce uniqueness in a
           49  +**                                  CREATE UNIQUE INDEX statement.
           50  +**
           51  +**    sqlite3VdbeSorterClose()      Close the VdbeSorter object and reclaim
           52  +**                                  all resources.
           53  +**
           54  +**    sqlite3VdbeSorterReset()      Refurbish the VdbeSorter for reuse.  This
           55  +**                                  is like Close() followed by Init() only
           56  +**                                  much faster.
           57  +**
           58  +** The interfaces above must be called in a particular order.  Write() can 
           59  +** only occur in between Init()/Reset() and Rewind().  Next(), Rowkey(), and
           60  +** Compare() can only occur in between Rewind() and Close()/Reset(). i.e.
           61  +**
           62  +**   Init()
           63  +**   for each record: Write()
           64  +**   Rewind()
           65  +**     Rowkey()/Compare()
           66  +**   Next() 
           67  +**   Close()
           68  +**
           69  +** Algorithm:
           70  +**
           71  +** Records passed to the sorter via calls to Write() are initially held 
           72  +** unsorted in main memory. Assuming the amount of memory used never exceeds
           73  +** a threshold, when Rewind() is called the set of records is sorted using
           74  +** an in-memory merge sort. In this case, no temporary files are required
           75  +** and subsequent calls to Rowkey(), Next() and Compare() read records 
           76  +** directly from main memory.
           77  +**
           78  +** If the amount of space used to store records in main memory exceeds the
           79  +** threshold, then the set of records currently in memory are sorted and
           80  +** written to a temporary file in "Packed Memory Array" (PMA) format.
           81  +** A PMA created at this point is known as a "level-0 PMA". Higher levels
           82  +** of PMAs may be created by merging existing PMAs together - for example
           83  +** merging two or more level-0 PMAs together creates a level-1 PMA.
           84  +**
           85  +** The threshold for the amount of main memory to use before flushing 
           86  +** records to a PMA is roughly the same as the limit configured for the
           87  +** page-cache of the main database. Specifically, the threshold is set to 
           88  +** the value returned by "PRAGMA main.page_size" multipled by 
           89  +** that returned by "PRAGMA main.cache_size", in bytes.
           90  +**
           91  +** If the sorter is running in single-threaded mode, then all PMAs generated
           92  +** are appended to a single temporary file. Or, if the sorter is running in
           93  +** multi-threaded mode then up to (N+1) temporary files may be opened, where
           94  +** N is the configured number of worker threads. In this case, instead of
           95  +** sorting the records and writing the PMA to a temporary file itself, the
           96  +** calling thread usually launches a worker thread to do so. Except, if
           97  +** there are already N worker threads running, the main thread does the work
           98  +** itself.
           99  +**
          100  +** The sorter is running in multi-threaded mode if (a) the library was built
          101  +** with pre-processor symbol SQLITE_MAX_WORKER_THREADS set to a value greater
          102  +** than zero, and (b) worker threads have been enabled at runtime by calling
          103  +** sqlite3_config(SQLITE_CONFIG_WORKER_THREADS, ...).
          104  +**
          105  +** When Rewind() is called, any data remaining in memory is flushed to a 
          106  +** final PMA. So at this point the data is stored in some number of sorted
          107  +** PMAs within temporary files on disk.
          108  +**
          109  +** If there are fewer than SORTER_MAX_MERGE_COUNT PMAs in total and the
          110  +** sorter is running in single-threaded mode, then these PMAs are merged
          111  +** incrementally as keys are retrieved from the sorter by the VDBE.  The
          112  +** MergeEngine object, described in further detail below, performs this
          113  +** merge.
          114  +**
          115  +** Or, if running in multi-threaded mode, then a background thread is
          116  +** launched to merge the existing PMAs. Once the background thread has
          117  +** merged T bytes of data into a single sorted PMA, the main thread 
          118  +** begins reading keys from that PMA while the background thread proceeds
          119  +** with merging the next T bytes of data. And so on.
          120  +**
          121  +** Parameter T is set to half the value of the memory threshold used 
          122  +** by Write() above to determine when to create a new PMA.
          123  +**
          124  +** If there are more than SORTER_MAX_MERGE_COUNT PMAs in total when 
          125  +** Rewind() is called, then a hierarchy of incremental-merges is used. 
          126  +** First, T bytes of data from the first SORTER_MAX_MERGE_COUNT PMAs on 
          127  +** disk are merged together. Then T bytes of data from the second set, and
          128  +** so on, such that no operation ever merges more than SORTER_MAX_MERGE_COUNT
          129  +** PMAs at a time. This is done to improve locality.
          130  +**
          131  +** If running in multi-threaded mode and there are more than
          132  +** SORTER_MAX_MERGE_COUNT PMAs on disk when Rewind() is called, then more
          133  +** than one background thread may be created. Specifically, there may be
          134  +** one background thread for each temporary file on disk, and one background
          135  +** thread to merge the output of each of the others to a single PMA for
          136  +** the main thread to read from.
          137  +*/
    18    138   #include "sqliteInt.h"
    19    139   #include "vdbeInt.h"
    20    140   
    21         -
    22         -typedef struct VdbeSorterIter VdbeSorterIter;
    23         -typedef struct SorterRecord SorterRecord;
    24         -typedef struct FileWriter FileWriter;
    25         -
    26         -/*
    27         -** NOTES ON DATA STRUCTURE USED FOR N-WAY MERGES:
    28         -**
    29         -** As keys are added to the sorter, they are written to disk in a series
    30         -** of sorted packed-memory-arrays (PMAs). The size of each PMA is roughly
    31         -** the same as the cache-size allowed for temporary databases. In order
    32         -** to allow the caller to extract keys from the sorter in sorted order,
    33         -** all PMAs currently stored on disk must be merged together. This comment
    34         -** describes the data structure used to do so. The structure supports 
    35         -** merging any number of arrays in a single pass with no redundant comparison 
    36         -** operations.
    37         -**
    38         -** The aIter[] array contains an iterator for each of the PMAs being merged.
    39         -** An aIter[] iterator either points to a valid key or else is at EOF. For 
    40         -** the purposes of the paragraphs below, we assume that the array is actually 
    41         -** N elements in size, where N is the smallest power of 2 greater to or equal 
    42         -** to the number of iterators being merged. The extra aIter[] elements are 
    43         -** treated as if they are empty (always at EOF).
          141  +/* 
          142  +** If SQLITE_DEBUG_SORTER_THREADS is defined, this module outputs various
          143  +** messages to stderr that may be helpful in understanding the performance
          144  +** characteristics of the sorter in multi-threaded mode.
          145  +*/
          146  +#if 0
          147  +# define SQLITE_DEBUG_SORTER_THREADS 1
          148  +#endif
          149  +
          150  +/*
          151  +** Private objects used by the sorter
          152  +*/
          153  +typedef struct MergeEngine MergeEngine;     /* Merge PMAs together */
          154  +typedef struct PmaReader PmaReader;         /* Incrementally read one PMA */
          155  +typedef struct PmaWriter PmaWriter;         /* Incrementally write one PMA */
          156  +typedef struct SorterRecord SorterRecord;   /* A record being sorted */
          157  +typedef struct SortSubtask SortSubtask;     /* A sub-task in the sort process */
          158  +typedef struct SorterFile SorterFile;       /* Temporary file object wrapper */
          159  +typedef struct SorterList SorterList;       /* In-memory list of records */
          160  +typedef struct IncrMerger IncrMerger;       /* Read & merge multiple PMAs */
          161  +
          162  +/*
          163  +** A container for a temp file handle and the current amount of data 
          164  +** stored in the file.
          165  +*/
          166  +struct SorterFile {
          167  +  sqlite3_file *pFd;              /* File handle */
          168  +  i64 iEof;                       /* Bytes of data stored in pFd */
          169  +};
          170  +
          171  +/*
          172  +** An in-memory list of objects to be sorted.
          173  +**
          174  +** If aMemory==0 then each object is allocated separately and the objects
          175  +** are connected using SorterRecord.u.pNext.  If aMemory!=0 then all objects
          176  +** are stored in the aMemory[] bulk memory, one right after the other, and
          177  +** are connected using SorterRecord.u.iNext.
          178  +*/
          179  +struct SorterList {
          180  +  SorterRecord *pList;            /* Linked list of records */
          181  +  u8 *aMemory;                    /* If non-NULL, bulk memory to hold pList */
          182  +  int szPMA;                      /* Size of pList as PMA in bytes */
          183  +};
          184  +
          185  +/*
          186  +** The MergeEngine object is used to combine two or more smaller PMAs into
          187  +** one big PMA using a merge operation.  Separate PMAs all need to be
          188  +** combined into one big PMA in order to be able to step through the sorted
          189  +** records in order.
          190  +**
          191  +** The aReadr[] array contains a PmaReader object for each of the PMAs being
          192  +** merged.  An aReadr[] object either points to a valid key or else is at EOF.
          193  +** ("EOF" means "End Of File".  When aReadr[] is at EOF there is no more data.)
          194  +** For the purposes of the paragraphs below, we assume that the array is
          195  +** actually N elements in size, where N is the smallest power of 2 greater
          196  +** than or equal to the number of PMAs being merged. The extra aReadr[] elements
          197  +** are treated as if they are empty (always at EOF).
    44    198   **
    45    199   ** The aTree[] array is also N elements in size. The value of N is stored in
    46         -** the VdbeSorter.nTree variable.
          200  +** the MergeEngine.nTree variable.
    47    201   **
    48    202   ** The final (N/2) elements of aTree[] contain the results of comparing
    49         -** pairs of iterator keys together. Element i contains the result of 
    50         -** comparing aIter[2*i-N] and aIter[2*i-N+1]. Whichever key is smaller, the
          203  +** pairs of PMA keys together. Element i contains the result of 
          204  +** comparing aReadr[2*i-N] and aReadr[2*i-N+1]. Whichever key is smaller, the
    51    205   ** aTree element is set to the index of it. 
    52    206   **
    53    207   ** For the purposes of this comparison, EOF is considered greater than any
    54    208   ** other key value. If the keys are equal (only possible with two EOF
    55    209   ** values), it doesn't matter which index is stored.
    56    210   **
    57    211   ** The (N/4) elements of aTree[] that precede the final (N/2) described 
    58         -** above contains the index of the smallest of each block of 4 iterators.
    59         -** And so on. So that aTree[1] contains the index of the iterator that 
          212  +** above contains the index of the smallest of each block of 4 PmaReaders.
          213  +** And so on. So that aTree[1] contains the index of the PmaReader that 
    60    214   ** currently points to the smallest key value. aTree[0] is unused.
    61    215   **
    62    216   ** Example:
    63    217   **
    64         -**     aIter[0] -> Banana
    65         -**     aIter[1] -> Feijoa
    66         -**     aIter[2] -> Elderberry
    67         -**     aIter[3] -> Currant
    68         -**     aIter[4] -> Grapefruit
    69         -**     aIter[5] -> Apple
    70         -**     aIter[6] -> Durian
    71         -**     aIter[7] -> EOF
          218  +**     aReadr[0] -> Banana
          219  +**     aReadr[1] -> Feijoa
          220  +**     aReadr[2] -> Elderberry
          221  +**     aReadr[3] -> Currant
          222  +**     aReadr[4] -> Grapefruit
          223  +**     aReadr[5] -> Apple
          224  +**     aReadr[6] -> Durian
          225  +**     aReadr[7] -> EOF
    72    226   **
    73    227   **     aTree[] = { X, 5   0, 5    0, 3, 5, 6 }
    74    228   **
    75    229   ** The current element is "Apple" (the value of the key indicated by 
    76         -** iterator 5). When the Next() operation is invoked, iterator 5 will
          230  +** PmaReader 5). When the Next() operation is invoked, PmaReader 5 will
    77    231   ** be advanced to the next key in its segment. Say the next key is
    78    232   ** "Eggplant":
    79    233   **
    80         -**     aIter[5] -> Eggplant
          234  +**     aReadr[5] -> Eggplant
    81    235   **
    82         -** The contents of aTree[] are updated first by comparing the new iterator
    83         -** 5 key to the current key of iterator 4 (still "Grapefruit"). The iterator
          236  +** The contents of aTree[] are updated first by comparing the new PmaReader
          237  +** 5 key to the current key of PmaReader 4 (still "Grapefruit"). The PmaReader
    84    238   ** 5 value is still smaller, so aTree[6] is set to 5. And so on up the tree.
    85         -** The value of iterator 6 - "Durian" - is now smaller than that of iterator
          239  +** The value of PmaReader 6 - "Durian" - is now smaller than that of PmaReader
    86    240   ** 5, so aTree[3] is set to 6. Key 0 is smaller than key 6 (Banana<Durian),
    87    241   ** so the value written into element 1 of the array is 0. As follows:
    88    242   **
    89    243   **     aTree[] = { X, 0   0, 6    0, 3, 5, 6 }
    90    244   **
    91    245   ** In other words, each time we advance to the next sorter element, log2(N)
    92    246   ** key comparison operations are required, where N is the number of segments
    93    247   ** being merged (rounded up to the next power of 2).
    94    248   */
          249  +struct MergeEngine {
          250  +  int nTree;                 /* Used size of aTree/aReadr (power of 2) */
          251  +  SortSubtask *pTask;        /* Used by this thread only */
          252  +  int *aTree;                /* Current state of incremental merge */
          253  +  PmaReader *aReadr;         /* Array of PmaReaders to merge data from */
          254  +};
          255  +
          256  +/*
          257  +** This object represents a single thread of control in a sort operation.
          258  +** Exactly VdbeSorter.nTask instances of this object are allocated
          259  +** as part of each VdbeSorter object. Instances are never allocated any
          260  +** other way. VdbeSorter.nTask is set to the number of worker threads allowed
          261  +** (see SQLITE_CONFIG_WORKER_THREADS) plus one (the main thread).  Thus for
          262  +** single-threaded operation, there is exactly one instance of this object
          263  +** and for multi-threaded operation there are two or more instances.
          264  +**
          265  +** Essentially, this structure contains all those fields of the VdbeSorter
          266  +** structure for which each thread requires a separate instance. For example,
          267  +** each thread requires its own UnpackedRecord object to unpack records in
          268  +** as part of comparison operations.
          269  +**
          270  +** Before a background thread is launched, variable bDone is set to 0. Then, 
          271  +** right before it exits, the thread itself sets bDone to 1. This is used for 
          272  +** two purposes:
          273  +**
          274  +**   1. When flushing the contents of memory to a level-0 PMA on disk, to
          275  +**      attempt to select a SortSubtask for which there is not already an
          276  +**      active background thread (since doing so causes the main thread
          277  +**      to block until it finishes).
          278  +**
          279  +**   2. If SQLITE_DEBUG_SORTER_THREADS is defined, to determine if a call
          280  +**      to sqlite3ThreadJoin() is likely to block. Cases that are likely to
          281  +**      block provoke debugging output.
          282  +**
          283  +** In both cases, the effects of the main thread seeing (bDone==0) even
          284  +** after the thread has finished are not dire. So we don't worry about
          285  +** memory barriers and such here.
          286  +*/
          287  +struct SortSubtask {
          288  +  SQLiteThread *pThread;          /* Background thread, if any */
          289  +  int bDone;                      /* Set if thread is finished but not joined */
          290  +  VdbeSorter *pSorter;            /* Sorter that owns this sub-task */
          291  +  UnpackedRecord *pUnpacked;      /* Space to unpack a record */
          292  +  SorterList list;                /* List for thread to write to a PMA */
          293  +  int nPMA;                       /* Number of PMAs currently in file */
          294  +  SorterFile file;                /* Temp file for level-0 PMAs */
          295  +  SorterFile file2;               /* Space for other PMAs */
          296  +};
          297  +
          298  +/*
          299  +** Main sorter structure. A single instance of this is allocated for each 
          300  +** sorter cursor created by the VDBE.
          301  +**
          302  +** mxKeysize:
          303  +**   As records are added to the sorter by calls to sqlite3VdbeSorterWrite(),
          304  +**   this variable is updated so as to be set to the size on disk of the
          305  +**   largest record in the sorter.
          306  +*/
    95    307   struct VdbeSorter {
    96         -  i64 iWriteOff;                  /* Current write offset within file pTemp1 */
    97         -  i64 iReadOff;                   /* Current read offset within file pTemp1 */
    98         -  int nInMemory;                  /* Current size of pRecord list as PMA */
    99         -  int nTree;                      /* Used size of aTree/aIter (power of 2) */
   100         -  int nPMA;                       /* Number of PMAs stored in pTemp1 */
   101    308     int mnPmaSize;                  /* Minimum PMA size, in bytes */
   102    309     int mxPmaSize;                  /* Maximum PMA size, in bytes.  0==no limit */
   103         -  VdbeSorterIter *aIter;          /* Array of iterators to merge */
   104         -  int *aTree;                     /* Current state of incremental merge */
   105         -  sqlite3_file *pTemp1;           /* PMA file 1 */
   106         -  SorterRecord *pRecord;          /* Head of in-memory record list */
   107         -  UnpackedRecord *pUnpacked;      /* Used to unpack keys */
          310  +  int mxKeysize;                  /* Largest serialized key seen so far */
          311  +  int pgsz;                       /* Main database page size */
          312  +  PmaReader *pReader;             /* Read data from here after Rewind() */
          313  +  MergeEngine *pMerger;           /* Or here, if bUseThreads==0 */
          314  +  sqlite3 *db;                    /* Database connection */
          315  +  KeyInfo *pKeyInfo;              /* How to compare records */
          316  +  UnpackedRecord *pUnpacked;      /* Used by VdbeSorterCompare() */
          317  +  SorterList list;                /* List of in-memory records */
          318  +  int iMemory;                    /* Offset of free space in list.aMemory */
          319  +  int nMemory;                    /* Size of list.aMemory allocation in bytes */
          320  +  u8 bUsePMA;                     /* True if one or more PMAs created */
          321  +  u8 bUseThreads;                 /* True to use background threads */
          322  +  u8 iPrev;                       /* Previous thread used to flush PMA */
          323  +  u8 nTask;                       /* Size of aTask[] array */
          324  +  SortSubtask aTask[1];           /* One or more subtasks */
          325  +};
          326  +
          327  +/*
          328  +** An instance of the following object is used to read records out of a
          329  +** PMA, in sorted order.  The next key to be read is cached in nKey/aKey.
          330  +** aKey might point into aMap or into aBuffer.  If neither of those locations
          331  +** contain a contiguous representation of the key, then aAlloc is allocated
          332  +** and the key is copied into aAlloc and aKey is made to point to aAlloc.
          333  +**
          334  +** pFd==0 at EOF.
          335  +*/
          336  +struct PmaReader {
          337  +  i64 iReadOff;               /* Current read offset */
          338  +  i64 iEof;                   /* 1 byte past EOF for this PmaReader */
          339  +  int nAlloc;                 /* Bytes of space at aAlloc */
          340  +  int nKey;                   /* Number of bytes in key */
          341  +  sqlite3_file *pFd;          /* File handle we are reading from */
          342  +  u8 *aAlloc;                 /* Space for aKey if aBuffer and aMap won't work */
          343  +  u8 *aKey;                   /* Pointer to current key */
          344  +  u8 *aBuffer;                /* Current read buffer */
          345  +  int nBuffer;                /* Size of read buffer in bytes */
          346  +  u8 *aMap;                   /* Pointer to mapping of entire file */
          347  +  IncrMerger *pIncr;          /* Incremental merger */
   108    348   };
   109    349   
   110    350   /*
   111         -** The following type is an iterator for a PMA. It caches the current key in 
   112         -** variables nKey/aKey. If the iterator is at EOF, pFile==0.
   113         -*/
   114         -struct VdbeSorterIter {
   115         -  i64 iReadOff;                   /* Current read offset */
   116         -  i64 iEof;                       /* 1 byte past EOF for this iterator */
   117         -  int nAlloc;                     /* Bytes of space at aAlloc */
   118         -  int nKey;                       /* Number of bytes in key */
   119         -  sqlite3_file *pFile;            /* File iterator is reading from */
   120         -  u8 *aAlloc;                     /* Allocated space */
   121         -  u8 *aKey;                       /* Pointer to current key */
   122         -  u8 *aBuffer;                    /* Current read buffer */
   123         -  int nBuffer;                    /* Size of read buffer in bytes */
          351  +** Normally, a PmaReader object iterates through an existing PMA stored 
          352  +** within a temp file. However, if the PmaReader.pIncr variable points to
          353  +** an object of the following type, it may be used to iterate/merge through
          354  +** multiple PMAs simultaneously.
          355  +**
          356  +** There are two types of IncrMerger object - single-threaded (bUseThread==0) and
          357  +** multi-threaded (bUseThread==1). 
          358  +**
          359  +** A multi-threaded IncrMerger object uses two temporary files - aFile[0] 
          360  +** and aFile[1]. Neither file is allowed to grow to more than mxSz bytes in 
          361  +** size. When the IncrMerger is initialized, it reads enough data from 
          362  +** pMerger to populate aFile[0]. It then sets variables within the 
          363  +** corresponding PmaReader object to read from that file and kicks off 
          364  +** a background thread to populate aFile[1] with the next mxSz bytes of 
          365  +** sorted record data from pMerger. 
          366  +**
          367  +** When the PmaReader reaches the end of aFile[0], it blocks until the
          368  +** background thread has finished populating aFile[1]. It then exchanges
          369  +** the contents of the aFile[0] and aFile[1] variables within this structure,
          370  +** sets the PmaReader fields to read from the new aFile[0] and kicks off
          371  +** another background thread to populate the new aFile[1]. And so on, until
          372  +** the contents of pMerger are exhausted.
          373  +**
          374  +** A single-threaded IncrMerger does not open any temporary files of its
          375  +** own. Instead, it has exclusive access to mxSz bytes of space beginning
          376  +** at offset iStartOff of file pTask->file2. And instead of using a 
          377  +** background thread to prepare data for the PmaReader, with a single
          378  +** threaded IncrMerger the allocated part of pTask->file2 is "refilled" with
          379  +** keys from pMerger by the calling thread whenever the PmaReader runs out
          380  +** of data.
          381  +*/
          382  +struct IncrMerger {
          383  +  SortSubtask *pTask;             /* Task that owns this merger */
          384  +  MergeEngine *pMerger;           /* Merge engine thread reads data from */
          385  +  i64 iStartOff;                  /* Offset to start writing file at */
          386  +  int mxSz;                       /* Maximum bytes of data to store */
          387  +  int bEof;                       /* Set to true when merge is finished */
          388  +  int bUseThread;                 /* True to use a bg thread for this object */
          389  +  SorterFile aFile[2];            /* aFile[0] for reading, [1] for writing */
   124    390   };
   125    391   
   126    392   /*
   127         -** An instance of this structure is used to organize the stream of records
   128         -** being written to files by the merge-sort code into aligned, page-sized
   129         -** blocks.  Doing all I/O in aligned page-sized blocks helps I/O to go
   130         -** faster on many operating systems.
          393  +** An instance of this object is used for writing a PMA.
          394  +**
          395  +** The PMA is written one record at a time.  Each record is of an arbitrary
          396  +** size.  But I/O is more efficient if it occurs in page-sized blocks where
          397  +** each block is aligned on a page boundary.  This object caches writes to
          398  +** the PMA so that aligned, page-size blocks are written.
   131    399   */
   132         -struct FileWriter {
          400  +struct PmaWriter {
   133    401     int eFWErr;                     /* Non-zero if in an error state */
   134    402     u8 *aBuffer;                    /* Pointer to write buffer */
   135    403     int nBuffer;                    /* Size of write buffer in bytes */
   136    404     int iBufStart;                  /* First byte of buffer to write */
   137    405     int iBufEnd;                    /* Last byte of buffer to write */
   138    406     i64 iWriteOff;                  /* Offset of start of buffer in file */
   139         -  sqlite3_file *pFile;            /* File to write to */
          407  +  sqlite3_file *pFd;              /* File handle to write to */
   140    408   };
   141    409   
   142    410   /*
   143         -** A structure to store a single record. All in-memory records are connected
   144         -** together into a linked list headed at VdbeSorter.pRecord using the 
   145         -** SorterRecord.pNext pointer.
          411  +** This object is the header on a single record while that record is being
          412  +** held in memory and prior to being written out as part of a PMA.
          413  +**
          414  +** How the linked list is connected depends on how memory is being managed
          415  +** by this module. If using a separate allocation for each in-memory record
          416  +** (VdbeSorter.list.aMemory==0), then the list is always connected using the
          417  +** SorterRecord.u.pNext pointers.
          418  +**
          419  +** Or, if using the single large allocation method (VdbeSorter.list.aMemory!=0),
          420  +** then while records are being accumulated the list is linked using the
          421  +** SorterRecord.u.iNext offset. This is because the aMemory[] array may
          422  +** be sqlite3Realloc()ed while records are being accumulated. Once the VM
          423  +** has finished passing records to the sorter, or when the in-memory buffer
          424  +** is full, the list is sorted. As part of the sorting process, it is
          425  +** converted to use the SorterRecord.u.pNext pointers. See function
          426  +** vdbeSorterSort() for details.
   146    427   */
   147    428   struct SorterRecord {
   148         -  void *pVal;
   149         -  int nVal;
   150         -  SorterRecord *pNext;
          429  +  int nVal;                       /* Size of the record in bytes */
          430  +  union {
          431  +    SorterRecord *pNext;          /* Pointer to next record in list */
          432  +    int iNext;                    /* Offset within aMemory of next record */
          433  +  } u;
          434  +  /* The data for the record immediately follows this header */
   151    435   };
   152    436   
   153         -/* Minimum allowable value for the VdbeSorter.nWorking variable */
          437  +/* Return a pointer to the buffer containing the record data for SorterRecord
          438  +** object p. Should be used as if:
          439  +**
          440  +**   void *SRVAL(SorterRecord *p) { return (void*)&p[1]; }
          441  +*/
          442  +#define SRVAL(p) ((void*)((SorterRecord*)(p) + 1))
          443  +
          444  +/* The minimum PMA size is set to this value multiplied by the database
          445  +** page size in bytes.  */
   154    446   #define SORTER_MIN_WORKING 10
   155    447   
   156         -/* Maximum number of segments to merge in a single pass. */
          448  +/* Maximum number of PMAs that a single MergeEngine can merge */
   157    449   #define SORTER_MAX_MERGE_COUNT 16
   158    450   
          451  +static int vdbeIncrSwap(IncrMerger*);
          452  +static void vdbeIncrFree(IncrMerger *);
          453  +
   159    454   /*
   160         -** Free all memory belonging to the VdbeSorterIter object passed as the second
          455  +** Free all memory belonging to the PmaReader object passed as the
   161    456   ** argument. All structure fields are set to zero before returning.
   162    457   */
   163         -static void vdbeSorterIterZero(sqlite3 *db, VdbeSorterIter *pIter){
   164         -  sqlite3DbFree(db, pIter->aAlloc);
   165         -  sqlite3DbFree(db, pIter->aBuffer);
   166         -  memset(pIter, 0, sizeof(VdbeSorterIter));
          458  +static void vdbePmaReaderClear(PmaReader *pReadr){
          459  +  sqlite3_free(pReadr->aAlloc);
          460  +  sqlite3_free(pReadr->aBuffer);
          461  +  if( pReadr->aMap ) sqlite3OsUnfetch(pReadr->pFd, 0, pReadr->aMap);
          462  +  vdbeIncrFree(pReadr->pIncr);
          463  +  memset(pReadr, 0, sizeof(PmaReader));
   167    464   }
   168    465   
   169    466   /*
   170         -** Read nByte bytes of data from the stream of data iterated by object p.
          467  +** Read the next nByte bytes of data from the PMA p.
   171    468   ** If successful, set *ppOut to point to a buffer containing the data
   172    469   ** and return SQLITE_OK. Otherwise, if an error occurs, return an SQLite
   173    470   ** error code.
   174    471   **
   175         -** The buffer indicated by *ppOut may only be considered valid until the
          472  +** The buffer returned in *ppOut is only valid until the
   176    473   ** next call to this function.
   177    474   */
   178         -static int vdbeSorterIterRead(
   179         -  sqlite3 *db,                    /* Database handle (for malloc) */
   180         -  VdbeSorterIter *p,              /* Iterator */
          475  +static int vdbePmaReadBlob(
          476  +  PmaReader *p,                   /* PmaReader from which to take the blob */
   181    477     int nByte,                      /* Bytes of data to read */
   182    478     u8 **ppOut                      /* OUT: Pointer to buffer containing data */
   183    479   ){
   184    480     int iBuf;                       /* Offset within buffer to read from */
   185    481     int nAvail;                     /* Bytes of data available in buffer */
          482  +
          483  +  if( p->aMap ){
          484  +    *ppOut = &p->aMap[p->iReadOff];
          485  +    p->iReadOff += nByte;
          486  +    return SQLITE_OK;
          487  +  }
          488  +
   186    489     assert( p->aBuffer );
   187    490   
   188    491     /* If there is no more data to be read from the buffer, read the next 
   189    492     ** p->nBuffer bytes of data from the file into it. Or, if there are less
   190    493     ** than p->nBuffer bytes remaining in the PMA, read all remaining data.  */
   191    494     iBuf = p->iReadOff % p->nBuffer;
   192    495     if( iBuf==0 ){
................................................................................
   197    500       if( (p->iEof - p->iReadOff) > (i64)p->nBuffer ){
   198    501         nRead = p->nBuffer;
   199    502       }else{
   200    503         nRead = (int)(p->iEof - p->iReadOff);
   201    504       }
   202    505       assert( nRead>0 );
   203    506   
   204         -    /* Read data from the file. Return early if an error occurs. */
   205         -    rc = sqlite3OsRead(p->pFile, p->aBuffer, nRead, p->iReadOff);
          507  +    /* Read data from the file. Return early if an error occurs. */
          508  +    rc = sqlite3OsRead(p->pFd, p->aBuffer, nRead, p->iReadOff);
   206    509       assert( rc!=SQLITE_IOERR_SHORT_READ );
   207    510       if( rc!=SQLITE_OK ) return rc;
   208    511     }
   209    512     nAvail = p->nBuffer - iBuf; 
   210    513   
   211    514     if( nByte<=nAvail ){
   212    515       /* The requested data is available in the in-memory buffer. In this
................................................................................
   218    521       /* The requested data is not all available in the in-memory buffer.
   219    522       ** In this case, allocate space at p->aAlloc[] to copy the requested
   220    523       ** range into. Then return a copy of pointer p->aAlloc to the caller.  */
   221    524       int nRem;                     /* Bytes remaining to copy */
   222    525   
   223    526       /* Extend the p->aAlloc[] allocation if required. */
   224    527       if( p->nAlloc<nByte ){
   225         -      int nNew = p->nAlloc*2;
          528  +      u8 *aNew;
          529  +      int nNew = MAX(128, p->nAlloc*2);
   226    530         while( nByte>nNew ) nNew = nNew*2;
   227         -      p->aAlloc = sqlite3DbReallocOrFree(db, p->aAlloc, nNew);
   228         -      if( !p->aAlloc ) return SQLITE_NOMEM;
          531  +      aNew = sqlite3Realloc(p->aAlloc, nNew);
          532  +      if( !aNew ) return SQLITE_NOMEM;
   229    533         p->nAlloc = nNew;
          534  +      p->aAlloc = aNew;
   230    535       }
   231    536   
   232    537       /* Copy as much data as is available in the buffer into the start of
   233    538       ** p->aAlloc[].  */
   234    539       memcpy(p->aAlloc, &p->aBuffer[iBuf], nAvail);
   235    540       p->iReadOff += nAvail;
   236    541       nRem = nByte - nAvail;
   237    542   
   238    543       /* The following loop copies up to p->nBuffer bytes per iteration into
   239    544       ** the p->aAlloc[] buffer.  */
   240    545       while( nRem>0 ){
   241         -      int rc;                     /* vdbeSorterIterRead() return code */
          546  +      int rc;                     /* vdbePmaReadBlob() return code */
   242    547         int nCopy;                  /* Number of bytes to copy */
   243    548         u8 *aNext;                  /* Pointer to buffer to copy data from */
   244    549   
   245    550         nCopy = nRem;
   246    551         if( nRem>p->nBuffer ) nCopy = p->nBuffer;
   247         -      rc = vdbeSorterIterRead(db, p, nCopy, &aNext);
          552  +      rc = vdbePmaReadBlob(p, nCopy, &aNext);
   248    553         if( rc!=SQLITE_OK ) return rc;
   249    554         assert( aNext!=p->aAlloc );
   250    555         memcpy(&p->aAlloc[nByte - nRem], aNext, nCopy);
   251    556         nRem -= nCopy;
   252    557       }
   253    558   
   254    559       *ppOut = p->aAlloc;
................................................................................
   257    562     return SQLITE_OK;
   258    563   }
   259    564   
   260    565   /*
   261    566   ** Read a varint from the stream of data accessed by p. Set *pnOut to
   262    567   ** the value read.
   263    568   */
   264         -static int vdbeSorterIterVarint(sqlite3 *db, VdbeSorterIter *p, u64 *pnOut){
          569  +static int vdbePmaReadVarint(PmaReader *p, u64 *pnOut){
   265    570     int iBuf;
   266    571   
   267         -  iBuf = p->iReadOff % p->nBuffer;
   268         -  if( iBuf && (p->nBuffer-iBuf)>=9 ){
   269         -    p->iReadOff += sqlite3GetVarint(&p->aBuffer[iBuf], pnOut);
          572  +  if( p->aMap ){
          573  +    p->iReadOff += sqlite3GetVarint(&p->aMap[p->iReadOff], pnOut);
   270    574     }else{
   271         -    u8 aVarint[16], *a;
   272         -    int i = 0, rc;
   273         -    do{
   274         -      rc = vdbeSorterIterRead(db, p, 1, &a);
   275         -      if( rc ) return rc;
   276         -      aVarint[(i++)&0xf] = a[0];
   277         -    }while( (a[0]&0x80)!=0 );
   278         -    sqlite3GetVarint(aVarint, pnOut);
          575  +    iBuf = p->iReadOff % p->nBuffer;
          576  +    if( iBuf && (p->nBuffer-iBuf)>=9 ){
          577  +      p->iReadOff += sqlite3GetVarint(&p->aBuffer[iBuf], pnOut);
          578  +    }else{
          579  +      u8 aVarint[16], *a;
          580  +      int i = 0, rc;
          581  +      do{
          582  +        rc = vdbePmaReadBlob(p, 1, &a);
          583  +        if( rc ) return rc;
          584  +        aVarint[(i++)&0xf] = a[0];
          585  +      }while( (a[0]&0x80)!=0 );
          586  +      sqlite3GetVarint(aVarint, pnOut);
          587  +    }
   279    588     }
   280    589   
   281    590     return SQLITE_OK;
   282    591   }
   283    592   
   284         -
   285         -/*
   286         -** Advance iterator pIter to the next key in its PMA. Return SQLITE_OK if
   287         -** no error occurs, or an SQLite error code if one does.
   288         -*/
   289         -static int vdbeSorterIterNext(
   290         -  sqlite3 *db,                    /* Database handle (for sqlite3DbMalloc() ) */
   291         -  VdbeSorterIter *pIter           /* Iterator to advance */
   292         -){
   293         -  int rc;                         /* Return Code */
   294         -  u64 nRec = 0;                   /* Size of record in bytes */
   295         -
   296         -  if( pIter->iReadOff>=pIter->iEof ){
   297         -    /* This is an EOF condition */
   298         -    vdbeSorterIterZero(db, pIter);
   299         -    return SQLITE_OK;
   300         -  }
   301         -
   302         -  rc = vdbeSorterIterVarint(db, pIter, &nRec);
   303         -  if( rc==SQLITE_OK ){
   304         -    pIter->nKey = (int)nRec;
   305         -    rc = vdbeSorterIterRead(db, pIter, (int)nRec, &pIter->aKey);
   306         -  }
   307         -
   308         -  return rc;
   309         -}
   310         -
   311         -/*
   312         -** Initialize iterator pIter to scan through the PMA stored in file pFile
   313         -** starting at offset iStart and ending at offset iEof-1. This function 
   314         -** leaves the iterator pointing to the first key in the PMA (or EOF if the 
   315         -** PMA is empty).
   316         -*/
   317         -static int vdbeSorterIterInit(
   318         -  sqlite3 *db,                    /* Database handle */
   319         -  const VdbeSorter *pSorter,      /* Sorter object */
   320         -  i64 iStart,                     /* Start offset in pFile */
   321         -  VdbeSorterIter *pIter,          /* Iterator to populate */
   322         -  i64 *pnByte                     /* IN/OUT: Increment this value by PMA size */
          593  +/*
          594  +** Attempt to memory map file pFile. If successful, set *pp to point to the
          595  +** new mapping and return SQLITE_OK. If the mapping is not attempted 
          596  +** (because the file is too large or the VFS layer is configured not to use
          597  +** mmap), return SQLITE_OK and set *pp to NULL.
          598  +**
          599  +** Or, if an error occurs, return an SQLite error code. The final value of
          600  +** *pp is undefined in this case.
          601  +*/
          602  +static int vdbeSorterMapFile(SortSubtask *pTask, SorterFile *pFile, u8 **pp){
          603  +  int rc = SQLITE_OK;
          604  +  if( pFile->iEof<=(i64)(pTask->pSorter->db->nMaxSorterMmap) ){
          605  +    rc = sqlite3OsFetch(pFile->pFd, 0, (int)pFile->iEof, (void**)pp);
          606  +    testcase( rc!=SQLITE_OK );
          607  +  }
          608  +  return rc;
          609  +}
          610  +
          611  +/*
          612  +** Attach PmaReader pReadr to file pFile (if it is not already attached to
          613  +** that file) and seek it to offset iOff within the file.  Return SQLITE_OK 
          614  +** if successful, or an SQLite error code if an error occurs.
          615  +*/
          616  +static int vdbePmaReaderSeek(
          617  +  SortSubtask *pTask,             /* Task context */
          618  +  PmaReader *pReadr,              /* Reader whose cursor is to be moved */
          619  +  SorterFile *pFile,              /* Sorter file to read from */
          620  +  i64 iOff                        /* Offset in pFile */
   323    621   ){
   324    622     int rc = SQLITE_OK;
   325         -  int nBuf;
   326         -
   327         -  nBuf = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
   328         -
   329         -  assert( pSorter->iWriteOff>iStart );
   330         -  assert( pIter->aAlloc==0 );
   331         -  assert( pIter->aBuffer==0 );
   332         -  pIter->pFile = pSorter->pTemp1;
   333         -  pIter->iReadOff = iStart;
   334         -  pIter->nAlloc = 128;
   335         -  pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc);
   336         -  pIter->nBuffer = nBuf;
   337         -  pIter->aBuffer = (u8 *)sqlite3DbMallocRaw(db, nBuf);
   338         -
   339         -  if( !pIter->aBuffer ){
   340         -    rc = SQLITE_NOMEM;
   341         -  }else{
   342         -    int iBuf;
   343         -
   344         -    iBuf = iStart % nBuf;
   345         -    if( iBuf ){
   346         -      int nRead = nBuf - iBuf;
   347         -      if( (iStart + nRead) > pSorter->iWriteOff ){
   348         -        nRead = (int)(pSorter->iWriteOff - iStart);
          623  +
          624  +  assert( pReadr->pIncr==0 || pReadr->pIncr->bEof==0 );
          625  +
          626  +  if( sqlite3FaultSim(201) ) return SQLITE_IOERR_READ;
          627  +  if( pReadr->aMap ){
          628  +    sqlite3OsUnfetch(pReadr->pFd, 0, pReadr->aMap);
          629  +    pReadr->aMap = 0;
          630  +  }
          631  +  pReadr->iReadOff = iOff;
          632  +  pReadr->iEof = pFile->iEof;
          633  +  pReadr->pFd = pFile->pFd;
          634  +
          635  +  rc = vdbeSorterMapFile(pTask, pFile, &pReadr->aMap);
          636  +  if( rc==SQLITE_OK && pReadr->aMap==0 ){
          637  +    int pgsz = pTask->pSorter->pgsz;
          638  +    int iBuf = pReadr->iReadOff % pgsz;
          639  +    if( pReadr->aBuffer==0 ){
          640  +      pReadr->aBuffer = (u8*)sqlite3Malloc(pgsz);
          641  +      if( pReadr->aBuffer==0 ) rc = SQLITE_NOMEM;
          642  +      pReadr->nBuffer = pgsz;
          643  +    }
          644  +    if( rc==SQLITE_OK && iBuf ){
          645  +      int nRead = pgsz - iBuf;
          646  +      if( (pReadr->iReadOff + nRead) > pReadr->iEof ){
          647  +        nRead = (int)(pReadr->iEof - pReadr->iReadOff);
   349    648         }
   350    649         rc = sqlite3OsRead(
   351         -          pSorter->pTemp1, &pIter->aBuffer[iBuf], nRead, iStart
          650  +          pReadr->pFd, &pReadr->aBuffer[iBuf], nRead, pReadr->iReadOff
   352    651         );
   353         -    }
   354         -
   355         -    if( rc==SQLITE_OK ){
   356         -      u64 nByte;                       /* Size of PMA in bytes */
   357         -      pIter->iEof = pSorter->iWriteOff;
   358         -      rc = vdbeSorterIterVarint(db, pIter, &nByte);
   359         -      pIter->iEof = pIter->iReadOff + nByte;
   360         -      *pnByte += nByte;
   361         -    }
          652  +      testcase( rc!=SQLITE_OK );
          653  +    }
          654  +  }
          655  +
          656  +  return rc;
          657  +}
          658  +
          659  +/*
          660  +** Advance PmaReader pReadr to the next key in its PMA. Return SQLITE_OK if
          661  +** no error occurs, or an SQLite error code if one does.
          662  +*/
          663  +static int vdbePmaReaderNext(PmaReader *pReadr){
          664  +  int rc = SQLITE_OK;             /* Return Code */
          665  +  u64 nRec = 0;                   /* Size of record in bytes */
          666  +
          667  +
          668  +  if( pReadr->iReadOff>=pReadr->iEof ){
          669  +    IncrMerger *pIncr = pReadr->pIncr;
          670  +    int bEof = 1;
          671  +    if( pIncr ){
          672  +      rc = vdbeIncrSwap(pIncr);
          673  +      if( rc==SQLITE_OK && pIncr->bEof==0 ){
          674  +        rc = vdbePmaReaderSeek(
          675  +            pIncr->pTask, pReadr, &pIncr->aFile[0], pIncr->iStartOff
          676  +        );
          677  +        bEof = 0;
          678  +      }
          679  +    }
          680  +
          681  +    if( bEof ){
          682  +      /* This is an EOF condition */
          683  +      vdbePmaReaderClear(pReadr);
          684  +      testcase( rc!=SQLITE_OK );
          685  +      return rc;
          686  +    }
          687  +  }
          688  +
          689  +  if( rc==SQLITE_OK ){
          690  +    rc = vdbePmaReadVarint(pReadr, &nRec);
          691  +  }
          692  +  if( rc==SQLITE_OK ){
          693  +    pReadr->nKey = (int)nRec;
          694  +    rc = vdbePmaReadBlob(pReadr, (int)nRec, &pReadr->aKey);
          695  +    testcase( rc!=SQLITE_OK );
          696  +  }
          697  +
          698  +  return rc;
          699  +}
          700  +
          701  +/*
          702  +** Initialize PmaReader pReadr to scan through the PMA stored in file pFile
          703  +** starting at offset iStart and ending at offset iEof-1. This function 
          704  +** leaves the PmaReader pointing to the first key in the PMA (or EOF if the 
          705  +** PMA is empty).
          706  +**
          707  +** The size in bytes of the PMA, as read from its leading length varint,
          708  +** is added to *pnByte. The pnByte pointer must not be NULL.
          709  +*/
          710  +static int vdbePmaReaderInit(
          711  +  SortSubtask *pTask,             /* Task context */
          712  +  SorterFile *pFile,              /* Sorter file to read from */
          713  +  i64 iStart,                     /* Start offset in pFile */
          714  +  PmaReader *pReadr,              /* PmaReader to populate */
          715  +  i64 *pnByte                     /* IN/OUT: Increment this value by PMA size */
          716  +){
          717  +  int rc;
          718  +
          719  +  assert( pFile->iEof>iStart );
          720  +  assert( pReadr->aAlloc==0 && pReadr->nAlloc==0 );
          721  +  assert( pReadr->aBuffer==0 );
          722  +  assert( pReadr->aMap==0 );
          723  +
          724  +  rc = vdbePmaReaderSeek(pTask, pReadr, pFile, iStart);
          725  +  if( rc==SQLITE_OK ){
          726  +    u64 nByte;                    /* Size of PMA in bytes */
          727  +    rc = vdbePmaReadVarint(pReadr, &nByte);
          728  +    pReadr->iEof = pReadr->iReadOff + nByte;
          729  +    *pnByte += nByte;
   362    730     }
   363    731   
   364    732     if( rc==SQLITE_OK ){
   365         -    rc = vdbeSorterIterNext(db, pIter);
          733  +    rc = vdbePmaReaderNext(pReadr);
   366    734     }
   367    735     return rc;
   368    736   }
   369    737   
   370    738   
   371    739   /*
   372    740   ** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2, 
   373         -** size nKey2 bytes).  Argument pKeyInfo supplies the collation functions
   374         -** used by the comparison. If an error occurs, return an SQLite error code.
   375         -** Otherwise, return SQLITE_OK and set *pRes to a negative, zero or positive
   376         -** value, depending on whether key1 is smaller, equal to or larger than key2.
          741  +** size nKey2 bytes). Use (pTask->pSorter->pKeyInfo) for the collation sequences
          742  +** used by the comparison. Return the result of the comparison.
   377    743   **
   378         -** If the bOmitRowid argument is non-zero, assume both keys end in a rowid
   379         -** field. For the purposes of the comparison, ignore it. Also, if bOmitRowid
   380         -** is true and key1 contains even a single NULL value, it is considered to
   381         -** be less than key2. Even if key2 also contains NULL values.
          744  +** Before returning, object (pTask->pUnpacked) is populated with the
          745  +** unpacked version of key2. Or, if pKey2 is passed a NULL pointer, then it 
          746  +** is assumed that the (pTask->pUnpacked) structure already contains the 
          747  +** unpacked key to use as key2.
   382    748   **
   383         -** If pKey2 is passed a NULL pointer, then it is assumed that the pCsr->aSpace
   384         -** has been allocated and contains an unpacked record that is used as key2.
          749  +** If an OOM error is encountered, (pTask->pUnpacked->errCode) is set
          750  +** to SQLITE_NOMEM.
   385    751   */
   386         -static void vdbeSorterCompare(
   387         -  const VdbeCursor *pCsr,         /* Cursor object (for pKeyInfo) */
   388         -  int nKeyCol,                    /* Num of columns. 0 means "all" */
          752  +static int vdbeSorterCompare(
          753  +  SortSubtask *pTask,             /* Subtask context (for pKeyInfo) */
   389    754     const void *pKey1, int nKey1,   /* Left side of comparison */
   390         -  const void *pKey2, int nKey2,   /* Right side of comparison */
   391         -  int *pRes                       /* OUT: Result of comparison */
          755  +  const void *pKey2, int nKey2    /* Right side of comparison */
   392    756   ){
   393         -  KeyInfo *pKeyInfo = pCsr->pKeyInfo;
   394         -  VdbeSorter *pSorter = pCsr->pSorter;
   395         -  UnpackedRecord *r2 = pSorter->pUnpacked;
   396         -  int i;
   397         -
          757  +  UnpackedRecord *r2 = pTask->pUnpacked;
   398    758     if( pKey2 ){
   399         -    sqlite3VdbeRecordUnpack(pKeyInfo, nKey2, pKey2, r2);
          759  +    sqlite3VdbeRecordUnpack(pTask->pSorter->pKeyInfo, nKey2, pKey2, r2);
   400    760     }
   401         -
   402         -  if( nKeyCol ){
   403         -    r2->nField = nKeyCol;
   404         -    for(i=0; i<nKeyCol; i++){
   405         -      if( r2->aMem[i].flags & MEM_Null ){
   406         -        *pRes = -1;
   407         -        return;
   408         -      }
   409         -    }
   410         -    assert( r2->default_rc==0 );
   411         -  }
   412         -
   413         -  *pRes = sqlite3VdbeRecordCompare(nKey1, pKey1, r2, 0);
   414         -}
   415         -
   416         -/*
   417         -** This function is called to compare two iterator keys when merging 
   418         -** multiple b-tree segments. Parameter iOut is the index of the aTree[] 
   419         -** value to recalculate.
   420         -*/
   421         -static int vdbeSorterDoCompare(const VdbeCursor *pCsr, int iOut){
   422         -  VdbeSorter *pSorter = pCsr->pSorter;
   423         -  int i1;
   424         -  int i2;
   425         -  int iRes;
   426         -  VdbeSorterIter *p1;
   427         -  VdbeSorterIter *p2;
   428         -
   429         -  assert( iOut<pSorter->nTree && iOut>0 );
   430         -
   431         -  if( iOut>=(pSorter->nTree/2) ){
   432         -    i1 = (iOut - pSorter->nTree/2) * 2;
   433         -    i2 = i1 + 1;
   434         -  }else{
   435         -    i1 = pSorter->aTree[iOut*2];
   436         -    i2 = pSorter->aTree[iOut*2+1];
   437         -  }
   438         -
   439         -  p1 = &pSorter->aIter[i1];
   440         -  p2 = &pSorter->aIter[i2];
   441         -
   442         -  if( p1->pFile==0 ){
   443         -    iRes = i2;
   444         -  }else if( p2->pFile==0 ){
   445         -    iRes = i1;
   446         -  }else{
   447         -    int res;
   448         -    assert( pCsr->pSorter->pUnpacked!=0 );  /* allocated in vdbeSorterMerge() */
   449         -    vdbeSorterCompare(
   450         -        pCsr, 0, p1->aKey, p1->nKey, p2->aKey, p2->nKey, &res
   451         -    );
   452         -    if( res<=0 ){
   453         -      iRes = i1;
   454         -    }else{
   455         -      iRes = i2;
   456         -    }
   457         -  }
   458         -
   459         -  pSorter->aTree[iOut] = iRes;
   460         -  return SQLITE_OK;
          761  +  return sqlite3VdbeRecordCompare(nKey1, pKey1, r2, 0);
   461    762   }
   462    763   
   463    764   /*
   464    765   ** Initialize the temporary index cursor just opened as a sorter cursor.
          766  +**
          767  +** Usually, the sorter module uses the value of (pCsr->pKeyInfo->nField)
          768  +** to determine the number of fields that should be compared from the
          769  +** records being sorted. However, if the value passed as argument nField
          770  +** is non-zero and the sorter is able to guarantee a stable sort, nField
          771  +** is used instead. This is used when sorting records for a CREATE INDEX
          772  +** statement. In this case, keys are always delivered to the sorter in
          773  +** order of the primary key, which happens to make up the final part 
          774  +** of the records being sorted. So if the sort is stable, there is never
          775  +** any reason to compare PK fields and they can be ignored for a small
          776  +** performance boost.
          777  +**
          778  +** The sorter can guarantee a stable sort when running in single-threaded
          779  +** mode, but not in multi-threaded mode.
          780  +**
          781  +** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
   465    782   */
   466         -int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){
          783  +int sqlite3VdbeSorterInit(
          784  +  sqlite3 *db,                    /* Database connection (for malloc()) */
          785  +  int nField,                     /* Number of key fields in each record */
          786  +  VdbeCursor *pCsr                /* Cursor that holds the new sorter */
          787  +){
   467    788     int pgsz;                       /* Page size of main database */
          789  +  int i;                          /* Used to iterate through aTask[] */
   468    790     int mxCache;                    /* Cache size */
   469    791     VdbeSorter *pSorter;            /* The new sorter */
   470         -  char *d;                        /* Dummy */
          792  +  KeyInfo *pKeyInfo;              /* Copy of pCsr->pKeyInfo with db==0 */
          793  +  int szKeyInfo;                  /* Size of pCsr->pKeyInfo in bytes */
          794  +  int sz;                         /* Size of pSorter in bytes */
          795  +  int rc = SQLITE_OK;
          796  +#if SQLITE_MAX_WORKER_THREADS==0
          797  +# define nWorker 0
          798  +#else
          799  +  int nWorker;
          800  +#endif
          801  +
          802  +  /* Initialize the upper limit on the number of worker threads */
          803  +#if SQLITE_MAX_WORKER_THREADS>0
          804  +  if( sqlite3TempInMemory(db) || sqlite3GlobalConfig.bCoreMutex==0 ){
          805  +    nWorker = 0;
          806  +  }else{
          807  +    nWorker = db->aLimit[SQLITE_LIMIT_WORKER_THREADS];
          808  +  }
          809  +#endif
          810  +
          811  +  /* Do not allow the total number of threads (main thread + all workers)
          812  +  ** to exceed the maximum merge count */
          813  +#if SQLITE_MAX_WORKER_THREADS>=SORTER_MAX_MERGE_COUNT
          814  +  if( nWorker>=SORTER_MAX_MERGE_COUNT ){
          815  +    nWorker = SORTER_MAX_MERGE_COUNT-1;
          816  +  }
          817  +#endif
   471    818   
   472    819     assert( pCsr->pKeyInfo && pCsr->pBt==0 );
   473         -  pCsr->pSorter = pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter));
          820  +  szKeyInfo = sizeof(KeyInfo) + (pCsr->pKeyInfo->nField-1)*sizeof(CollSeq*);
          821  +  sz = sizeof(VdbeSorter) + nWorker * sizeof(SortSubtask);
          822  +
          823  +  pSorter = (VdbeSorter*)sqlite3DbMallocZero(db, sz + szKeyInfo);
          824  +  pCsr->pSorter = pSorter;
   474    825     if( pSorter==0 ){
   475         -    return SQLITE_NOMEM;
   476         -  }
   477         -  
   478         -  pSorter->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo, 0, 0, &d);
   479         -  if( pSorter->pUnpacked==0 ) return SQLITE_NOMEM;
   480         -  assert( pSorter->pUnpacked==(UnpackedRecord *)d );
   481         -
   482         -  if( !sqlite3TempInMemory(db) ){
   483         -    pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
   484         -    pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz;
   485         -    mxCache = db->aDb[0].pSchema->cache_size;
   486         -    if( mxCache<SORTER_MIN_WORKING ) mxCache = SORTER_MIN_WORKING;
   487         -    pSorter->mxPmaSize = mxCache * pgsz;
   488         -  }
   489         -
   490         -  return SQLITE_OK;
   491         -}
          826  +    rc = SQLITE_NOMEM;
          827  +  }else{
          828  +    pSorter->pKeyInfo = pKeyInfo = (KeyInfo*)((u8*)pSorter + sz);
          829  +    memcpy(pKeyInfo, pCsr->pKeyInfo, szKeyInfo);
          830  +    pKeyInfo->db = 0;
          831  +    if( nField && nWorker==0 ) pKeyInfo->nField = nField;
          832  +    pSorter->pgsz = pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
          833  +    pSorter->nTask = nWorker + 1;
          834  +    pSorter->bUseThreads = (pSorter->nTask>1);
          835  +    pSorter->db = db;
          836  +    for(i=0; i<pSorter->nTask; i++){
          837  +      SortSubtask *pTask = &pSorter->aTask[i];
          838  +      pTask->pSorter = pSorter;
          839  +    }
          840  +
          841  +    if( !sqlite3TempInMemory(db) ){
          842  +      pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz;
          843  +      mxCache = db->aDb[0].pSchema->cache_size;
          844  +      if( mxCache<SORTER_MIN_WORKING ) mxCache = SORTER_MIN_WORKING;
          845  +      pSorter->mxPmaSize = mxCache * pgsz;
          846  +
          847  +      /* If the application has not configured scratch memory using
          848  +      ** SQLITE_CONFIG_SCRATCH then we assume it is OK to do large memory
          849  +      ** allocations.  If scratch memory has been configured, then assume
          850  +      ** large memory allocations should be avoided to prevent heap
          851  +      ** fragmentation.
          852  +      */
          853  +      if( sqlite3GlobalConfig.pScratch==0 ){
          854  +        assert( pSorter->iMemory==0 );
          855  +        pSorter->nMemory = pgsz;
          856  +        pSorter->list.aMemory = (u8*)sqlite3Malloc(pgsz);
          857  +        if( !pSorter->list.aMemory ) rc = SQLITE_NOMEM;
          858  +      }
          859  +    }
          860  +  }
          861  +
          862  +  return rc;
          863  +}
          864  +#undef nWorker   /* Defined at the top of this function */
   492    865   
   493    866   /*
   494    867   ** Free the list of sorted records starting at pRecord.
   495    868   */
   496    869   static void vdbeSorterRecordFree(sqlite3 *db, SorterRecord *pRecord){
   497    870     SorterRecord *p;
   498    871     SorterRecord *pNext;
   499    872     for(p=pRecord; p; p=pNext){
   500         -    pNext = p->pNext;
          873  +    pNext = p->u.pNext;
   501    874       sqlite3DbFree(db, p);
   502    875     }
   503    876   }
          877  +
          878  +/*
          879  +** Free all resources owned by the object indicated by argument pTask. All 
          880  +** fields of *pTask are zeroed before returning.
          881  +*/
          882  +static void vdbeSortSubtaskCleanup(sqlite3 *db, SortSubtask *pTask){
          883  +  sqlite3DbFree(db, pTask->pUnpacked);
          884  +  pTask->pUnpacked = 0;
          885  +#if SQLITE_MAX_WORKER_THREADS>0
          886  +  /* pTask->list.aMemory can only be non-zero if it was handed memory
          887  +  ** from the main thread.  That only occurs when SQLITE_MAX_WORKER_THREADS>0 */
          888  +  if( pTask->list.aMemory ){
          889  +    sqlite3_free(pTask->list.aMemory);
          890  +    pTask->list.aMemory = 0;
          891  +  }else
          892  +#endif
          893  +  {
          894  +    assert( pTask->list.aMemory==0 );
          895  +    vdbeSorterRecordFree(0, pTask->list.pList);
          896  +  }
          897  +  pTask->list.pList = 0;
          898  +  if( pTask->file.pFd ){
          899  +    sqlite3OsCloseFree(pTask->file.pFd);
          900  +    pTask->file.pFd = 0;
          901  +    pTask->file.iEof = 0;
          902  +  }
          903  +  if( pTask->file2.pFd ){
          904  +    sqlite3OsCloseFree(pTask->file2.pFd);
          905  +    pTask->file2.pFd = 0;
          906  +    pTask->file2.iEof = 0;
          907  +  }
          908  +}
          909  +
          910  +#ifdef SQLITE_DEBUG_SORTER_THREADS
          911  +static void vdbeSorterWorkDebug(SortSubtask *pTask, const char *zEvent){
          912  +  i64 t;
          913  +  int iTask = (pTask - pTask->pSorter->aTask);
          914  +  sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t);
          915  +  fprintf(stderr, "%lld:%d %s\n", t, iTask, zEvent);
          916  +}
          917  +static void vdbeSorterRewindDebug(const char *zEvent){
          918  +  i64 t;
          919  +  sqlite3OsCurrentTimeInt64(sqlite3_vfs_find(0), &t);
          920  +  fprintf(stderr, "%lld:X %s\n", t, zEvent);
          921  +}
          922  +static void vdbeSorterPopulateDebug(
          923  +  SortSubtask *pTask,
          924  +  const char *zEvent
          925  +){
          926  +  i64 t;
          927  +  int iTask = (pTask - pTask->pSorter->aTask);
          928  +  sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t);
          929  +  fprintf(stderr, "%lld:bg%d %s\n", t, iTask, zEvent);
          930  +}
          931  +static void vdbeSorterBlockDebug(
          932  +  SortSubtask *pTask,
          933  +  int bBlocked,
          934  +  const char *zEvent
          935  +){
          936  +  if( bBlocked ){
          937  +    i64 t;
          938  +    sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t);
          939  +    fprintf(stderr, "%lld:main %s\n", t, zEvent);
          940  +  }
          941  +}
          942  +#else
          943  +# define vdbeSorterWorkDebug(x,y)
          944  +# define vdbeSorterRewindDebug(y)
          945  +# define vdbeSorterPopulateDebug(x,y)
          946  +# define vdbeSorterBlockDebug(x,y,z)
          947  +#endif
          948  +
          949  +#if SQLITE_MAX_WORKER_THREADS>0
          950  +/*
          951  +** Join thread pTask->pThread.
          952  +*/
          953  +static int vdbeSorterJoinThread(SortSubtask *pTask){
          954  +  int rc = SQLITE_OK;
          955  +  if( pTask->pThread ){
          956  +#ifdef SQLITE_DEBUG_SORTER_THREADS
          957  +    int bDone = pTask->bDone;
          958  +#endif
          959  +    void *pRet = SQLITE_INT_TO_PTR(SQLITE_ERROR);
          960  +    vdbeSorterBlockDebug(pTask, !bDone, "enter");
          961  +    (void)sqlite3ThreadJoin(pTask->pThread, &pRet);
          962  +    vdbeSorterBlockDebug(pTask, !bDone, "exit");
          963  +    rc = SQLITE_PTR_TO_INT(pRet);
          964  +    assert( pTask->bDone==1 );
          965  +    pTask->bDone = 0;
          966  +    pTask->pThread = 0;
          967  +  }
          968  +  return rc;
          969  +}
          970  +
          971  +/*
          972  +** Launch a background thread to run xTask(pIn).
          973  +*/
          974  +static int vdbeSorterCreateThread(
          975  +  SortSubtask *pTask,             /* Thread will use this task object */
          976  +  void *(*xTask)(void*),          /* Routine to run in a separate thread */
          977  +  void *pIn                       /* Argument passed into xTask() */
          978  +){
          979  +  assert( pTask->pThread==0 && pTask->bDone==0 );
          980  +  return sqlite3ThreadCreate(&pTask->pThread, xTask, pIn);
          981  +}
          982  +
          983  +/*
          984  +** Join all outstanding threads launched by SorterWrite() to create 
          985  +** level-0 PMAs.
          986  +*/
          987  +static int vdbeSorterJoinAll(VdbeSorter *pSorter, int rcin){
          988  +  int rc = rcin;
          989  +  int i;
          990  +
          991  +  /* This function is always called by the main user thread.
          992  +  **
          993  +  ** If this function is being called after SorterRewind() has been called, 
          994  +  ** it is possible that thread pSorter->aTask[pSorter->nTask-1].pThread
          995  +  ** is currently attempting to join one of the other threads. To avoid a race
          996  +  ** condition where this thread also attempts to join the same object, join 
          997  +  ** thread pSorter->aTask[pSorter->nTask-1].pThread first. */
          998  +  for(i=pSorter->nTask-1; i>=0; i--){
          999  +    SortSubtask *pTask = &pSorter->aTask[i];
         1000  +    int rc2 = vdbeSorterJoinThread(pTask);
         1001  +    if( rc==SQLITE_OK ) rc = rc2;
         1002  +  }
         1003  +  return rc;
         1004  +}
         1005  +#else
         1006  +# define vdbeSorterJoinAll(x,rcin) (rcin)
         1007  +# define vdbeSorterJoinThread(pTask) SQLITE_OK
         1008  +#endif
         1009  +
         1010  +/*
         1011  +** Allocate a new MergeEngine object capable of handling up to
         1012  +** nReader PmaReader inputs.
         1013  +**
         1014  +** nReader is automatically rounded up to the next power of two.
         1015  +** nReader may not exceed SORTER_MAX_MERGE_COUNT even after rounding up.
         1016  +*/
         1017  +static MergeEngine *vdbeMergeEngineNew(int nReader){
         1018  +  int N = 2;                      /* Smallest power of two >= nReader */
         1019  +  int nByte;                      /* Total bytes of space to allocate */
         1020  +  MergeEngine *pNew;              /* Pointer to allocated object to return */
         1021  +
         1022  +  assert( nReader<=SORTER_MAX_MERGE_COUNT );
         1023  +
         1024  +  while( N<nReader ) N += N;
         1025  +  nByte = sizeof(MergeEngine) + N * (sizeof(int) + sizeof(PmaReader));
         1026  +
         1027  +  pNew = sqlite3FaultSim(100) ? 0 : (MergeEngine*)sqlite3MallocZero(nByte);
         1028  +  if( pNew ){
         1029  +    pNew->nTree = N;
         1030  +    pNew->pTask = 0;
         1031  +    pNew->aReadr = (PmaReader*)&pNew[1];
         1032  +    pNew->aTree = (int*)&pNew->aReadr[N];
         1033  +  }
         1034  +  return pNew;
         1035  +}
         1036  +
         1037  +/*
         1038  +** Free the MergeEngine object passed as the only argument.
         1039  +*/
         1040  +static void vdbeMergeEngineFree(MergeEngine *pMerger){
         1041  +  int i;
         1042  +  if( pMerger ){
         1043  +    for(i=0; i<pMerger->nTree; i++){
         1044  +      vdbePmaReaderClear(&pMerger->aReadr[i]);
         1045  +    }
         1046  +  }
         1047  +  sqlite3_free(pMerger);
         1048  +}
         1049  +
         1050  +/*
         1051  +** Free all resources associated with the IncrMerger object indicated by
         1052  +** the first argument.
         1053  +*/
         1054  +static void vdbeIncrFree(IncrMerger *pIncr){
         1055  +  if( pIncr ){
         1056  +#if SQLITE_MAX_WORKER_THREADS>0
         1057  +    if( pIncr->bUseThread ){
         1058  +      vdbeSorterJoinThread(pIncr->pTask);
         1059  +      if( pIncr->aFile[0].pFd ) sqlite3OsCloseFree(pIncr->aFile[0].pFd);
         1060  +      if( pIncr->aFile[1].pFd ) sqlite3OsCloseFree(pIncr->aFile[1].pFd);
         1061  +    }
         1062  +#endif
         1063  +    vdbeMergeEngineFree(pIncr->pMerger);
         1064  +    sqlite3_free(pIncr);
         1065  +  }
         1066  +}
   504   1067   
   505   1068   /*
   506   1069   ** Reset a sorting cursor back to its original empty state.
   507   1070   */
   508   1071   void sqlite3VdbeSorterReset(sqlite3 *db, VdbeSorter *pSorter){
   509         -  if( pSorter->aIter ){
   510         -    int i;
   511         -    for(i=0; i<pSorter->nTree; i++){
   512         -      vdbeSorterIterZero(db, &pSorter->aIter[i]);
   513         -    }
   514         -    sqlite3DbFree(db, pSorter->aIter);
   515         -    pSorter->aIter = 0;
   516         -  }
   517         -  if( pSorter->pTemp1 ){
   518         -    sqlite3OsCloseFree(pSorter->pTemp1);
   519         -    pSorter->pTemp1 = 0;
   520         -  }
   521         -  vdbeSorterRecordFree(db, pSorter->pRecord);
   522         -  pSorter->pRecord = 0;
   523         -  pSorter->iWriteOff = 0;
   524         -  pSorter->iReadOff = 0;
   525         -  pSorter->nInMemory = 0;
   526         -  pSorter->nTree = 0;
   527         -  pSorter->nPMA = 0;
   528         -  pSorter->aTree = 0;
   529         -}
   530         -
         1072  +  int i;
         1073  +  (void)vdbeSorterJoinAll(pSorter, SQLITE_OK);
         1074  +  assert( pSorter->bUseThreads || pSorter->pReader==0 );
         1075  +#if SQLITE_MAX_WORKER_THREADS>0
         1076  +  if( pSorter->pReader ){
         1077  +    vdbePmaReaderClear(pSorter->pReader);
         1078  +    sqlite3DbFree(db, pSorter->pReader);
         1079  +    pSorter->pReader = 0;
         1080  +  }
         1081  +#endif
         1082  +  vdbeMergeEngineFree(pSorter->pMerger);
         1083  +  pSorter->pMerger = 0;
         1084  +  for(i=0; i<pSorter->nTask; i++){
         1085  +    SortSubtask *pTask = &pSorter->aTask[i];
         1086  +    vdbeSortSubtaskCleanup(db, pTask);
         1087  +  }
         1088  +  if( pSorter->list.aMemory==0 ){
         1089  +    vdbeSorterRecordFree(0, pSorter->list.pList);
         1090  +  }
         1091  +  pSorter->list.pList = 0;
         1092  +  pSorter->list.szPMA = 0;
         1093  +  pSorter->bUsePMA = 0;
         1094  +  pSorter->iMemory = 0;
         1095  +  pSorter->mxKeysize = 0;
         1096  +  sqlite3DbFree(db, pSorter->pUnpacked);
         1097  +  pSorter->pUnpacked = 0;
         1098  +}
   531   1099   
   532   1100   /*
   533   1101   ** Free any cursor components allocated by sqlite3VdbeSorterXXX routines.
   534   1102   */
   535   1103   void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){
   536   1104     VdbeSorter *pSorter = pCsr->pSorter;
   537   1105     if( pSorter ){
   538   1106       sqlite3VdbeSorterReset(db, pSorter);
   539         -    sqlite3DbFree(db, pSorter->pUnpacked);
         1107  +    sqlite3_free(pSorter->list.aMemory);
   540   1108       sqlite3DbFree(db, pSorter);
   541   1109       pCsr->pSorter = 0;
   542   1110     }
   543   1111   }
   544   1112   
         1113  +#if SQLITE_MAX_MMAP_SIZE>0
         1114  +/*
         1115  +** The first argument is a file-handle open on a temporary file. The file
         1116  +** is guaranteed to be nByte bytes or smaller in size. This function
         1117  +** attempts to extend the file to nByte bytes in size and to ensure that
         1118  +** the VFS has memory mapped it.
         1119  +**
         1120  +** Whether or not the file does end up memory mapped of course depends on
         1121  +** the specific VFS implementation.
         1122  +*/
         1123  +static void vdbeSorterExtendFile(sqlite3 *db, sqlite3_file *pFd, i64 nByte){
         1124  +  if( nByte<=(i64)(db->nMaxSorterMmap) ){
         1125  +    int rc = sqlite3OsTruncate(pFd, nByte);
         1126  +    if( rc==SQLITE_OK ){
         1127  +      void *p = 0;
         1128  +      sqlite3OsFetch(pFd, 0, (int)nByte, &p);
         1129  +      sqlite3OsUnfetch(pFd, 0, p);
         1130  +    }
         1131  +  }
         1132  +}
         1133  +#else
         1134  +# define vdbeSorterExtendFile(x,y,z)
         1135  +#endif
         1136  +
   545   1137   /*
   546   1138   ** Allocate space for a file-handle and open a temporary file. If successful,
   547         -** set *ppFile to point to the malloc'd file-handle and return SQLITE_OK.
   548         -** Otherwise, set *ppFile to 0 and return an SQLite error code.
         1139  +** set *ppFd to point to the malloc'd file-handle and return SQLITE_OK.
         1140  +** Otherwise, set *ppFd to 0 and return an SQLite error code.
   549   1141   */
   550         -static int vdbeSorterOpenTempFile(sqlite3 *db, sqlite3_file **ppFile){
   551         -  int dummy;
   552         -  return sqlite3OsOpenMalloc(db->pVfs, 0, ppFile,
         1142  +static int vdbeSorterOpenTempFile(
         1143  +  sqlite3 *db,                    /* Database handle doing sort */
         1144  +  i64 nExtend,                    /* Attempt to extend file to this size */
         1145  +  sqlite3_file **ppFd
         1146  +){
         1147  +  int rc;
         1148  +  rc = sqlite3OsOpenMalloc(db->pVfs, 0, ppFd,
   553   1149         SQLITE_OPEN_TEMP_JOURNAL |
   554   1150         SQLITE_OPEN_READWRITE    | SQLITE_OPEN_CREATE |
   555         -      SQLITE_OPEN_EXCLUSIVE    | SQLITE_OPEN_DELETEONCLOSE, &dummy
         1151  +      SQLITE_OPEN_EXCLUSIVE    | SQLITE_OPEN_DELETEONCLOSE, &rc
   556   1152     );
         1153  +  if( rc==SQLITE_OK ){
         1154  +    i64 max = SQLITE_MAX_MMAP_SIZE;
         1155  +    sqlite3OsFileControlHint(*ppFd, SQLITE_FCNTL_MMAP_SIZE, (void*)&max);
         1156  +    if( nExtend>0 ){
         1157  +      vdbeSorterExtendFile(db, *ppFd, nExtend);
         1158  +    }
         1159  +  }
         1160  +  return rc;
   557   1161   }
         1162  +
         1163  +/*
         1164  +** If it has not already been allocated, allocate the UnpackedRecord 
         1165  +** structure at pTask->pUnpacked. Return SQLITE_OK if successful (or 
         1166  +** if no allocation was required), or SQLITE_NOMEM otherwise.
         1167  +*/
         1168  +static int vdbeSortAllocUnpacked(SortSubtask *pTask){
         1169  +  if( pTask->pUnpacked==0 ){
         1170  +    char *pFree;
         1171  +    pTask->pUnpacked = sqlite3VdbeAllocUnpackedRecord(
         1172  +        pTask->pSorter->pKeyInfo, 0, 0, &pFree
         1173  +    );
         1174  +    assert( pTask->pUnpacked==(UnpackedRecord*)pFree );
         1175  +    if( pFree==0 ) return SQLITE_NOMEM;
         1176  +    pTask->pUnpacked->nField = pTask->pSorter->pKeyInfo->nField;
         1177  +    pTask->pUnpacked->errCode = 0;
         1178  +  }
         1179  +  return SQLITE_OK;
         1180  +}
         1181  +
   558   1182   
   559   1183   /*
   560   1184   ** Merge the two sorted lists p1 and p2 into a single list.
   561   1185   ** Set *ppOut to the head of the new list.
   562   1186   */
   563   1187   static void vdbeSorterMerge(
   564         -  const VdbeCursor *pCsr,         /* For pKeyInfo */
         1188  +  SortSubtask *pTask,             /* Calling thread context */
   565   1189     SorterRecord *p1,               /* First list to merge */
   566   1190     SorterRecord *p2,               /* Second list to merge */
   567   1191     SorterRecord **ppOut            /* OUT: Head of merged list */
   568   1192   ){
   569   1193     SorterRecord *pFinal = 0;
   570   1194     SorterRecord **pp = &pFinal;
   571         -  void *pVal2 = p2 ? p2->pVal : 0;
         1195  +  void *pVal2 = p2 ? SRVAL(p2) : 0;
   572   1196   
   573   1197     while( p1 && p2 ){
   574   1198       int res;
   575         -    vdbeSorterCompare(pCsr, 0, p1->pVal, p1->nVal, pVal2, p2->nVal, &res);
         1199  +    res = vdbeSorterCompare(pTask, SRVAL(p1), p1->nVal, pVal2, p2->nVal);
   576   1200       if( res<=0 ){
   577   1201         *pp = p1;
   578         -      pp = &p1->pNext;
   579         -      p1 = p1->pNext;
         1202  +      pp = &p1->u.pNext;
         1203  +      p1 = p1->u.pNext;
   580   1204         pVal2 = 0;
   581   1205       }else{
   582   1206         *pp = p2;
   583         -       pp = &p2->pNext;
   584         -      p2 = p2->pNext;
         1207  +       pp = &p2->u.pNext;
         1208  +      p2 = p2->u.pNext;
   585   1209         if( p2==0 ) break;
   586         -      pVal2 = p2->pVal;
         1210  +      pVal2 = SRVAL(p2);
   587   1211       }
   588   1212     }
   589   1213     *pp = p1 ? p1 : p2;
   590   1214     *ppOut = pFinal;
   591   1215   }
   592   1216   
   593   1217   /*
   594         -** Sort the linked list of records headed at pCsr->pRecord. Return SQLITE_OK
   595         -** if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if an error
   596         -** occurs.
         1218  +** Sort the linked list of records headed at pList->pList. Return 
         1219  +** SQLITE_OK if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if 
         1220  +** an error occurs.
   597   1221   */
   598         -static int vdbeSorterSort(const VdbeCursor *pCsr){
         1222  +static int vdbeSorterSort(SortSubtask *pTask, SorterList *pList){
   599   1223     int i;
   600   1224     SorterRecord **aSlot;
   601   1225     SorterRecord *p;
   602         -  VdbeSorter *pSorter = pCsr->pSorter;
         1226  +  int rc;
         1227  +
         1228  +  rc = vdbeSortAllocUnpacked(pTask);
         1229  +  if( rc!=SQLITE_OK ) return rc;
   603   1230   
   604   1231     aSlot = (SorterRecord **)sqlite3MallocZero(64 * sizeof(SorterRecord *));
   605   1232     if( !aSlot ){
   606   1233       return SQLITE_NOMEM;
   607   1234     }
   608   1235   
   609         -  p = pSorter->pRecord;
         1236  +  p = pList->pList;
   610   1237     while( p ){
   611         -    SorterRecord *pNext = p->pNext;
   612         -    p->pNext = 0;
         1238  +    SorterRecord *pNext;
         1239  +    if( pList->aMemory ){
         1240  +      if( (u8*)p==pList->aMemory ){
         1241  +        pNext = 0;
         1242  +      }else{
         1243  +        assert( p->u.iNext<sqlite3MallocSize(pList->aMemory) );
         1244  +        pNext = (SorterRecord*)&pList->aMemory[p->u.iNext];
         1245  +      }
         1246  +    }else{
         1247  +      pNext = p->u.pNext;
         1248  +    }
         1249  +
         1250  +    p->u.pNext = 0;
   613   1251       for(i=0; aSlot[i]; i++){
   614         -      vdbeSorterMerge(pCsr, p, aSlot[i], &p);
         1252  +      vdbeSorterMerge(pTask, p, aSlot[i], &p);
   615   1253         aSlot[i] = 0;
   616   1254       }
   617   1255       aSlot[i] = p;
   618   1256       p = pNext;
   619   1257     }
   620   1258   
   621   1259     p = 0;
   622   1260     for(i=0; i<64; i++){
   623         -    vdbeSorterMerge(pCsr, p, aSlot[i], &p);
         1261  +    vdbeSorterMerge(pTask, p, aSlot[i], &p);
   624   1262     }
   625         -  pSorter->pRecord = p;
         1263  +  pList->pList = p;
   626   1264   
   627   1265     sqlite3_free(aSlot);
   628         -  return SQLITE_OK;
         1266  +  assert( pTask->pUnpacked->errCode==SQLITE_OK 
         1267  +       || pTask->pUnpacked->errCode==SQLITE_NOMEM 
         1268  +  );
         1269  +  return pTask->pUnpacked->errCode;
   629   1270   }
   630   1271   
   631   1272   /*
   632         -** Initialize a file-writer object.
         1273  +** Initialize a PMA-writer object.
   633   1274   */
   634         -static void fileWriterInit(
   635         -  sqlite3 *db,                    /* Database (for malloc) */
   636         -  sqlite3_file *pFile,            /* File to write to */
   637         -  FileWriter *p,                  /* Object to populate */
   638         -  i64 iStart                      /* Offset of pFile to begin writing at */
         1275  +static void vdbePmaWriterInit(
         1276  +  sqlite3_file *pFd,              /* File handle to write to */
         1277  +  PmaWriter *p,                   /* Object to populate */
         1278  +  int nBuf,                       /* Buffer size */
         1279  +  i64 iStart                      /* Offset of pFd to begin writing at */
   639   1280   ){
   640         -  int nBuf = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
   641         -
   642         -  memset(p, 0, sizeof(FileWriter));
   643         -  p->aBuffer = (u8 *)sqlite3DbMallocRaw(db, nBuf);
         1281  +  memset(p, 0, sizeof(PmaWriter));
         1282  +  p->aBuffer = (u8*)sqlite3Malloc(nBuf);
   644   1283     if( !p->aBuffer ){
   645   1284       p->eFWErr = SQLITE_NOMEM;
   646   1285     }else{
   647   1286       p->iBufEnd = p->iBufStart = (iStart % nBuf);
   648   1287       p->iWriteOff = iStart - p->iBufStart;
   649   1288       p->nBuffer = nBuf;
   650         -    p->pFile = pFile;
         1289  +    p->pFd = pFd;
   651   1290     }
   652   1291   }
   653   1292   
   654   1293   /*
   655         -** Write nData bytes of data to the file-write object. Return SQLITE_OK
         1294  +** Write nData bytes of data to the PMA. Return SQLITE_OK
   656   1295   ** if successful, or an SQLite error code if an error occurs.
   657   1296   */
   658         -static void fileWriterWrite(FileWriter *p, u8 *pData, int nData){
         1297  +static void vdbePmaWriteBlob(PmaWriter *p, u8 *pData, int nData){
   659   1298     int nRem = nData;
   660   1299     while( nRem>0 && p->eFWErr==0 ){
   661   1300       int nCopy = nRem;
   662   1301       if( nCopy>(p->nBuffer - p->iBufEnd) ){
   663   1302         nCopy = p->nBuffer - p->iBufEnd;
   664   1303       }
   665   1304   
   666   1305       memcpy(&p->aBuffer[p->iBufEnd], &pData[nData-nRem], nCopy);
   667   1306       p->iBufEnd += nCopy;
   668   1307       if( p->iBufEnd==p->nBuffer ){
   669         -      p->eFWErr = sqlite3OsWrite(p->pFile, 
         1308  +      p->eFWErr = sqlite3OsWrite(p->pFd, 
   670   1309             &p->aBuffer[p->iBufStart], p->iBufEnd - p->iBufStart, 
   671   1310             p->iWriteOff + p->iBufStart
   672   1311         );
   673   1312         p->iBufStart = p->iBufEnd = 0;
   674   1313         p->iWriteOff += p->nBuffer;
   675   1314       }
   676   1315       assert( p->iBufEnd<p->nBuffer );
   677   1316   
   678   1317       nRem -= nCopy;
   679   1318     }
   680   1319   }
   681   1320   
   682   1321   /*
   683         -** Flush any buffered data to disk and clean up the file-writer object.
   684         -** The results of using the file-writer after this call are undefined.
         1322  +** Flush any buffered data to disk and clean up the PMA-writer object.
         1323  +** The results of using the PMA-writer after this call are undefined.
   685   1324   ** Return SQLITE_OK if flushing the buffered data succeeds or is not 
   686   1325   ** required. Otherwise, return an SQLite error code.
   687   1326   **
   688   1327   ** Before returning, set *piEof to the offset immediately following the
   689   1328   ** last byte written to the file.
   690   1329   */
   691         -static int fileWriterFinish(sqlite3 *db, FileWriter *p, i64 *piEof){
         1330  +static int vdbePmaWriterFinish(PmaWriter *p, i64 *piEof){
   692   1331     int rc;
   693   1332     if( p->eFWErr==0 && ALWAYS(p->aBuffer) && p->iBufEnd>p->iBufStart ){
   694         -    p->eFWErr = sqlite3OsWrite(p->pFile, 
         1333  +    p->eFWErr = sqlite3OsWrite(p->pFd, 
   695   1334           &p->aBuffer[p->iBufStart], p->iBufEnd - p->iBufStart, 
   696   1335           p->iWriteOff + p->iBufStart
   697   1336       );
   698   1337     }
   699   1338     *piEof = (p->iWriteOff + p->iBufEnd);
   700         -  sqlite3DbFree(db, p->aBuffer);
         1339  +  sqlite3_free(p->aBuffer);
   701   1340     rc = p->eFWErr;
   702         -  memset(p, 0, sizeof(FileWriter));
         1341  +  memset(p, 0, sizeof(PmaWriter));
   703   1342     return rc;
   704   1343   }
   705   1344   
   706   1345   /*
   707         -** Write value iVal encoded as a varint to the file-write object. Return 
         1346  +** Write value iVal encoded as a varint to the PMA. Return 
   708   1347   ** SQLITE_OK if successful, or an SQLite error code if an error occurs.
   709   1348   */
   710         -static void fileWriterWriteVarint(FileWriter *p, u64 iVal){
         1349  +static void vdbePmaWriteVarint(PmaWriter *p, u64 iVal){
   711   1350     int nByte; 
   712   1351     u8 aByte[10];
   713   1352     nByte = sqlite3PutVarint(aByte, iVal);
   714         -  fileWriterWrite(p, aByte, nByte);
         1353  +  vdbePmaWriteBlob(p, aByte, nByte);
   715   1354   }
   716   1355   
   717   1356   /*
   718         -** Write the current contents of the in-memory linked-list to a PMA. Return
   719         -** SQLITE_OK if successful, or an SQLite error code otherwise.
         1357  +** Write the current contents of in-memory linked-list pList to a level-0
         1358  +** PMA in the temp file belonging to sub-task pTask. Return SQLITE_OK if 
         1359  +** successful, or an SQLite error code otherwise.
   720   1360   **
   721   1361   ** The format of a PMA is:
   722   1362   **
   723   1363   **     * A varint. This varint contains the total number of bytes of content
   724   1364   **       in the PMA (not including the varint itself).
   725   1365   **
   726   1366   **     * One or more records packed end-to-end in order of ascending keys. 
   727   1367   **       Each record consists of a varint followed by a blob of data (the 
   728   1368   **       key). The varint is the number of bytes in the blob of data.
   729   1369   */
   730         -static int vdbeSorterListToPMA(sqlite3 *db, const VdbeCursor *pCsr){
         1370  +static int vdbeSorterListToPMA(SortSubtask *pTask, SorterList *pList){
         1371  +  sqlite3 *db = pTask->pSorter->db;
   731   1372     int rc = SQLITE_OK;             /* Return code */
   732         -  VdbeSorter *pSorter = pCsr->pSorter;
   733         -  FileWriter writer;
         1373  +  PmaWriter writer;               /* Object used to write to the file */
   734   1374   
   735         -  memset(&writer, 0, sizeof(FileWriter));
         1375  +#ifdef SQLITE_DEBUG
         1376  +  /* Set iSz to the expected size of file pTask->file after writing the PMA. 
         1377  +  ** This is used by an assert() statement at the end of this function.  */
         1378  +  i64 iSz = pList->szPMA + sqlite3VarintLen(pList->szPMA) + pTask->file.iEof;
         1379  +#endif
   736   1380   
   737         -  if( pSorter->nInMemory==0 ){
   738         -    assert( pSorter->pRecord==0 );
   739         -    return rc;
   740         -  }
   741         -
   742         -  rc = vdbeSorterSort(pCsr);
         1381  +  vdbeSorterWorkDebug(pTask, "enter");
         1382  +  memset(&writer, 0, sizeof(PmaWriter));
         1383  +  assert( pList->szPMA>0 );
   743   1384   
   744   1385     /* If the first temporary PMA file has not been opened, open it now. */
   745         -  if( rc==SQLITE_OK && pSorter->pTemp1==0 ){
   746         -    rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
   747         -    assert( rc!=SQLITE_OK || pSorter->pTemp1 );
   748         -    assert( pSorter->iWriteOff==0 );
   749         -    assert( pSorter->nPMA==0 );
         1386  +  if( pTask->file.pFd==0 ){
         1387  +    rc = vdbeSorterOpenTempFile(db, 0, &pTask->file.pFd);
         1388  +    assert( rc!=SQLITE_OK || pTask->file.pFd );
         1389  +    assert( pTask->file.iEof==0 );
         1390  +    assert( pTask->nPMA==0 );
         1391  +  }
         1392  +
         1393  +  /* Try to get the file to memory map */
         1394  +  if( rc==SQLITE_OK ){
         1395  +    vdbeSorterExtendFile(db, pTask->file.pFd, pTask->file.iEof+pList->szPMA+9);
         1396  +  }
         1397  +
         1398  +  /* Sort the list */
         1399  +  if( rc==SQLITE_OK ){
         1400  +    rc = vdbeSorterSort(pTask, pList);
   750   1401     }
   751   1402   
   752   1403     if( rc==SQLITE_OK ){
   753   1404       SorterRecord *p;
   754   1405       SorterRecord *pNext = 0;
   755   1406   
   756         -    fileWriterInit(db, pSorter->pTemp1, &writer, pSorter->iWriteOff);
   757         -    pSorter->nPMA++;
   758         -    fileWriterWriteVarint(&writer, pSorter->nInMemory);
   759         -    for(p=pSorter->pRecord; p; p=pNext){
   760         -      pNext = p->pNext;
   761         -      fileWriterWriteVarint(&writer, p->nVal);
   762         -      fileWriterWrite(&writer, p->pVal, p->nVal);
   763         -      sqlite3DbFree(db, p);
   764         -    }
   765         -    pSorter->pRecord = p;
   766         -    rc = fileWriterFinish(db, &writer, &pSorter->iWriteOff);
         1407  +    vdbePmaWriterInit(pTask->file.pFd, &writer, pTask->pSorter->pgsz,
         1408  +                      pTask->file.iEof);
         1409  +    pTask->nPMA++;
         1410  +    vdbePmaWriteVarint(&writer, pList->szPMA);
         1411  +    for(p=pList->pList; p; p=pNext){
         1412  +      pNext = p->u.pNext;
         1413  +      vdbePmaWriteVarint(&writer, p->nVal);
         1414  +      vdbePmaWriteBlob(&writer, SRVAL(p), p->nVal);
         1415  +      if( pList->aMemory==0 ) sqlite3_free(p);
         1416  +    }
         1417  +    pList->pList = p;
         1418  +    rc = vdbePmaWriterFinish(&writer, &pTask->file.iEof);
         1419  +  }
         1420  +
         1421  +  vdbeSorterWorkDebug(pTask, "exit");
         1422  +  assert( rc!=SQLITE_OK || pList->pList==0 );
         1423  +  assert( rc!=SQLITE_OK || pTask->file.iEof==iSz );
         1424  +  return rc;
         1425  +}
         1426  +
         1427  +/*
         1428  +** Advance the MergeEngine to its next entry.
         1429  +** Set *pbEof to true if there is no next entry because
         1430  +** the MergeEngine has reached the end of all its inputs.
         1431  +**
         1432  +** Return SQLITE_OK if successful or an error code if an error occurs.
         1433  +*/
         1434  +static int vdbeMergeEngineStep(
         1435  +  MergeEngine *pMerger,      /* The merge engine to advance to the next row */
         1436  +  int *pbEof                 /* Set TRUE at EOF.  Set false for more content */
         1437  +){
         1438  +  int rc;
         1439  +  int iPrev = pMerger->aTree[1];/* Index of PmaReader to advance */
         1440  +  SortSubtask *pTask = pMerger->pTask;
         1441  +
         1442  +  /* Advance the current PmaReader */
         1443  +  rc = vdbePmaReaderNext(&pMerger->aReadr[iPrev]);
         1444  +
         1445  +  /* Update contents of aTree[] */
         1446  +  if( rc==SQLITE_OK ){
         1447  +    int i;                      /* Index of aTree[] to recalculate */
         1448  +    PmaReader *pReadr1;         /* First PmaReader to compare */
         1449  +    PmaReader *pReadr2;         /* Second PmaReader to compare */
         1450  +    u8 *pKey2;                  /* To pReadr2->aKey, or 0 if record cached */
         1451  +
         1452  +    /* Find the first two PmaReaders to compare. The one that was just
         1453  +    ** advanced (iPrev) and the one next to it in the array.  */
         1454  +    pReadr1 = &pMerger->aReadr[(iPrev & 0xFFFE)];
         1455  +    pReadr2 = &pMerger->aReadr[(iPrev | 0x0001)];
         1456  +    pKey2 = pReadr2->aKey;
         1457  +
         1458  +    for(i=(pMerger->nTree+iPrev)/2; i>0; i=i/2){
         1459  +      /* Compare pReadr1 and pReadr2. Store the result in variable iRes. */
         1460  +      int iRes;
         1461  +      if( pReadr1->pFd==0 ){
         1462  +        iRes = +1;
         1463  +      }else if( pReadr2->pFd==0 ){
         1464  +        iRes = -1;
         1465  +      }else{
         1466  +        iRes = vdbeSorterCompare(pTask, 
         1467  +            pReadr1->aKey, pReadr1->nKey, pKey2, pReadr2->nKey
         1468  +        );
         1469  +      }
         1470  +
         1471  +      /* If pReadr1 contained the smaller value, set aTree[i] to its index.
         1472  +      ** Then set pReadr2 to the next PmaReader to compare to pReadr1. In this
         1473  +      ** case there is no cache of pReadr2 in pTask->pUnpacked, so set
         1474  +      ** pKey2 to point to the record belonging to pReadr2.
         1475  +      **
         1476  +      ** Alternatively, if pReadr2 contains the smaller of the two values,
         1477  +      ** set aTree[i] to its index and update pReadr1. If vdbeSorterCompare()
         1478  +      ** was actually called above, then pTask->pUnpacked now contains
         1479  +      ** a value equivalent to pReadr2. So set pKey2 to NULL to prevent
         1480  +      ** vdbeSorterCompare() from decoding pReadr2 again.
         1481  +      **
         1482  +      ** If the two values were equal, then the value from the oldest
         1483  +      ** PMA should be considered smaller. The MergeEngine.aReadr[] array
         1484  +      ** is sorted from oldest to newest, so pReadr1 contains older values
         1485  +      ** than pReadr2 iff (pReadr1<pReadr2).  */
         1486  +      if( iRes<0 || (iRes==0 && pReadr1<pReadr2) ){
         1487  +        pMerger->aTree[i] = (int)(pReadr1 - pMerger->aReadr);
         1488  +        pReadr2 = &pMerger->aReadr[ pMerger->aTree[i ^ 0x0001] ];
         1489  +        pKey2 = pReadr2->aKey;
         1490  +      }else{
         1491  +        if( pReadr1->pFd ) pKey2 = 0;
         1492  +        pMerger->aTree[i] = (int)(pReadr2 - pMerger->aReadr);
         1493  +        pReadr1 = &pMerger->aReadr[ pMerger->aTree[i ^ 0x0001] ];
         1494  +      }
         1495  +    }
         1496  +    *pbEof = (pMerger->aReadr[pMerger->aTree[1]].pFd==0);
         1497  +  }
         1498  +
         1499  +  return (rc==SQLITE_OK ? pTask->pUnpacked->errCode : rc);
         1500  +}
         1501  +
         1502  +#if SQLITE_MAX_WORKER_THREADS>0
         1503  +/*
         1504  +** The main routine for background threads that write level-0 PMAs.
         1505  +*/
         1506  +static void *vdbeSorterFlushThread(void *pCtx){
         1507  +  SortSubtask *pTask = (SortSubtask*)pCtx;
         1508  +  int rc;                         /* Return code */
         1509  +  assert( pTask->bDone==0 );
         1510  +  rc = vdbeSorterListToPMA(pTask, &pTask->list);
         1511  +  pTask->bDone = 1;
         1512  +  return SQLITE_INT_TO_PTR(rc);
         1513  +}
         1514  +#endif /* SQLITE_MAX_WORKER_THREADS>0 */
         1515  +
         1516  +/*
         1517  +** Flush the current contents of VdbeSorter.list to a new PMA, possibly
         1518  +** using a background thread.
         1519  +*/
         1520  +static int vdbeSorterFlushPMA(VdbeSorter *pSorter){
         1521  +#if SQLITE_MAX_WORKER_THREADS==0
         1522  +  pSorter->bUsePMA = 1;
         1523  +  return vdbeSorterListToPMA(&pSorter->aTask[0], &pSorter->list);
         1524  +#else
         1525  +  int rc = SQLITE_OK;
         1526  +  int i;
         1527  +  SortSubtask *pTask = 0;    /* Thread context used to create new PMA */
         1528  +  int nWorker = (pSorter->nTask-1);
         1529  +
         1530  +  /* Set the flag to indicate that at least one PMA has been written. 
         1531  +  ** Or will be, anyhow.  */
         1532  +  pSorter->bUsePMA = 1;
         1533  +
         1534  +  /* Select a sub-task to sort and flush the current list of in-memory
         1535  +  ** records to disk. If the sorter is running in multi-threaded mode,
         1536  +  ** round-robin between the first (pSorter->nTask-1) tasks. Except, if
         1537  +  ** the background thread from a sub-task's previous turn is still running,
         1538  +  ** skip it. If the first (pSorter->nTask-1) sub-tasks are all still busy,
         1539  +  ** fall back to using the final sub-task. The first (pSorter->nTask-1)
         1540  +  ** sub-tasks are preferred as they use background threads - the final 
         1541  +  ** sub-task uses the main thread. */
         1542  +  for(i=0; i<nWorker; i++){
         1543  +    int iTest = (pSorter->iPrev + i + 1) % nWorker;
         1544  +    pTask = &pSorter->aTask[iTest];
         1545  +    if( pTask->bDone ){
         1546  +      rc = vdbeSorterJoinThread(pTask);
         1547  +    }
         1548  +    if( rc!=SQLITE_OK || pTask->pThread==0 ) break;
         1549  +  }
         1550  +
         1551  +  if( rc==SQLITE_OK ){
         1552  +    if( i==nWorker ){
         1553  +      /* Use the foreground thread for this operation */
         1554  +      rc = vdbeSorterListToPMA(&pSorter->aTask[nWorker], &pSorter->list);
         1555  +    }else{
         1556  +      /* Launch a background thread for this operation */
         1557  +      u8 *aMem = pTask->list.aMemory;
         1558  +      void *pCtx = (void*)pTask;
         1559  +
         1560  +      assert( pTask->pThread==0 && pTask->bDone==0 );
         1561  +      assert( pTask->list.pList==0 );
         1562  +      assert( pTask->list.aMemory==0 || pSorter->list.aMemory!=0 );
         1563  +
         1564  +      pSorter->iPrev = (u8)(pTask - pSorter->aTask);
         1565  +      pTask->list = pSorter->list;
         1566  +      pSorter->list.pList = 0;
         1567  +      pSorter->list.szPMA = 0;
         1568  +      if( aMem ){
         1569  +        pSorter->list.aMemory = aMem;
         1570  +        pSorter->nMemory = sqlite3MallocSize(aMem);
         1571  +      }else if( pSorter->list.aMemory ){
         1572  +        pSorter->list.aMemory = sqlite3Malloc(pSorter->nMemory);
         1573  +        if( !pSorter->list.aMemory ) return SQLITE_NOMEM;
         1574  +      }
         1575  +
         1576  +      rc = vdbeSorterCreateThread(pTask, vdbeSorterFlushThread, pCtx);
         1577  +    }
   767   1578     }
   768   1579   
   769   1580     return rc;
         1581  +#endif /* SQLITE_MAX_WORKER_THREADS!=0 */
   770   1582   }
   771   1583   
   772   1584   /*
   773   1585   ** Add a record to the sorter.
   774   1586   */
   775   1587   int sqlite3VdbeSorterWrite(
   776         -  sqlite3 *db,                    /* Database handle */
   777         -  const VdbeCursor *pCsr,               /* Sorter cursor */
         1588  +  const VdbeCursor *pCsr,         /* Sorter cursor */
   778   1589     Mem *pVal                       /* Memory cell containing record */
   779   1590   ){
   780   1591     VdbeSorter *pSorter = pCsr->pSorter;
   781   1592     int rc = SQLITE_OK;             /* Return Code */
   782   1593     SorterRecord *pNew;             /* New list element */
   783   1594   
         1595  +  int bFlush;                     /* True to flush contents of memory to PMA */
         1596  +  int nReq;                       /* Bytes of memory required */
         1597  +  int nPMA;                       /* Bytes of PMA space required */
         1598  +
   784   1599     assert( pSorter );
   785         -  pSorter->nInMemory += sqlite3VarintLen(pVal->n) + pVal->n;
   786   1600   
   787         -  pNew = (SorterRecord *)sqlite3DbMallocRaw(db, pVal->n + sizeof(SorterRecord));
   788         -  if( pNew==0 ){
   789         -    rc = SQLITE_NOMEM;
   790         -  }else{
   791         -    pNew->pVal = (void *)&pNew[1];
   792         -    memcpy(pNew->pVal, pVal->z, pVal->n);
   793         -    pNew->nVal = pVal->n;
   794         -    pNew->pNext = pSorter->pRecord;
   795         -    pSorter->pRecord = pNew;
   796         -  }
   797         -
   798         -  /* See if the contents of the sorter should now be written out. They
   799         -  ** are written out when either of the following are true:
         1601  +  /* Figure out whether or not the current contents of memory should be
         1602  +  ** flushed to a PMA before continuing. If so, do so.
         1603  +  **
         1604  +  ** If using the single large allocation mode (pSorter->aMemory!=0), then
         1605  +  ** flush the contents of memory to a new PMA if (a) at least one value is
         1606  +  ** already in memory and (b) the new value will not fit in memory.
         1607  +  ** 
         1608  +  ** Or, if using separate allocations for each record, flush the contents
         1609  +  ** of memory to a PMA if either of the following are true:
   800   1610     **
   801   1611     **   * The total memory allocated for the in-memory list is greater 
   802   1612     **     than (page-size * cache-size), or
   803   1613     **
   804   1614     **   * The total memory allocated for the in-memory list is greater 
   805   1615     **     than (page-size * 10) and sqlite3HeapNearlyFull() returns true.
   806   1616     */
   807         -  if( rc==SQLITE_OK && pSorter->mxPmaSize>0 && (
   808         -        (pSorter->nInMemory>pSorter->mxPmaSize)
   809         -     || (pSorter->nInMemory>pSorter->mnPmaSize && sqlite3HeapNearlyFull())
   810         -  )){
   811         -#ifdef SQLITE_DEBUG
   812         -    i64 nExpect = pSorter->iWriteOff
   813         -                + sqlite3VarintLen(pSorter->nInMemory)
   814         -                + pSorter->nInMemory;
         1617  +  nReq = pVal->n + sizeof(SorterRecord);
         1618  +  nPMA = pVal->n + sqlite3VarintLen(pVal->n);
         1619  +  if( pSorter->mxPmaSize ){
         1620  +    if( pSorter->list.aMemory ){
         1621  +      bFlush = pSorter->iMemory && (pSorter->iMemory+nReq) > pSorter->mxPmaSize;
         1622  +    }else{
         1623  +      bFlush = (
         1624  +          (pSorter->list.szPMA > pSorter->mxPmaSize)
         1625  +       || (pSorter->list.szPMA > pSorter->mnPmaSize && sqlite3HeapNearlyFull())
         1626  +      );
         1627  +    }
         1628  +    if( bFlush ){
         1629  +      rc = vdbeSorterFlushPMA(pSorter);
         1630  +      pSorter->list.szPMA = 0;
         1631  +      pSorter->iMemory = 0;
         1632  +      assert( rc!=SQLITE_OK || pSorter->list.pList==0 );
         1633  +    }
         1634  +  }
         1635  +
         1636  +  pSorter->list.szPMA += nPMA;
         1637  +  if( nPMA>pSorter->mxKeysize ){
         1638  +    pSorter->mxKeysize = nPMA;
         1639  +  }
         1640  +
         1641  +  if( pSorter->list.aMemory ){
         1642  +    int nMin = pSorter->iMemory + nReq;
         1643  +
         1644  +    if( nMin>pSorter->nMemory ){
         1645  +      u8 *aNew;
         1646  +      int nNew = pSorter->nMemory * 2;
         1647  +      while( nNew < nMin ) nNew = nNew*2;
         1648  +      if( nNew > pSorter->mxPmaSize ) nNew = pSorter->mxPmaSize;
         1649  +      if( nNew < nMin ) nNew = nMin;
         1650  +
         1651  +      aNew = sqlite3Realloc(pSorter->list.aMemory, nNew);
         1652  +      if( !aNew ) return SQLITE_NOMEM;
         1653  +      pSorter->list.pList = (SorterRecord*)(
         1654  +          aNew + ((u8*)pSorter->list.pList - pSorter->list.aMemory)
         1655  +      );
         1656  +      pSorter->list.aMemory = aNew;
         1657  +      pSorter->nMemory = nNew;
         1658  +    }
         1659  +
         1660  +    pNew = (SorterRecord*)&pSorter->list.aMemory[pSorter->iMemory];
         1661  +    pSorter->iMemory += ROUND8(nReq);
         1662  +    pNew->u.iNext = (int)((u8*)(pSorter->list.pList) - pSorter->list.aMemory);
         1663  +  }else{
         1664  +    pNew = (SorterRecord *)sqlite3Malloc(nReq);
         1665  +    if( pNew==0 ){
         1666  +      return SQLITE_NOMEM;
         1667  +    }
         1668  +    pNew->u.pNext = pSorter->list.pList;
         1669  +  }
         1670  +
         1671  +  memcpy(SRVAL(pNew), pVal->z, pVal->n);
         1672  +  pNew->nVal = pVal->n;
         1673  +  pSorter->list.pList = pNew;
         1674  +
         1675  +  return rc;
         1676  +}
         1677  +
         1678  +/*
         1679  +** Read keys from pIncr->pMerger and populate pIncr->aFile[1]. The format
         1680  +** of the data stored in aFile[1] is the same as that used by regular PMAs,
         1681  +** except that the number-of-bytes varint is omitted from the start.
         1682  +*/
         1683  +static int vdbeIncrPopulate(IncrMerger *pIncr){
         1684  +  int rc = SQLITE_OK;
         1685  +  int rc2;
         1686  +  i64 iStart = pIncr->iStartOff;
         1687  +  SorterFile *pOut = &pIncr->aFile[1];
         1688  +  SortSubtask *pTask = pIncr->pTask;
         1689  +  MergeEngine *pMerger = pIncr->pMerger;
         1690  +  PmaWriter writer;
         1691  +  assert( pIncr->bEof==0 );
         1692  +
         1693  +  vdbeSorterPopulateDebug(pTask, "enter");
         1694  +
         1695  +  vdbePmaWriterInit(pOut->pFd, &writer, pTask->pSorter->pgsz, iStart);
         1696  +  while( rc==SQLITE_OK ){
         1697  +    int dummy;
         1698  +    PmaReader *pReader = &pMerger->aReadr[ pMerger->aTree[1] ];
         1699  +    int nKey = pReader->nKey;
         1700  +    i64 iEof = writer.iWriteOff + writer.iBufEnd;
         1701  +
         1702  +    /* Check if the output file is full or if the input has been exhausted.
         1703  +    ** In either case exit the loop. */
         1704  +    if( pReader->pFd==0 ) break;
         1705  +    if( (iEof + nKey + sqlite3VarintLen(nKey))>(iStart + pIncr->mxSz) ) break;
         1706  +
         1707  +    /* Write the next key to the output. */
         1708  +    vdbePmaWriteVarint(&writer, nKey);
         1709  +    vdbePmaWriteBlob(&writer, pReader->aKey, nKey);
         1710  +    assert( pIncr->pMerger->pTask==pTask );
         1711  +    rc = vdbeMergeEngineStep(pIncr->pMerger, &dummy);
         1712  +  }
         1713  +
         1714  +  rc2 = vdbePmaWriterFinish(&writer, &pOut->iEof);
         1715  +  if( rc==SQLITE_OK ) rc = rc2;
         1716  +  vdbeSorterPopulateDebug(pTask, "exit");
         1717  +  return rc;
         1718  +}
         1719  +
         1720  +#if SQLITE_MAX_WORKER_THREADS>0
         1721  +/*
         1722  +** The main routine for background threads that populate aFile[1] of
         1723  +** multi-threaded IncrMerger objects.
         1724  +*/
         1725  +static void *vdbeIncrPopulateThread(void *pCtx){
         1726  +  IncrMerger *pIncr = (IncrMerger*)pCtx;
         1727  +  void *pRet = SQLITE_INT_TO_PTR( vdbeIncrPopulate(pIncr) );
         1728  +  pIncr->pTask->bDone = 1;
         1729  +  return pRet;
         1730  +}
         1731  +
         1732  +/*
         1733  +** Launch a background thread to populate aFile[1] of pIncr.
         1734  +*/
         1735  +static int vdbeIncrBgPopulate(IncrMerger *pIncr){
         1736  +  void *p = (void*)pIncr;
         1737  +  assert( pIncr->bUseThread );
         1738  +  return vdbeSorterCreateThread(pIncr->pTask, vdbeIncrPopulateThread, p);
         1739  +}
         1740  +#endif
         1741  +
         1742  +/*
         1743  +** This function is called when the PmaReader corresponding to pIncr has
         1744  +** finished reading the contents of aFile[0]. Its purpose is to "refill"
         1745  +** aFile[0] such that the PmaReader should start rereading it from the
         1746  +** beginning.
         1747  +**
         1748  +** For single-threaded objects, this is accomplished by literally reading 
         1749  +** keys from pIncr->pMerger and repopulating aFile[0]. 
         1750  +**
         1751  +** For multi-threaded objects, all that is required is to wait until the 
         1752  +** background thread is finished (if it is not already) and then swap 
         1753  +** aFile[0] and aFile[1] in place. If the contents of pMerger have not
         1754  +** been exhausted, this function also launches a new background thread
         1755  +** to populate the new aFile[1].
         1756  +**
         1757  +** SQLITE_OK is returned on success, or an SQLite error code otherwise.
         1758  +*/
         1759  +static int vdbeIncrSwap(IncrMerger *pIncr){
         1760  +  int rc = SQLITE_OK;
         1761  +
         1762  +#if SQLITE_MAX_WORKER_THREADS>0
         1763  +  if( pIncr->bUseThread ){
         1764  +    rc = vdbeSorterJoinThread(pIncr->pTask);
         1765  +
         1766  +    if( rc==SQLITE_OK ){
         1767  +      SorterFile f0 = pIncr->aFile[0];
         1768  +      pIncr->aFile[0] = pIncr->aFile[1];
         1769  +      pIncr->aFile[1] = f0;
         1770  +    }
         1771  +
         1772  +    if( rc==SQLITE_OK ){
         1773  +      if( pIncr->aFile[0].iEof==pIncr->iStartOff ){
         1774  +        pIncr->bEof = 1;
         1775  +      }else{
         1776  +        rc = vdbeIncrBgPopulate(pIncr);
         1777  +      }
         1778  +    }
         1779  +  }else
         1780  +#endif
         1781  +  {
         1782  +    rc = vdbeIncrPopulate(pIncr);
         1783  +    pIncr->aFile[0] = pIncr->aFile[1];
         1784  +    if( pIncr->aFile[0].iEof==pIncr->iStartOff ){
         1785  +      pIncr->bEof = 1;
         1786  +    }
         1787  +  }
         1788  +
         1789  +  return rc;
         1790  +}
         1791  +
         1792  +/*
         1793  +** Allocate and return a new IncrMerger object to read data from pMerger.
         1794  +**
         1795  +** If an OOM condition is encountered, set *ppOut to NULL and return
         1796  +** SQLITE_NOMEM. In this case free the pMerger argument before returning.
         1797  +*/
         1798  +static int vdbeIncrMergerNew(
         1799  +  SortSubtask *pTask,     /* The thread that will be using the new IncrMerger */
         1800  +  MergeEngine *pMerger,   /* The MergeEngine that the IncrMerger will control */
         1801  +  IncrMerger **ppOut      /* Write the new IncrMerger here */
         1802  +){
         1803  +  int rc = SQLITE_OK;
         1804  +  IncrMerger *pIncr = *ppOut = (IncrMerger*)
         1805  +       (sqlite3FaultSim(100) ? 0 : sqlite3MallocZero(sizeof(*pIncr)));
         1806  +  if( pIncr ){
         1807  +    pIncr->pMerger = pMerger;
         1808  +    pIncr->pTask = pTask;
         1809  +    pIncr->mxSz = MAX(pTask->pSorter->mxKeysize+9,pTask->pSorter->mxPmaSize/2);
         1810  +    pTask->file2.iEof += pIncr->mxSz;
         1811  +  }else{
         1812  +    vdbeMergeEngineFree(pMerger);
         1813  +    rc = SQLITE_NOMEM;
         1814  +  }
         1815  +  return rc;
         1816  +}
         1817  +
         1818  +#if SQLITE_MAX_WORKER_THREADS>0
         1819  +/*
         1820  +** Set the "use-threads" flag on object pIncr.
         1821  +*/
         1822  +static void vdbeIncrMergerSetThreads(IncrMerger *pIncr){
         1823  +  pIncr->bUseThread = 1;
         1824  +  pIncr->pTask->file2.iEof -= pIncr->mxSz;
         1825  +}
         1826  +#endif /* SQLITE_MAX_WORKER_THREADS>0 */
         1827  +
         1828  +
         1829  +
         1830  +/*
         1831  +** Recompute pMerger->aTree[iOut] by comparing the next keys on the
         1832  +** two PmaReaders that feed that entry.  Neither of the PmaReaders
         1833  +** is advanced.  This routine merely does the comparison.
         1834  +*/
         1835  +static void vdbeMergeEngineCompare(
         1836  +  MergeEngine *pMerger,  /* Merge engine containing PmaReaders to compare */
         1837  +  int iOut               /* Store the result in pMerger->aTree[iOut] */
         1838  +){
         1839  +  int i1;
         1840  +  int i2;
         1841  +  int iRes;
         1842  +  PmaReader *p1;
         1843  +  PmaReader *p2;
         1844  +
         1845  +  assert( iOut<pMerger->nTree && iOut>0 );
         1846  +
         1847  +  if( iOut>=(pMerger->nTree/2) ){
         1848  +    i1 = (iOut - pMerger->nTree/2) * 2;
         1849  +    i2 = i1 + 1;
         1850  +  }else{
         1851  +    i1 = pMerger->aTree[iOut*2];
         1852  +    i2 = pMerger->aTree[iOut*2+1];
         1853  +  }
         1854  +
         1855  +  p1 = &pMerger->aReadr[i1];
         1856  +  p2 = &pMerger->aReadr[i2];
         1857  +
         1858  +  if( p1->pFd==0 ){
         1859  +    iRes = i2;
         1860  +  }else if( p2->pFd==0 ){
         1861  +    iRes = i1;
         1862  +  }else{
         1863  +    int res;
         1864  +    assert( pMerger->pTask->pUnpacked!=0 );  /* from vdbeSortSubtaskMain() */
         1865  +    res = vdbeSorterCompare(
         1866  +        pMerger->pTask, p1->aKey, p1->nKey, p2->aKey, p2->nKey
         1867  +    );
         1868  +    if( res<=0 ){
         1869  +      iRes = i1;
         1870  +    }else{
         1871  +      iRes = i2;
         1872  +    }
         1873  +  }
         1874  +
         1875  +  pMerger->aTree[iOut] = iRes;
         1876  +}
         1877  +
         1878  +/*
         1879  +** Allowed values for the eMode parameter to vdbeMergeEngineInit()
         1880  +** and vdbePmaReaderIncrMergeInit().
         1881  +**
         1882  +** Only INCRINIT_NORMAL is valid in single-threaded builds (when
         1883  +** SQLITE_MAX_WORKER_THREADS==0).  The other values are only used
         1884  +** when there exists one or more separate worker threads.
         1885  +*/
         1886  +#define INCRINIT_NORMAL 0
         1887  +#define INCRINIT_TASK   1
         1888  +#define INCRINIT_ROOT   2
         1889  +
         1890  +/* Forward reference.
         1891  +** The vdbeMergeEngineInit() and vdbePmaReaderIncrMergeInit() routines call each
         1892  +** other (when building a merge tree).
         1893  +*/
         1894  +static int vdbePmaReaderIncrMergeInit(PmaReader *pReadr, int eMode);
         1895  +
         1896  +/*
         1897  +** Initialize the MergeEngine object passed as the second argument. Once this
         1898  +** function returns, the first key of merged data may be read from the 
         1899  +** MergeEngine object in the usual fashion.
         1900  +**
         1901  +** If argument eMode is INCRINIT_ROOT, then it is assumed that any IncrMerge
         1902  +** objects attached to the PmaReader objects that the merger reads from have
         1903  +** already been populated, but that they have not yet populated aFile[0] and
         1904  +** set the PmaReader objects up to read from it. In this case all that is
         1905  +** required is to call vdbePmaReaderNext() on each PmaReader to point it at
         1906  +** its first key.
         1907  +**
         1908  +** Otherwise, if eMode is any value other than INCRINIT_ROOT, then use 
         1909  +** vdbePmaReaderIncrMergeInit() to initialize each PmaReader that feeds data 
         1910  +** to pMerger.
         1911  +**
         1912  +** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
         1913  +*/
         1914  +static int vdbeMergeEngineInit(
         1915  +  SortSubtask *pTask,             /* Thread that will run pMerger */
         1916  +  MergeEngine *pMerger,           /* MergeEngine to initialize */
         1917  +  int eMode                       /* One of the INCRINIT_XXX constants */
         1918  +){
         1919  +  int rc = SQLITE_OK;             /* Return code */
         1920  +  int i;                          /* For looping over PmaReader objects */
         1921  +  int nTree = pMerger->nTree;
         1922  +
         1923  +  /* eMode is always INCRINIT_NORMAL in single-threaded mode */
         1924  +  assert( SQLITE_MAX_WORKER_THREADS>0 || eMode==INCRINIT_NORMAL );
         1925  +
         1926  +  /* Verify that the MergeEngine is assigned to a single thread */
         1927  +  assert( pMerger->pTask==0 );
         1928  +  pMerger->pTask = pTask;
         1929  +
         1930  +  for(i=0; i<nTree; i++){
         1931  +    if( SQLITE_MAX_WORKER_THREADS>0 && eMode==INCRINIT_ROOT ){
         1932  +      /* PmaReaders should be normally initialized in order, as if they are
         1933  +      ** reading from the same temp file this makes for more linear file IO.
         1934  +      ** However, in the INCRINIT_ROOT case, if PmaReader aReadr[nTask-1] is
         1935  +      ** in use it will block the vdbePmaReaderNext() call while it uses
         1936  +      ** the main thread to fill its buffer. So calling PmaReaderNext()
         1937  +      ** on this PmaReader before any of the multi-threaded PmaReaders takes
         1938  +      ** better advantage of multi-processor hardware. */
         1939  +      rc = vdbePmaReaderNext(&pMerger->aReadr[nTree-i-1]);
         1940  +    }else{
         1941  +      rc = vdbePmaReaderIncrMergeInit(&pMerger->aReadr[i], INCRINIT_NORMAL);
         1942  +    }
         1943  +    if( rc!=SQLITE_OK ) return rc;
         1944  +  }
         1945  +
         1946  +  for(i=pMerger->nTree-1; i>0; i--){
         1947  +    vdbeMergeEngineCompare(pMerger, i);
         1948  +  }
         1949  +  return pTask->pUnpacked->errCode;
         1950  +}
         1951  +
         1952  +/*
         1953  +** Initialize the IncrMerge field of a PmaReader.
         1954  +**
         1955  +** If the PmaReader passed as the first argument is not an incremental-reader
         1956  +** (if pReadr->pIncr==0), then this function is a no-op. Otherwise, it serves
         1957  +** to open and/or initialize the temp file related fields of the IncrMerge
         1958  +** object at (pReadr->pIncr).
         1959  +**
         1960  +** If argument eMode is set to INCRINIT_NORMAL, then all PmaReaders
         1961  +** in the sub-tree headed by pReadr are also initialized. Data is then loaded
         1962  +** into the buffers belonging to pReadr and it is set to
         1963  +** point to the first key in its range.
         1964  +**
         1965  +** If argument eMode is set to INCRINIT_TASK, then pReadr is guaranteed
         1966  +** to be a multi-threaded PmaReader and this function is being called in a
         1967  +** background thread. In this case all PmaReaders in the sub-tree are 
         1968  +** initialized as for INCRINIT_NORMAL and the aFile[1] buffer belonging to
         1969  +** pReadr is populated. However, pReadr itself is not set up to point
         1970  +** to its first key. A call to vdbePmaReaderNext() is still required to do
         1971  +** that. 
         1972  +**
         1973  +** The reason this function does not call vdbePmaReaderNext() immediately 
         1974  +** in the INCRINIT_TASK case is that vdbePmaReaderNext() assumes that it has
         1975  +** to block on thread (pTask->thread) before accessing aFile[1]. But, since
         1976  +** this entire function is being run by thread (pTask->thread), that will
         1977  +** lead to the current background thread attempting to join itself.
         1978  +**
         1979  +** Finally, if argument eMode is set to INCRINIT_ROOT, it may be assumed
         1980  +** that pReadr->pIncr is a multi-threaded IncrMerge object, and that all
         1981  +** child-trees have already been initialized using IncrInit(INCRINIT_TASK).
         1982  +** In this case vdbePmaReaderNext() is called on all child PmaReaders and
         1983  +** the current PmaReader set to point to the first key in its range.
         1984  +**
         1985  +** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
         1986  +*/
         1987  +static int vdbePmaReaderIncrMergeInit(PmaReader *pReadr, int eMode){
         1988  +  int rc = SQLITE_OK;
         1989  +  IncrMerger *pIncr = pReadr->pIncr;
         1990  +
         1991  +  /* eMode is always INCRINIT_NORMAL in single-threaded mode */
         1992  +  assert( SQLITE_MAX_WORKER_THREADS>0 || eMode==INCRINIT_NORMAL );
         1993  +
         1994  +  if( pIncr ){
         1995  +    SortSubtask *pTask = pIncr->pTask;
         1996  +    sqlite3 *db = pTask->pSorter->db;
         1997  +
         1998  +    rc = vdbeMergeEngineInit(pTask, pIncr->pMerger, eMode);
         1999  +
         2000  +    /* Set up the required files for pIncr. A multi-threaded IncrMerge object
         2001  +    ** requires two temp files to itself, whereas a single-threaded object
         2002  +    ** only requires a region of pTask->file2. */
         2003  +    if( rc==SQLITE_OK ){
         2004  +      int mxSz = pIncr->mxSz;
         2005  +#if SQLITE_MAX_WORKER_THREADS>0
         2006  +      if( pIncr->bUseThread ){
         2007  +        rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[0].pFd);
         2008  +        if( rc==SQLITE_OK ){
         2009  +          rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[1].pFd);
         2010  +        }
         2011  +      }else
         2012  +#endif
         2013  +      /*if( !pIncr->bUseThread )*/{
         2014  +        if( pTask->file2.pFd==0 ){
         2015  +          assert( pTask->file2.iEof>0 );
         2016  +          rc = vdbeSorterOpenTempFile(db, pTask->file2.iEof, &pTask->file2.pFd);
         2017  +          pTask->file2.iEof = 0;
         2018  +        }
         2019  +        if( rc==SQLITE_OK ){
         2020  +          pIncr->aFile[1].pFd = pTask->file2.pFd;
         2021  +          pIncr->iStartOff = pTask->file2.iEof;
         2022  +          pTask->file2.iEof += mxSz;
         2023  +        }
         2024  +      }
         2025  +    }
         2026  +
         2027  +#if SQLITE_MAX_WORKER_THREADS>0
         2028  +    if( rc==SQLITE_OK && pIncr->bUseThread ){
         2029  +      /* Use the current thread to populate aFile[1], even though this
         2030  +      ** PmaReader is multi-threaded. The reason being that this function
         2031  +      ** is already running in background thread pIncr->pTask->thread. */
         2032  +      assert( eMode==INCRINIT_ROOT || eMode==INCRINIT_TASK );
         2033  +      rc = vdbeIncrPopulate(pIncr);
         2034  +    }
         2035  +#endif
         2036  +
         2037  +    if( rc==SQLITE_OK
         2038  +     && (SQLITE_MAX_WORKER_THREADS==0 || eMode!=INCRINIT_TASK)
         2039  +    ){
         2040  +      rc = vdbePmaReaderNext(pReadr);
         2041  +    }
         2042  +  }
         2043  +  return rc;
         2044  +}
         2045  +
         2046  +#if SQLITE_MAX_WORKER_THREADS>0
         2047  +/*
         2048  +** The main routine for vdbePmaReaderIncrMergeInit() operations run in 
         2049  +** background threads.
         2050  +*/
         2051  +static void *vdbePmaReaderBgInit(void *pCtx){
         2052  +  PmaReader *pReader = (PmaReader*)pCtx;
         2053  +  void *pRet = SQLITE_INT_TO_PTR(
         2054  +                  vdbePmaReaderIncrMergeInit(pReader,INCRINIT_TASK)
         2055  +               );
         2056  +  pReader->pIncr->pTask->bDone = 1;
         2057  +  return pRet;
         2058  +}
         2059  +
         2060  +/*
         2061  +** Use a background thread to invoke vdbePmaReaderIncrMergeInit(INCRINIT_TASK) 
         2062  +** on the PmaReader object passed as the first argument.
         2063  +**
         2064  +** This call will initialize the various fields of the pReadr->pIncr 
         2065  +** structure and, if it is a multi-threaded IncrMerger, launch a 
         2066  +** background thread to populate aFile[1].
         2067  +*/
         2068  +static int vdbePmaReaderBgIncrInit(PmaReader *pReadr){
         2069  +  void *pCtx = (void*)pReadr;
         2070  +  return vdbeSorterCreateThread(pReadr->pIncr->pTask, vdbePmaReaderBgInit, pCtx);
         2071  +}
         2072  +#endif
         2073  +
         2074  +/*
         2075  +** Allocate a new MergeEngine object to merge the contents of nPMA level-0
         2076  +** PMAs from pTask->file. If no error occurs, set *ppOut to point to
         2077  +** the new object and return SQLITE_OK. Or, if an error does occur, set *ppOut
         2078  +** to NULL and return an SQLite error code.
         2079  +**
         2080  +** When this function is called, *piOffset is set to the offset of the
         2081  +** first PMA to read from pTask->file. Assuming no error occurs, it is 
         2082  +** set to the offset immediately following the last byte of the last
         2083  +** PMA before returning. If an error does occur, then the final value of
         2084  +** *piOffset is undefined.
         2085  +*/
         2086  +static int vdbeMergeEngineLevel0(
         2087  +  SortSubtask *pTask,             /* Sorter task to read from */
         2088  +  int nPMA,                       /* Number of PMAs to read */
         2089  +  i64 *piOffset,                  /* IN/OUT: Readr offset in pTask->file */
         2090  +  MergeEngine **ppOut             /* OUT: New merge-engine */
         2091  +){
         2092  +  MergeEngine *pNew;              /* Merge engine to return */
         2093  +  i64 iOff = *piOffset;
         2094  +  int i;
         2095  +  int rc = SQLITE_OK;
         2096  +
         2097  +  *ppOut = pNew = vdbeMergeEngineNew(nPMA);
         2098  +  if( pNew==0 ) rc = SQLITE_NOMEM;
         2099  +
         2100  +  for(i=0; i<nPMA && rc==SQLITE_OK; i++){
         2101  +    i64 nDummy;
         2102  +    PmaReader *pReadr = &pNew->aReadr[i];
         2103  +    rc = vdbePmaReaderInit(pTask, &pTask->file, iOff, pReadr, &nDummy);
         2104  +    iOff = pReadr->iEof;
         2105  +  }
         2106  +
         2107  +  if( rc!=SQLITE_OK ){
         2108  +    vdbeMergeEngineFree(pNew);
         2109  +    *ppOut = 0;
         2110  +  }
         2111  +  *piOffset = iOff;
         2112  +  return rc;
         2113  +}
         2114  +
         2115  +/*
         2116  +** Return the depth of a tree comprising nPMA PMAs, assuming a fanout of
         2117  +** SORTER_MAX_MERGE_COUNT. The returned value does not include leaf nodes.
         2118  +**
         2119  +** i.e.
         2120  +**
         2121  +**   nPMA<=16    -> TreeDepth() == 0
         2122  +**   nPMA<=256   -> TreeDepth() == 1
         2123  +**   nPMA<=65536 -> TreeDepth() == 2
         2124  +*/
         2125  +static int vdbeSorterTreeDepth(int nPMA){
         2126  +  int nDepth = 0;
         2127  +  i64 nDiv = SORTER_MAX_MERGE_COUNT;
         2128  +  while( nDiv < (i64)nPMA ){
         2129  +    nDiv = nDiv * SORTER_MAX_MERGE_COUNT;
         2130  +    nDepth++;
         2131  +  }
         2132  +  return nDepth;
         2133  +}
         2134  +
         2135  +/*
         2136  +** pRoot is the root of an incremental merge-tree with depth nDepth (according
         2137  +** to vdbeSorterTreeDepth()). pLeaf is the iSeq'th leaf to be added to the
         2138  +** tree, counting from zero. This function adds pLeaf to the tree.
         2139  +**
         2140  +** If successful, SQLITE_OK is returned. If an error occurs, an SQLite error
         2141  +** code is returned and pLeaf is freed.
         2142  +*/
         2143  +static int vdbeSorterAddToTree(
         2144  +  SortSubtask *pTask,             /* Task context */
         2145  +  int nDepth,                     /* Depth of tree according to TreeDepth() */
         2146  +  int iSeq,                       /* Sequence number of leaf within tree */
         2147  +  MergeEngine *pRoot,             /* Root of tree */
         2148  +  MergeEngine *pLeaf              /* Leaf to add to tree */
         2149  +){
         2150  +  int rc = SQLITE_OK;
         2151  +  int nDiv = 1;
         2152  +  int i;
         2153  +  MergeEngine *p = pRoot;
         2154  +  IncrMerger *pIncr;
         2155  +
         2156  +  rc = vdbeIncrMergerNew(pTask, pLeaf, &pIncr);
         2157  +
         2158  +  for(i=1; i<nDepth; i++){
         2159  +    nDiv = nDiv * SORTER_MAX_MERGE_COUNT;
         2160  +  }
         2161  +
         2162  +  for(i=1; i<nDepth && rc==SQLITE_OK; i++){
         2163  +    int iIter = (iSeq / nDiv) % SORTER_MAX_MERGE_COUNT;
         2164  +    PmaReader *pReadr = &p->aReadr[iIter];
         2165  +
         2166  +    if( pReadr->pIncr==0 ){
         2167  +      MergeEngine *pNew = vdbeMergeEngineNew(SORTER_MAX_MERGE_COUNT);
         2168  +      if( pNew==0 ){
         2169  +        rc = SQLITE_NOMEM;
         2170  +      }else{
         2171  +        rc = vdbeIncrMergerNew(pTask, pNew, &pReadr->pIncr);
         2172  +      }
         2173  +    }
         2174  +    if( rc==SQLITE_OK ){
         2175  +      p = pReadr->pIncr->pMerger;
         2176  +      nDiv = nDiv / SORTER_MAX_MERGE_COUNT;
         2177  +    }
         2178  +  }
         2179  +
         2180  +  if( rc==SQLITE_OK ){
         2181  +    p->aReadr[iSeq % SORTER_MAX_MERGE_COUNT].pIncr = pIncr;
         2182  +  }else{
         2183  +    vdbeIncrFree(pIncr);
         2184  +  }
         2185  +  return rc;
         2186  +}
         2187  +
         2188  +/*
         2189  +** This function is called as part of a SorterRewind() operation on a sorter
         2190  +** that has already written two or more level-0 PMAs to one or more temp
         2191  +** files. It builds a tree of MergeEngine/IncrMerger/PmaReader objects that 
         2192  +** can be used to incrementally merge all PMAs on disk.
         2193  +**
         2194  +** If successful, SQLITE_OK is returned and *ppOut set to point to the
         2195  +** MergeEngine object at the root of the tree before returning. Or, if an
         2196  +** error occurs, an SQLite error code is returned and the final value 
         2197  +** of *ppOut is undefined.
         2198  +*/
         2199  +static int vdbeSorterMergeTreeBuild(
         2200  +  VdbeSorter *pSorter,       /* The VDBE cursor that implements the sort */
         2201  +  MergeEngine **ppOut        /* Write the MergeEngine here */
         2202  +){
         2203  +  MergeEngine *pMain = 0;
         2204  +  int rc = SQLITE_OK;
         2205  +  int iTask;
         2206  +
         2207  +#if SQLITE_MAX_WORKER_THREADS>0
         2208  +  /* If the sorter uses more than one task, then create the top-level 
         2209  +  ** MergeEngine here. This MergeEngine will read data from exactly 
         2210  +  ** one PmaReader per sub-task.  */
         2211  +  assert( pSorter->bUseThreads || pSorter->nTask==1 );
         2212  +  if( pSorter->nTask>1 ){
         2213  +    pMain = vdbeMergeEngineNew(pSorter->nTask);
         2214  +    if( pMain==0 ) rc = SQLITE_NOMEM;
         2215  +  }
         2216  +#endif
         2217  +
         2218  +  for(iTask=0; rc==SQLITE_OK && iTask<pSorter->nTask; iTask++){
         2219  +    SortSubtask *pTask = &pSorter->aTask[iTask];
         2220  +    assert( pTask->nPMA>0 || SQLITE_MAX_WORKER_THREADS>0 );
         2221  +    if( SQLITE_MAX_WORKER_THREADS==0 || pTask->nPMA ){
         2222  +      MergeEngine *pRoot = 0;     /* Root node of tree for this task */
         2223  +      int nDepth = vdbeSorterTreeDepth(pTask->nPMA);
         2224  +      i64 iReadOff = 0;
         2225  +
         2226  +      if( pTask->nPMA<=SORTER_MAX_MERGE_COUNT ){
         2227  +        rc = vdbeMergeEngineLevel0(pTask, pTask->nPMA, &iReadOff, &pRoot);
         2228  +      }else{
         2229  +        int i;
         2230  +        int iSeq = 0;
         2231  +        pRoot = vdbeMergeEngineNew(SORTER_MAX_MERGE_COUNT);
         2232  +        if( pRoot==0 ) rc = SQLITE_NOMEM;
         2233  +        for(i=0; i<pTask->nPMA && rc==SQLITE_OK; i += SORTER_MAX_MERGE_COUNT){
         2234  +          MergeEngine *pMerger = 0; /* New level-0 PMA merger */
         2235  +          int nReader;              /* Number of level-0 PMAs to merge */
         2236  +
         2237  +          nReader = MIN(pTask->nPMA - i, SORTER_MAX_MERGE_COUNT);
         2238  +          rc = vdbeMergeEngineLevel0(pTask, nReader, &iReadOff, &pMerger);
         2239  +          if( rc==SQLITE_OK ){
         2240  +            rc = vdbeSorterAddToTree(pTask, nDepth, iSeq++, pRoot, pMerger);
         2241  +          }
         2242  +        }
         2243  +      }
         2244  +
         2245  +      if( rc==SQLITE_OK ){
         2246  +#if SQLITE_MAX_WORKER_THREADS>0
         2247  +        if( pMain!=0 ){
         2248  +          rc = vdbeIncrMergerNew(pTask, pRoot, &pMain->aReadr[iTask].pIncr);
         2249  +        }else
   815   2250   #endif
   816         -    rc = vdbeSorterListToPMA(db, pCsr);
   817         -    pSorter->nInMemory = 0;
   818         -    assert( rc!=SQLITE_OK || (nExpect==pSorter->iWriteOff) );
         2251  +        {
         2252  +          assert( pMain==0 );
         2253  +          pMain = pRoot;
         2254  +        }
         2255  +      }else{
         2256  +        vdbeMergeEngineFree(pRoot);
         2257  +      }
         2258  +    }
         2259  +  }
         2260  +
         2261  +  if( rc!=SQLITE_OK ){
         2262  +    vdbeMergeEngineFree(pMain);
         2263  +    pMain = 0;
   819   2264     }
   820         -
         2265  +  *ppOut = pMain;
   821   2266     return rc;
   822   2267   }
   823   2268   
   824   2269   /*
   825         -** Helper function for sqlite3VdbeSorterRewind(). 
         2270  +** This function is called as part of an sqlite3VdbeSorterRewind() operation
         2271  +** on a sorter that has written two or more PMAs to temporary files. It sets
         2272  +** up either VdbeSorter.pMerger (for single threaded sorters) or pReader
         2273  +** (for multi-threaded sorters) so that it can be used to iterate through
         2274  +** all records stored in the sorter.
         2275  +**
         2276  +** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
   826   2277   */
   827         -static int vdbeSorterInitMerge(
   828         -  sqlite3 *db,                    /* Database handle */
   829         -  const VdbeCursor *pCsr,         /* Cursor handle for this sorter */
   830         -  i64 *pnByte                     /* Sum of bytes in all opened PMAs */
   831         -){
   832         -  VdbeSorter *pSorter = pCsr->pSorter;
   833         -  int rc = SQLITE_OK;             /* Return code */
   834         -  int i;                          /* Used to iterator through aIter[] */
   835         -  i64 nByte = 0;                  /* Total bytes in all opened PMAs */
   836         -
   837         -  /* Initialize the iterators. */
   838         -  for(i=0; i<SORTER_MAX_MERGE_COUNT; i++){
   839         -    VdbeSorterIter *pIter = &pSorter->aIter[i];
   840         -    rc = vdbeSorterIterInit(db, pSorter, pSorter->iReadOff, pIter, &nByte);
   841         -    pSorter->iReadOff = pIter->iEof;
   842         -    assert( rc!=SQLITE_OK || pSorter->iReadOff<=pSorter->iWriteOff );
   843         -    if( rc!=SQLITE_OK || pSorter->iReadOff>=pSorter->iWriteOff ) break;
         2278  +static int vdbeSorterSetupMerge(VdbeSorter *pSorter){
         2279  +  int rc;                         /* Return code */
         2280  +  SortSubtask *pTask0 = &pSorter->aTask[0];
         2281  +  MergeEngine *pMain = 0;
         2282  +#if SQLITE_MAX_WORKER_THREADS
         2283  +  sqlite3 *db = pTask0->pSorter->db;
         2284  +#endif
         2285  +
         2286  +  rc = vdbeSorterMergeTreeBuild(pSorter, &pMain);
         2287  +  if( rc==SQLITE_OK ){
         2288  +#if SQLITE_MAX_WORKER_THREADS
         2289  +    assert( pSorter->bUseThreads==0 || pSorter->nTask>1 );
         2290  +    if( pSorter->bUseThreads ){
         2291  +      int iTask;
         2292  +      PmaReader *pReadr;
         2293  +      SortSubtask *pLast = &pSorter->aTask[pSorter->nTask-1];
         2294  +      rc = vdbeSortAllocUnpacked(pLast);
         2295  +      if( rc==SQLITE_OK ){
         2296  +        pReadr = (PmaReader*)sqlite3DbMallocZero(db, sizeof(PmaReader));
         2297  +        pSorter->pReader = pReadr;
         2298  +        if( pReadr==0 ) rc = SQLITE_NOMEM;
         2299  +      }
         2300  +      if( rc==SQLITE_OK ){
         2301  +        rc = vdbeIncrMergerNew(pLast, pMain, &pReadr->pIncr);
         2302  +        if( rc==SQLITE_OK ){
         2303  +          vdbeIncrMergerSetThreads(pReadr->pIncr);
         2304  +          for(iTask=0; iTask<(pSorter->nTask-1); iTask++){
         2305  +            IncrMerger *pIncr;
         2306  +            if( (pIncr = pMain->aReadr[iTask].pIncr) ){
         2307  +              vdbeIncrMergerSetThreads(pIncr);
         2308  +              assert( pIncr->pTask!=pLast );
         2309  +            }
         2310  +          }
         2311  +          for(iTask=0; rc==SQLITE_OK && iTask<pSorter->nTask; iTask++){
         2312  +            PmaReader *p = &pMain->aReadr[iTask];
         2313  +            assert( p->pIncr==0 || p->pIncr->pTask==&pSorter->aTask[iTask] );
         2314  +            if( p->pIncr ){ 
         2315  +              if( iTask==pSorter->nTask-1 ){
         2316  +                rc = vdbePmaReaderIncrMergeInit(p, INCRINIT_TASK);
         2317  +              }else{
         2318  +                rc = vdbePmaReaderBgIncrInit(p);
         2319  +              }
         2320  +            }
         2321  +          }
         2322  +        }
         2323  +        pMain = 0;
         2324  +      }
         2325  +      if( rc==SQLITE_OK ){
         2326  +        rc = vdbePmaReaderIncrMergeInit(pReadr, INCRINIT_ROOT);
         2327  +      }
         2328  +    }else
         2329  +#endif
         2330  +    {
         2331  +      rc = vdbeMergeEngineInit(pTask0, pMain, INCRINIT_NORMAL);
         2332  +      pSorter->pMerger = pMain;
         2333  +      pMain = 0;
         2334  +    }
   844   2335     }
   845   2336   
   846         -  /* Initialize the aTree[] array. */
   847         -  for(i=pSorter->nTree-1; rc==SQLITE_OK && i>0; i--){
   848         -    rc = vdbeSorterDoCompare(pCsr, i);
         2337  +  if( rc!=SQLITE_OK ){
         2338  +    vdbeMergeEngineFree(pMain);
   849   2339     }
   850         -
   851         -  *pnByte = nByte;
   852   2340     return rc;
   853   2341   }
         2342  +
   854   2343   
   855   2344   /*
   856         -** Once the sorter has been populated, this function is called to prepare
   857         -** for iterating through its contents in sorted order.
         2345  +** Once the sorter has been populated by calls to sqlite3VdbeSorterWrite,
         2346  +** this function is called to prepare for iterating through the records
         2347  +** in sorted order.
   858   2348   */
   859         -int sqlite3VdbeSorterRewind(sqlite3 *db, const VdbeCursor *pCsr, int *pbEof){
         2349  +int sqlite3VdbeSorterRewind(const VdbeCursor *pCsr, int *pbEof){
   860   2350     VdbeSorter *pSorter = pCsr->pSorter;
   861         -  int rc;                         /* Return code */
   862         -  sqlite3_file *pTemp2 = 0;       /* Second temp file to use */
   863         -  i64 iWrite2 = 0;                /* Write offset for pTemp2 */
   864         -  int nIter;                      /* Number of iterators used */
   865         -  int nByte;                      /* Bytes of space required for aIter/aTree */
   866         -  int N = 2;                      /* Power of 2 >= nIter */
         2351  +  int rc = SQLITE_OK;             /* Return code */
   867   2352   
   868   2353     assert( pSorter );
   869   2354   
   870   2355     /* If no data has been written to disk, then do not do so now. Instead,
   871   2356     ** sort the VdbeSorter.pRecord list. The vdbe layer will read data directly
   872   2357     ** from the in-memory list.  */
   873         -  if( pSorter->nPMA==0 ){
   874         -    *pbEof = !pSorter->pRecord;
   875         -    assert( pSorter->aTree==0 );
   876         -    return vdbeSorterSort(pCsr);
         2358  +  if( pSorter->bUsePMA==0 ){
         2359  +    if( pSorter->list.pList ){
         2360  +      *pbEof = 0;
         2361  +      rc = vdbeSorterSort(&pSorter->aTask[0], &pSorter->list);
         2362  +    }else{
         2363  +      *pbEof = 1;
         2364  +    }
         2365  +    return rc;
         2366  +  }
         2367  +
         2368  +  /* Write the current in-memory list to a PMA. When the VdbeSorterWrite() 
         2369  +  ** function flushes the contents of memory to disk, it always creates
         2370  +  ** a new list consisting of a single key immediately afterwards.
         2371  +  ** So the list is never empty at this point.  */
         2372  +  assert( pSorter->list.pList );
         2373  +  rc = vdbeSorterFlushPMA(pSorter);
         2374  +
         2375  +  /* Join all threads */
         2376  +  rc = vdbeSorterJoinAll(pSorter, rc);
         2377  +
         2378  +  vdbeSorterRewindDebug("rewind");
         2379  +
         2380  +  /* Assuming no errors have occurred, set up a merger structure to 
         2381  +  ** incrementally read and merge all remaining PMAs.  */
         2382  +  assert( pSorter->pReader==0 );
         2383  +  if( rc==SQLITE_OK ){
         2384  +    rc = vdbeSorterSetupMerge(pSorter);
         2385  +    *pbEof = 0;
   877   2386     }
   878   2387   
   879         -  /* Write the current in-memory list to a PMA. */
   880         -  rc = vdbeSorterListToPMA(db, pCsr);
   881         -  if( rc!=SQLITE_OK ) return rc;
   882         -
   883         -  /* Allocate space for aIter[] and aTree[]. */
   884         -  nIter = pSorter->nPMA;
   885         -  if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT;
   886         -  assert( nIter>0 );
   887         -  while( N<nIter ) N += N;
   888         -  nByte = N * (sizeof(int) + sizeof(VdbeSorterIter));
   889         -  pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte);
   890         -  if( !pSorter->aIter ) return SQLITE_NOMEM;
   891         -  pSorter->aTree = (int *)&pSorter->aIter[N];
   892         -  pSorter->nTree = N;
   893         -
   894         -  do {
   895         -    int iNew;                     /* Index of new, merged, PMA */
   896         -
   897         -    for(iNew=0; 
   898         -        rc==SQLITE_OK && iNew*SORTER_MAX_MERGE_COUNT<pSorter->nPMA; 
   899         -        iNew++
   900         -    ){
   901         -      int rc2;                    /* Return code from fileWriterFinish() */
   902         -      FileWriter writer;          /* Object used to write to disk */
   903         -      i64 nWrite;                 /* Number of bytes in new PMA */
   904         -
   905         -      memset(&writer, 0, sizeof(FileWriter));
   906         -
   907         -      /* If there are SORTER_MAX_MERGE_COUNT or less PMAs in file pTemp1,
   908         -      ** initialize an iterator for each of them and break out of the loop.
   909         -      ** These iterators will be incrementally merged as the VDBE layer calls
   910         -      ** sqlite3VdbeSorterNext().
   911         -      **
   912         -      ** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs,
   913         -      ** initialize iterators for SORTER_MAX_MERGE_COUNT of them. These PMAs
   914         -      ** are merged into a single PMA that is written to file pTemp2.
   915         -      */
   916         -      rc = vdbeSorterInitMerge(db, pCsr, &nWrite);
   917         -      assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile );
   918         -      if( rc!=SQLITE_OK || pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
   919         -        break;
   920         -      }
   921         -
   922         -      /* Open the second temp file, if it is not already open. */
   923         -      if( pTemp2==0 ){
   924         -        assert( iWrite2==0 );
   925         -        rc = vdbeSorterOpenTempFile(db, &pTemp2);
   926         -      }
   927         -
   928         -      if( rc==SQLITE_OK ){
   929         -        int bEof = 0;
   930         -        fileWriterInit(db, pTemp2, &writer, iWrite2);
   931         -        fileWriterWriteVarint(&writer, nWrite);
   932         -        while( rc==SQLITE_OK && bEof==0 ){
   933         -          VdbeSorterIter *pIter = &pSorter->aIter[ pSorter->aTree[1] ];
   934         -          assert( pIter->pFile );
   935         -
   936         -          fileWriterWriteVarint(&writer, pIter->nKey);
   937         -          fileWriterWrite(&writer, pIter->aKey, pIter->nKey);
   938         -          rc = sqlite3VdbeSorterNext(db, pCsr, &bEof);
   939         -        }
   940         -        rc2 = fileWriterFinish(db, &writer, &iWrite2);
   941         -        if( rc==SQLITE_OK ) rc = rc2;
   942         -      }
   943         -    }
   944         -
   945         -    if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
   946         -      break;
   947         -    }else{
   948         -      sqlite3_file *pTmp = pSorter->pTemp1;
   949         -      pSorter->nPMA = iNew;
   950         -      pSorter->pTemp1 = pTemp2;
   951         -      pTemp2 = pTmp;
   952         -      pSorter->iWriteOff = iWrite2;
   953         -      pSorter->iReadOff = 0;
   954         -      iWrite2 = 0;
   955         -    }
   956         -  }while( rc==SQLITE_OK );
   957         -
   958         -  if( pTemp2 ){
   959         -    sqlite3OsCloseFree(pTemp2);
   960         -  }
   961         -  *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
         2388  +  vdbeSorterRewindDebug("rewinddone");
   962   2389     return rc;
   963   2390   }
   964   2391   
   965   2392   /*
   966   2393   ** Advance to the next element in the sorter.
   967   2394   */
   968   2395   int sqlite3VdbeSorterNext(sqlite3 *db, const VdbeCursor *pCsr, int *pbEof){
   969   2396     VdbeSorter *pSorter = pCsr->pSorter;
   970   2397     int rc;                         /* Return code */
   971   2398   
   972         -  if( pSorter->aTree ){
   973         -    int iPrev = pSorter->aTree[1];/* Index of iterator to advance */
   974         -    rc = vdbeSorterIterNext(db, &pSorter->aIter[iPrev]);
   975         -    if( rc==SQLITE_OK ){
   976         -      int i;                      /* Index of aTree[] to recalculate */
   977         -      VdbeSorterIter *pIter1;     /* First iterator to compare */
   978         -      VdbeSorterIter *pIter2;     /* Second iterator to compare */
   979         -      u8 *pKey2;                  /* To pIter2->aKey, or 0 if record cached */
   980         -
   981         -      /* Find the first two iterators to compare. The one that was just
   982         -      ** advanced (iPrev) and the one next to it in the array.  */
   983         -      pIter1 = &pSorter->aIter[(iPrev & 0xFFFE)];
   984         -      pIter2 = &pSorter->aIter[(iPrev | 0x0001)];
   985         -      pKey2 = pIter2->aKey;
   986         -
   987         -      for(i=(pSorter->nTree+iPrev)/2; i>0; i=i/2){
   988         -        /* Compare pIter1 and pIter2. Store the result in variable iRes. */
   989         -        int iRes;
   990         -        if( pIter1->pFile==0 ){
   991         -          iRes = +1;
   992         -        }else if( pIter2->pFile==0 ){
   993         -          iRes = -1;
   994         -        }else{
   995         -          vdbeSorterCompare(pCsr, 0, 
   996         -              pIter1->aKey, pIter1->nKey, pKey2, pIter2->nKey, &iRes
   997         -          );
   998         -        }
   999         -
  1000         -        /* If pIter1 contained the smaller value, set aTree[i] to its index.
  1001         -        ** Then set pIter2 to the next iterator to compare to pIter1. In this
  1002         -        ** case there is no cache of pIter2 in pSorter->pUnpacked, so set
  1003         -        ** pKey2 to point to the record belonging to pIter2.
  1004         -        **
  1005         -        ** Alternatively, if pIter2 contains the smaller of the two values,
  1006         -        ** set aTree[i] to its index and update pIter1. If vdbeSorterCompare()
  1007         -        ** was actually called above, then pSorter->pUnpacked now contains
  1008         -        ** a value equivalent to pIter2. So set pKey2 to NULL to prevent
  1009         -        ** vdbeSorterCompare() from decoding pIter2 again.  */
  1010         -        if( iRes<=0 ){
  1011         -          pSorter->aTree[i] = (int)(pIter1 - pSorter->aIter);
  1012         -          pIter2 = &pSorter->aIter[ pSorter->aTree[i ^ 0x0001] ];
  1013         -          pKey2 = pIter2->aKey;
  1014         -        }else{
  1015         -          if( pIter1->pFile ) pKey2 = 0;
  1016         -          pSorter->aTree[i] = (int)(pIter2 - pSorter->aIter);
  1017         -          pIter1 = &pSorter->aIter[ pSorter->aTree[i ^ 0x0001] ];
  1018         -        }
  1019         -
  1020         -      }
  1021         -      *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
         2399  +  assert( pSorter->bUsePMA || (pSorter->pReader==0 && pSorter->pMerger==0) );
         2400  +  if( pSorter->bUsePMA ){
         2401  +    assert( pSorter->pReader==0 || pSorter->pMerger==0 );
         2402  +    assert( pSorter->bUseThreads==0 || pSorter->pReader );
         2403  +    assert( pSorter->bUseThreads==1 || pSorter->pMerger );
         2404  +#if SQLITE_MAX_WORKER_THREADS>0
         2405  +    if( pSorter->bUseThreads ){
         2406  +      rc = vdbePmaReaderNext(pSorter->pReader);
         2407  +      *pbEof = (pSorter->pReader->pFd==0);
         2408  +    }else
         2409  +#endif
         2410  +    /*if( !pSorter->bUseThreads )*/ {
         2411  +      assert( pSorter->pMerger->pTask==(&pSorter->aTask[0]) );
         2412  +      rc = vdbeMergeEngineStep(pSorter->pMerger, pbEof);
  1022   2413       }
  1023   2414     }else{
  1024         -    SorterRecord *pFree = pSorter->pRecord;
  1025         -    pSorter->pRecord = pFree->pNext;
  1026         -    pFree->pNext = 0;
  1027         -    vdbeSorterRecordFree(db, pFree);
  1028         -    *pbEof = !pSorter->pRecord;
         2415  +    SorterRecord *pFree = pSorter->list.pList;
         2416  +    pSorter->list.pList = pFree->u.pNext;
         2417  +    pFree->u.pNext = 0;
         2418  +    if( pSorter->list.aMemory==0 ) vdbeSorterRecordFree(db, pFree);
         2419  +    *pbEof = !pSorter->list.pList;
  1029   2420       rc = SQLITE_OK;
  1030   2421     }
  1031   2422     return rc;
  1032   2423   }
  1033   2424   
  1034   2425   /*
  1035   2426   ** Return a pointer to a buffer owned by the sorter that contains the 
................................................................................
  1036   2427   ** current key.
  1037   2428   */
  1038   2429   static void *vdbeSorterRowkey(
  1039   2430     const VdbeSorter *pSorter,      /* Sorter object */
  1040   2431     int *pnKey                      /* OUT: Size of current key in bytes */
  1041   2432   ){
  1042   2433     void *pKey;
  1043         -  if( pSorter->aTree ){
  1044         -    VdbeSorterIter *pIter;
  1045         -    pIter = &pSorter->aIter[ pSorter->aTree[1] ];
  1046         -    *pnKey = pIter->nKey;
  1047         -    pKey = pIter->aKey;
         2434  +  if( pSorter->bUsePMA ){
         2435  +    PmaReader *pReader;
         2436  +#if SQLITE_MAX_WORKER_THREADS>0
         2437  +    if( pSorter->bUseThreads ){
         2438  +      pReader = pSorter->pReader;
         2439  +    }else
         2440  +#endif
         2441  +    /*if( !pSorter->bUseThreads )*/{
         2442  +      pReader = &pSorter->pMerger->aReadr[pSorter->pMerger->aTree[1]];
         2443  +    }
         2444  +    *pnKey = pReader->nKey;
         2445  +    pKey = pReader->aKey;
  1048   2446     }else{
  1049         -    *pnKey = pSorter->pRecord->nVal;
  1050         -    pKey = pSorter->pRecord->pVal;
         2447  +    *pnKey = pSorter->list.pList->nVal;
         2448  +    pKey = SRVAL(pSorter->list.pList);
  1051   2449     }
  1052   2450     return pKey;
  1053   2451   }
  1054   2452   
  1055   2453   /*
  1056   2454   ** Copy the current sorter key into the memory cell pOut.
  1057   2455   */
................................................................................
  1070   2468     return SQLITE_OK;
  1071   2469   }
  1072   2470   
  1073   2471   /*
  1074   2472   ** Compare the key in memory cell pVal with the key that the sorter cursor
  1075   2473   ** passed as the first argument currently points to. For the purposes of
  1076   2474   ** the comparison, ignore the rowid field at the end of each record.
         2475  +**
         2476  +** If the sorter cursor key contains any NULL values, treat the two keys as
         2477  +** unequal (*pRes is set to -1), even if pVal also contains NULL values.
  1077   2478   **
  1078   2479   ** If an error occurs, return an SQLite error code (i.e. SQLITE_NOMEM).
  1079   2480   ** Otherwise, set *pRes to a negative, zero or positive value if the
  1080   2481   ** key in pVal is smaller than, equal to or larger than the current sorter
  1081   2482   ** key.
         2483  +**
         2484  +** This routine forms the core of the OP_SorterCompare opcode, which in
         2485  +** turn is used to verify uniqueness when constructing a UNIQUE INDEX.
  1082   2486   */
  1083   2487   int sqlite3VdbeSorterCompare(
  1084   2488     const VdbeCursor *pCsr,         /* Sorter cursor */
  1085   2489     Mem *pVal,                      /* Value to compare to current sorter key */
  1086         -  int nKeyCol,                    /* Only compare this many fields */
         2490  +  int nKeyCol,                    /* Compare this many columns */
  1087   2491     int *pRes                       /* OUT: Result of comparison */
  1088   2492   ){
  1089   2493     VdbeSorter *pSorter = pCsr->pSorter;
         2494  +  UnpackedRecord *r2 = pSorter->pUnpacked;
         2495  +  KeyInfo *pKeyInfo = pCsr->pKeyInfo;
         2496  +  int i;
  1090   2497     void *pKey; int nKey;           /* Sorter key to compare pVal with */
  1091   2498   
         2499  +  if( r2==0 ){
         2500  +    char *p;
         2501  +    r2 = pSorter->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pKeyInfo,0,0,&p);
         2502  +    assert( pSorter->pUnpacked==(UnpackedRecord*)p );
         2503  +    if( r2==0 ) return SQLITE_NOMEM;
         2504  +    r2->nField = nKeyCol;
         2505  +  }
         2506  +  assert( r2->nField==nKeyCol );
         2507  +
  1092   2508     pKey = vdbeSorterRowkey(pSorter, &nKey);
  1093         -  vdbeSorterCompare(pCsr, nKeyCol, pVal->z, pVal->n, pKey, nKey, pRes);
         2509  +  sqlite3VdbeRecordUnpack(pKeyInfo, nKey, pKey, r2);
         2510  +  for(i=0; i<nKeyCol; i++){
         2511  +    if( r2->aMem[i].flags & MEM_Null ){
         2512  +      *pRes = -1;
         2513  +      return SQLITE_OK;
         2514  +    }
         2515  +  }
         2516  +
         2517  +  *pRes = sqlite3VdbeRecordCompare(pVal->n, pVal->z, r2, 0);
  1094   2518     return SQLITE_OK;
  1095   2519   }

Changes to src/where.c.

  2187   2187         ** less than the upper bound of the range query. Where the upper bound
  2188   2188         ** is either ($P) or ($P:$U). Again, even if $U is available, both values
  2189   2189         ** of iUpper are requested of whereKeyStats() and the smaller used.
  2190   2190         */
  2191   2191         tRowcnt iLower;
  2192   2192         tRowcnt iUpper;
  2193   2193   
         2194  +      if( pRec ){
         2195  +        testcase( pRec->nField!=pBuilder->nRecValid );
         2196  +        pRec->nField = pBuilder->nRecValid;
         2197  +      }
  2194   2198         if( nEq==p->nKeyCol ){
  2195   2199           aff = SQLITE_AFF_INTEGER;
  2196   2200         }else{
  2197   2201           aff = p->pTable->aCol[p->aiColumn[nEq]].affinity;
  2198   2202         }
  2199   2203         /* Determine iLower and iUpper using ($P) only. */
  2200   2204         if( nEq==0 ){
................................................................................
  2246   2250             nNew = sqlite3LogEst(iUpper - iLower);
  2247   2251           }else{
  2248   2252             nNew = 10;        assert( 10==sqlite3LogEst(2) );
  2249   2253           }
  2250   2254           if( nNew<nOut ){
  2251   2255             nOut = nNew;
  2252   2256           }
  2253         -        WHERETRACE(0x10, ("range scan regions: %u..%u  est=%d\n",
         2257  +        WHERETRACE(0x10, ("STAT4 range scan: %u..%u  est=%d\n",
  2254   2258                              (u32)iLower, (u32)iUpper, nOut));
  2255   2259         }
  2256   2260       }else{
  2257   2261         int bDone = 0;
  2258   2262         rc = whereRangeSkipScanEst(pParse, pLower, pUpper, pLoop, &bDone);
  2259   2263         if( bDone ) return rc;
  2260   2264       }
................................................................................
  2274   2278     ** index. While a closed range (e.g. col BETWEEN ? AND ?) is estimated to
  2275   2279     ** match 1/64 of the index. */ 
  2276   2280     if( pLower && pUpper ) nNew -= 20;
  2277   2281   
  2278   2282     nOut -= (pLower!=0) + (pUpper!=0);
  2279   2283     if( nNew<10 ) nNew = 10;
  2280   2284     if( nNew<nOut ) nOut = nNew;
         2285  +#if defined(WHERETRACE_ENABLED)
         2286  +  if( pLoop->nOut>nOut ){
         2287  +    WHERETRACE(0x10,("Range scan lowers nOut from %d to %d\n",
         2288  +                    pLoop->nOut, nOut));
         2289  +  }
         2290  +#endif
  2281   2291     pLoop->nOut = (LogEst)nOut;
  2282   2292     return rc;
  2283   2293   }
  2284   2294   
  2285   2295   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  2286   2296   /*
  2287   2297   ** Estimate the number of rows that will be returned based on
................................................................................
  2386   2396       nRowEst += nEst;
  2387   2397       pBuilder->nRecValid = nRecValid;
  2388   2398     }
  2389   2399   
  2390   2400     if( rc==SQLITE_OK ){
  2391   2401       if( nRowEst > nRow0 ) nRowEst = nRow0;
  2392   2402       *pnRow = nRowEst;
  2393         -    WHERETRACE(0x10,("IN row estimate: est=%g\n", nRowEst));
         2403  +    WHERETRACE(0x10,("IN row estimate: est=%d\n", nRowEst));
  2394   2404     }
  2395   2405     assert( pBuilder->nRecValid==nRecValid );
  2396   2406     return rc;
  2397   2407   }
  2398   2408   #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
  2399   2409   
  2400   2410   /*
................................................................................
  4709   4719     }
  4710   4720   #endif /* SQLITE_OMIT_AUTOMATIC_INDEX */
  4711   4721   
  4712   4722     /* Loop over all indices
  4713   4723     */
  4714   4724     for(; rc==SQLITE_OK && pProbe; pProbe=pProbe->pNext, iSortIdx++){
  4715   4725       if( pProbe->pPartIdxWhere!=0
  4716         -     && !whereUsablePartialIndex(pNew->iTab, pWC, pProbe->pPartIdxWhere) ){
         4726  +     && !whereUsablePartialIndex(pSrc->iCursor, pWC, pProbe->pPartIdxWhere) ){
         4727  +      testcase( pNew->iTab!=pSrc->iCursor );  /* See ticket [98d973b8f5] */
  4717   4728         continue;  /* Partial index inappropriate for this query */
  4718   4729       }
  4719   4730       rSize = pProbe->aiRowLogEst[0];
  4720   4731       pNew->u.btree.nEq = 0;
  4721   4732       pNew->u.btree.nSkip = 0;
  4722   4733       pNew->nLTerm = 0;
  4723   4734       pNew->iSortIdx = 0;

Changes to test/index7.test.

   243    243   do_execsql_test index7-5.0 {
   244    244     CREATE INDEX t3b ON t3(b) WHERE xyzzy.t3.b BETWEEN 5 AND 10;
   245    245                                  /* ^^^^^-- ignored */
   246    246     ANALYZE;
   247    247     SELECT count(*) FROM t3 WHERE t3.b BETWEEN 5 AND 10;
   248    248     SELECT stat+0 FROM sqlite_stat1 WHERE idx='t3b';
   249    249   } {6 6}
          250  +
          251  +# Verify that the problem identified by ticket [98d973b8f5] has been fixed.
          252  +#
          253  +do_execsql_test index7-6.1 {
          254  +  CREATE TABLE t5(a, b);
          255  +  CREATE TABLE t4(c, d);
          256  +  INSERT INTO t5 VALUES(1, 'xyz');
          257  +  INSERT INTO t4 VALUES('abc', 'not xyz');
          258  +  SELECT * FROM (SELECT * FROM t5 WHERE a=1 AND b='xyz'), t4 WHERE c='abc';
          259  +} {
          260  +  1 xyz abc {not xyz}
          261  +}
          262  +do_execsql_test index7-6.2 {
          263  +  CREATE INDEX i4 ON t4(c) WHERE d='xyz';
          264  +  SELECT * FROM (SELECT * FROM t5 WHERE a=1 AND b='xyz'), t4 WHERE c='abc';
          265  +} {
          266  +  1 xyz abc {not xyz}
          267  +}
          268  +do_execsql_test index7-6.3 {
          269  +  CREATE VIEW v4 AS SELECT * FROM t4;
          270  +  INSERT INTO t4 VALUES('def', 'xyz');
          271  +  SELECT * FROM v4 WHERE d='xyz' AND c='def'
          272  +} {
          273  +  def xyz
          274  +}
          275  +do_eqp_test index7-6.4 {
          276  +  SELECT * FROM v4 WHERE d='xyz' AND c='def'
          277  +} {
          278  +  0 0 0 {SEARCH TABLE t4 USING INDEX i4 (c=?)}
          279  +}
   250    280   
   251    281   finish_test

Changes to test/malloc.test.

   876    876   do_malloc_test 39 -tclprep {
   877    877     sqlite3 db test.db
   878    878   } -sqlbody {
   879    879     SELECT test_auxdata('abc', 'def');
   880    880   } -cleanup {
   881    881     db close
   882    882   }
          883  +
          884  +reset_db
          885  +add_test_utf16bin_collate db
          886  +do_execsql_test 40.1 {
          887  +  CREATE TABLE t1(a);
          888  +  INSERT INTO t1 VALUES('fghij');
          889  +  INSERT INTO t1 VALUES('pqrst');
          890  +  INSERT INTO t1 VALUES('abcde');
          891  +  INSERT INTO t1 VALUES('uvwxy');
          892  +  INSERT INTO t1 VALUES('klmno');
          893  +}
          894  +do_execsql_test 40.2 {
          895  +  SELECT * FROM t1 ORDER BY 1 COLLATE utf16bin;
          896  +} {abcde fghij klmno pqrst uvwxy}
          897  +do_faultsim_test 40.3 -faults oom-trans* -body {
          898  +  execsql {
          899  +    SELECT * FROM t1 ORDER BY 1 COLLATE utf16bin;
          900  +  }
          901  +} -test {
          902  +  faultsim_test_result {0 {abcde fghij klmno pqrst uvwxy}} 
          903  +  faultsim_integrity_check
          904  +}
          905  +
          906  +reset_db
          907  +add_test_utf16bin_collate db
          908  +set big [string repeat x 200]
          909  +do_execsql_test 41.1 {
          910  +  DROP TABLE IF EXISTS t1;
          911  +  CREATE TABLE t1(a COLLATE utf16bin);
          912  +  INSERT INTO t1 VALUES('fghij' || $::big);
          913  +  INSERT INTO t1 VALUES('pqrst' || $::big);
          914  +  INSERT INTO t1 VALUES('abcde' || $::big);
          915  +  INSERT INTO t1 VALUES('uvwxy' || $::big);
          916  +  INSERT INTO t1 VALUES('klmno' || $::big);
          917  +  CREATE INDEX i1 ON t1(a);
          918  +}
          919  +do_faultsim_test 41.2 -faults oom* -body {
          920  +  execsql { SELECT * FROM t1 WHERE a = ('abcde' || $::big)}
          921  +} -test {
          922  +  faultsim_test_result [list 0 "abcde$::big"]
          923  +  faultsim_integrity_check
          924  +}
   883    925   
   884    926   # Ensure that no file descriptors were leaked.
   885    927   do_test malloc-99.X {
   886    928     catch {db close}
   887    929     set sqlite_open_file_count
   888    930   } {0}
   889    931   
   890    932   puts open-file-count=$sqlite_open_file_count
   891    933   finish_test

Changes to test/mallocA.test.

    21     21   #
    22     22   if {!$MEMDEBUG} {
    23     23      puts "Skipping mallocA tests: not compiled with -DSQLITE_MEMDEBUG..."
    24     24      finish_test
    25     25      return
    26     26   }
    27     27   
    28         -
    29     28   # Construct a test database
    30     29   #
    31     30   forcedelete test.db.bu
    32     31   db eval {
    33     32     CREATE TABLE t1(a COLLATE NOCASE,b,c);
    34     33     INSERT INTO t1 VALUES(1,2,3);
    35     34     INSERT INTO t1 VALUES(1,2,4);
................................................................................
   111    110         ANALYZE sqlite_master;
   112    111         SELECT rowid FROM t1 WHERE a='abc' AND b<'y';
   113    112       }
   114    113     } -test {
   115    114       faultsim_test_result [list 0 {1 2}]
   116    115     }
   117    116   }
          117  +
          118  +do_execsql_test 7.0 {
          119  +  PRAGMA cache_size = 5;
          120  +}
          121  +do_faultsim_test 7 -faults oom-trans* -prep {
          122  +  if {$iFail < 500} { set iFail 2000 }
          123  +  if {$iFail > 1215} { set iFail 2000 }
          124  +} -body {
          125  +  execsql {
          126  +    WITH r(x,y) AS (
          127  +      SELECT 1, randomblob(100)
          128  +      UNION ALL
          129  +      SELECT x+1, randomblob(100) FROM r
          130  +      LIMIT 1000
          131  +    )
          132  +    SELECT count(x), length(y) FROM r GROUP BY (x%5)
          133  +  }
          134  +} -test {
          135  +  set res [list 200 100 200 100 200 100 200 100 200 100]
          136  +  faultsim_test_result [list 0 $res]
          137  +}
          138  +
   118    139   
   119    140   # Ensure that no file descriptors were leaked.
   120    141   do_test malloc-99.X {
   121    142     catch {db close}
   122    143     set sqlite_open_file_count
   123    144   } {0}
   124    145   
   125    146   forcedelete test.db.bu
   126    147   finish_test

Changes to test/permutations.test.

   111    111     savepoint4.test savepoint6.test select9.test 
   112    112     speed1.test speed1p.test speed2.test speed3.test speed4.test 
   113    113     speed4p.test sqllimits1.test tkt2686.test thread001.test thread002.test
   114    114     thread003.test thread004.test thread005.test trans2.test vacuum3.test 
   115    115     incrvacuum_ioerr.test autovacuum_crash.test btree8.test shared_err.test
   116    116     vtab_err.test walslow.test walcrash.test walcrash3.test
   117    117     walthread.test rtree3.test indexfault.test securedel2.test
   118         -  fts4growth.test fts4growth2.test
          118  +  sort3.test sort4.test fts4growth.test fts4growth2.test
   119    119   }]
   120    120   if {[info exists ::env(QUICKTEST_INCLUDE)]} {
   121    121     set allquicktests [concat $allquicktests $::env(QUICKTEST_INCLUDE)]
   122    122   }
   123    123   
   124    124   #############################################################################
   125    125   # Start of tests
................................................................................
   354    354     Coverage tests for file analyze.c.
   355    355   } -files {
   356    356     analyze3.test analyze4.test analyze5.test analyze6.test
   357    357     analyze7.test analyze8.test analyze9.test analyzeA.test
   358    358     analyze.test analyzeB.test mallocA.test
   359    359   } 
   360    360   
          361  +test_suite "coverage-sorter" -description {
          362  +  Coverage tests for file vdbesort.c.
          363  +} -files {
          364  +  sort.test sortfault.test
          365  +} 
          366  +
   361    367   
   362    368   lappend ::testsuitelist xxx
   363    369   #-------------------------------------------------------------------------
   364    370   # Define the permutation test suites:
   365    371   #
   366    372   
   367    373   # Run some tests using pre-allocated page and scratch blocks.
................................................................................
   485    491     sqlite3_shutdown
   486    492     catch {sqlite3_config multithread}
   487    493     sqlite3_initialize
   488    494     autoinstall_test_functions
   489    495   } -files {
   490    496     delete.test   delete2.test  insert.test  rollback.test  select1.test
   491    497     select2.test  trans.test    update.test  vacuum.test    types.test
   492         -  types2.test   types3.test
          498  +  types2.test   types3.test   sort4.test
   493    499   } -shutdown {
   494    500     catch {db close}
   495    501     sqlite3_shutdown
   496    502     catch {sqlite3_config serialized}
   497    503     sqlite3_initialize
   498    504     autoinstall_test_functions
   499    505   }

Changes to test/sort.test.

     4      4   # a legal notice, here is a blessing:
     5      5   #
     6      6   #    May you do good and not evil.
     7      7   #    May you find forgiveness for yourself and forgive others.
     8      8   #    May you share freely, never taking more than you give.
     9      9   #
    10     10   #***********************************************************************
           11  +#
    11     12   # This file implements regression tests for SQLite library.  The
    12         -# focus of this file is testing the CREATE TABLE statement.
           13  +# focus of this file is testing the sorter (code in vdbesort.c).
    13     14   #
    14         -# $Id: sort.test,v 1.25 2005/11/14 22:29:06 drh Exp $
    15     15   
    16     16   set testdir [file dirname $argv0]
    17     17   source $testdir/tester.tcl
    18     18   
    19     19   # Create a bunch of data to sort against
    20     20   #
    21     21   do_test sort-1.0 {
................................................................................
   459    459       insert into b values (2, 1, 'xxx');
   460    460       insert into b values (1, 1, 'zzz');
   461    461       insert into b values (3, 1, 'yyy');
   462    462       select a.id, b.id, b.text from a join b on (a.id = b.aId)
   463    463         order by a.id, b.text;
   464    464     }
   465    465   } {1 2 xxx 1 3 yyy 1 1 zzz}
          466  +
          467  +#-------------------------------------------------------------------------
          468  +# Check that the sorter in vdbesort.c sorts in a stable fashion.
          469  +#
          470  +do_execsql_test sort-13.0 {
          471  +  CREATE TABLE t10(a, b);
          472  +}
          473  +do_test sort-13.1 {
          474  +  db transaction {
          475  +    for {set i 0} {$i < 100000} {incr i} {
          476  +      execsql { INSERT INTO t10 VALUES( $i/10, $i%10 ) }
          477  +    }
          478  +  }
          479  +} {}
          480  +do_execsql_test sort-13.2 {
          481  +  SELECT a, b FROM t10 ORDER BY a;
          482  +} [db eval {SELECT a, b FROM t10 ORDER BY a, b}]
          483  +do_execsql_test sort-13.3 {
          484  +  PRAGMA cache_size = 5;
          485  +  SELECT a, b FROM t10 ORDER BY a;
          486  +} [db eval {SELECT a, b FROM t10 ORDER BY a, b}]
          487  +
          488  +#-------------------------------------------------------------------------
          489  +# Sort some large ( > 4KiB) records.
          490  +#
          491  +proc cksum {x} {
          492  +  set i1 1
          493  +  set i2 2
          494  +  binary scan $x c* L
          495  +  foreach {a b} $L {
          496  +    set i1 [expr (($i2<<3) + $a) & 0x7FFFFFFF]
          497  +    set i2 [expr (($i1<<3) + $b) & 0x7FFFFFFF]
          498  +  }
          499  +  list $i1 $i2
          500  +}
          501  +db func cksum cksum
          502  +
          503  +do_execsql_test sort-14.0 {
          504  +  PRAGMA cache_size = 5;
          505  +  CREATE TABLE t11(a, b);
          506  +  INSERT INTO t11 VALUES(randomblob(5000), NULL);
          507  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --2
          508  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --3
          509  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --4
          510  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --5
          511  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --6
          512  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --7
          513  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --8
          514  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --9
          515  +  UPDATE t11 SET b = cksum(a);
          516  +}
          517  +
          518  +foreach {tn mmap_limit} {
          519  +  1 0
          520  +  2 1000000
          521  +} {
          522  +  do_test sort-14.$tn {
          523  +    sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $mmap_limit
          524  +    set prev ""
          525  +    db eval { SELECT * FROM t11 ORDER BY b } {
          526  +      if {$b != [cksum $a]} {error "checksum failed"}
          527  +      if {[string compare $b $prev] < 0} {error "sort failed"}
          528  +      set prev $b
          529  +    }
          530  +    set {} {}
          531  +  } {}
          532  +}
          533  +
          534  +#-------------------------------------------------------------------------
          535  +#
          536  +foreach {tn mmap_limit nWorker tmpstore coremutex fakeheap softheaplimit} {
          537  +          1          0       3     file      true    false             0
          538  +          2          0       3     file      true     true             0
          539  +          3          0       0     file      true    false             0
          540  +          4    1000000       3     file      true    false             0
          541  +          5          0       0   memory     false     true             0
          542  +          6          0       0     file     false     true       1000000     
          543  +          7          0       0     file     false     true         10000
          544  +} {
          545  +  db close
          546  +  sqlite3_shutdown
          547  +  if {$coremutex} {
          548  +    sqlite3_config multithread
          549  +  } else {
          550  +    sqlite3_config singlethread
          551  +  }
          552  +  sqlite3_initialize
          553  +  sorter_test_fakeheap $fakeheap
          554  +  sqlite3_soft_heap_limit $softheaplimit
          555  +
          556  +  reset_db
          557  +  sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $mmap_limit
          558  +  execsql "PRAGMA temp_store = $tmpstore; PRAGMA threads = $nWorker"
          559  +  
          560  +  
          561  +  set ten [string repeat X 10300]
          562  +  set one [string repeat y   200]
          563  +
          564  +  if {$softheaplimit} {
          565  +    execsql { PRAGMA cache_size = 20 };
          566  +  } else {
          567  +    execsql { PRAGMA cache_size = 5 };
          568  +  }
          569  +
          570  +  do_execsql_test 15.$tn.1 {
          571  +    WITH rr AS (
          572  +      SELECT 4, $ten UNION ALL
          573  +      SELECT 2, $one UNION ALL
          574  +      SELECT 1, $ten UNION ALL
          575  +      SELECT 3, $one
          576  +    )
          577  +    SELECT * FROM rr ORDER BY 1;
          578  +  } [list 1 $ten 2 $one 3 $one 4 $ten]
          579  +
          580  +  do_execsql_test 15.$tn.2 {
          581  +    CREATE TABLE t1(a);
          582  +    INSERT INTO t1 VALUES(4);
          583  +    INSERT INTO t1 VALUES(5);
          584  +    INSERT INTO t1 VALUES(3);
          585  +    INSERT INTO t1 VALUES(2);
          586  +    INSERT INTO t1 VALUES(6);
          587  +    INSERT INTO t1 VALUES(1);
          588  +    CREATE INDEX i1 ON t1(a);
          589  +    SELECT * FROM t1 ORDER BY a;
          590  +  } {1 2 3 4 5 6}
          591  +
          592  +  do_execsql_test 15.$tn.3 {
          593  +    WITH rr AS (
          594  +      SELECT 4, $ten UNION ALL
          595  +      SELECT 2, $one
          596  +    )
          597  +    SELECT * FROM rr ORDER BY 1;
          598  +  } [list 2 $one 4 $ten]
          599  +
          600  +  sorter_test_fakeheap 0
          601  +}
          602  +
          603  +db close
          604  +sqlite3_shutdown
          605  +set t(0) singlethread
          606  +set t(1) multithread
          607  +set t(2) serialized
          608  +sqlite3_config $t($sqlite_options(threadsafe))
          609  +sqlite3_initialize
          610  +sqlite3_soft_heap_limit 0
          611  +
          612  +reset_db
          613  +do_catchsql_test 16.1 {
          614  +  CREATE TABLE t1(a, b, c);
          615  +  INSERT INTO t1 VALUES(1, 2, 3);
          616  +  INSERT INTO t1 VALUES(1, NULL, 3);
          617  +  INSERT INTO t1 VALUES(NULL, 2, 3);
          618  +  INSERT INTO t1 VALUES(1, 2, NULL);
          619  +  INSERT INTO t1 VALUES(4, 5, 6);
          620  +  CREATE UNIQUE INDEX i1 ON t1(b, a, c);
          621  +} {0 {}}
          622  +reset_db
          623  +do_catchsql_test 16.2 {
          624  +  CREATE TABLE t1(a, b, c);
          625  +  INSERT INTO t1 VALUES(1, 2, 3);
          626  +  INSERT INTO t1 VALUES(1, NULL, 3);
          627  +  INSERT INTO t1 VALUES(1, 2, 3);
          628  +  INSERT INTO t1 VALUES(1, 2, NULL);
          629  +  INSERT INTO t1 VALUES(4, 5, 6);
          630  +  CREATE UNIQUE INDEX i1 ON t1(b, a, c);
          631  +} {1 {UNIQUE constraint failed: t1.b, t1.a, t1.c}}
          632  +
          633  +reset_db
          634  +do_execsql_test 17.1 {
          635  +  SELECT * FROM sqlite_master ORDER BY sql;
          636  +} {}
   466    637   
   467    638   finish_test

Added test/sort2.test.

            1  +# 2014 March 25.
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library. 
           12  +#
           13  +# Specifically, the tests in this file attempt to verify that 
           14  +# multi-threaded sorting works.
           15  +#
           16  +
           17  +set testdir [file dirname $argv0]
           18  +source $testdir/tester.tcl
           19  +set testprefix sort2
           20  +
           21  +foreach {tn script} {
           22  +  1 { }
           23  +  2 {
           24  +    catch { db close }
           25  +    reset_db
           26  +    catch { db eval {PRAGMA threads=7} }
           27  +  }
           28  +} {
           29  +
           30  +  eval $script
           31  +
           32  +  do_execsql_test $tn.1 {
           33  +    PRAGMA cache_size = 5;
           34  +    WITH r(x,y) AS (
           35  +      SELECT 1, randomblob(100)
           36  +      UNION ALL
           37  +      SELECT x+1, randomblob(100) FROM r
           38  +      LIMIT 100000
           39  +    )
           40  +    SELECT count(x), length(y) FROM r GROUP BY (x%5)
           41  +  } {
           42  +    20000 100 20000 100 20000 100 20000 100 20000 100
           43  +  }
           44  +
           45  +  do_execsql_test $tn.2.1 {
           46  +    CREATE TABLE t1(a, b);
           47  +    WITH r(x,y) AS (
           48  +      SELECT 1, randomblob(100)
           49  +      UNION ALL
           50  +      SELECT x+1, randomblob(100) FROM r
           51  +      LIMIT 10000
           52  +    ) INSERT INTO t1 SELECT * FROM r;
           53  +  }
           54  +  
           55  +  do_execsql_test $tn.2.2 {
           56  +    CREATE UNIQUE INDEX i1 ON t1(b, a);
           57  +  }
           58  +  
           59  +  do_execsql_test $tn.2.3 {
           60  +    CREATE UNIQUE INDEX i2 ON t1(a);
           61  +  }
           62  +  
           63  +  do_execsql_test $tn.2.4 { PRAGMA integrity_check } {ok}
           64  +  
           65  +  breakpoint
           66  +  do_execsql_test $tn.3 {
           67  +    PRAGMA cache_size = 5;
           68  +    WITH r(x,y) AS (
           69  +      SELECT 1, randomblob(100)
           70  +      UNION ALL
           71  +      SELECT x+1, randomblob(100) FROM r
           72  +      LIMIT 1000000
           73  +    )
           74  +    SELECT count(x), length(y) FROM r GROUP BY (x%5)
           75  +  } {
           76  +    200000 100 200000 100 200000 100 200000 100 200000 100
           77  +  }
           78  +}
           79  +
           80  +finish_test

Added test/sort3.test.

            1  +# 2014 March 25.
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library. 
           12  +#
           13  +# The tests in this file verify that sorting works when the library is
           14  +# configured to use mmap(), but the temporary files generated by the
           15  +# sorter are too large to be completely mapped.
           16  +#
           17  +
           18  +set testdir [file dirname $argv0]
           19  +source $testdir/tester.tcl
           20  +set testprefix sort3
           21  +
           22  +# Sort roughly 20MB of data. Once with a mmap limit of 5MB and once without.
           23  +#
           24  +foreach {itest limit} {
           25  +  1 5000000
           26  +  2 0x7FFFFFFF
           27  +} {
           28  +  sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $limit
           29  +  do_execsql_test 1.$itest {
           30  +    WITH r(x,y) AS (
           31  +        SELECT 1, randomblob(1000)
           32  +        UNION ALL
           33  +        SELECT x+1, randomblob(1000) FROM r
           34  +        LIMIT 20000
           35  +    )
           36  +    SELECT count(*), sum(length(y)) FROM r GROUP BY (x%5);
           37  +  } {
           38  +    4000 4000000 
           39  +    4000 4000000 
           40  +    4000 4000000 
           41  +    4000 4000000 
           42  +    4000 4000000
           43  +  }
           44  +}
           45  +
           46  +# Sort more than 2GB of data. At one point this was causing a problem.
           47  +# This test might take one minute or more to run.
           48  +#
           49  +do_execsql_test 2 {
           50  +  PRAGMA cache_size = 20000;
           51  +  WITH r(x,y) AS (
           52  +    SELECT 1, randomblob(1000)
           53  +    UNION ALL
           54  +    SELECT x+1, randomblob(1000) FROM r
           55  +    LIMIT 2200000
           56  +  )
           57  +  SELECT count(*), sum(length(y)) FROM r GROUP BY (x%5);
           58  +} {
           59  +  440000 440000000 
           60  +  440000 440000000 
           61  +  440000 440000000 
           62  +  440000 440000000 
           63  +  440000 440000000
           64  +}
           65  +
           66  +finish_test
           67  +

Added test/sort4.test.

            1  +# 2014 May 6.
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library. 
           12  +#
           13  +# The tests in this file are brute force tests of the multi-threaded
           14  +# sorter.
           15  +#
           16  +
           17  +set testdir [file dirname $argv0]
           18  +source $testdir/tester.tcl
           19  +set testprefix sort4
           20  +
           21  +# Configure the sorter to use 3 background threads.
           22  +db eval {PRAGMA threads=3}
           23  +
           24  +# Minimum number of seconds to run for. If the value is 0, each test
           25  +# is run exactly once. Otherwise, tests are repeated until the timeout
           26  +# expires.
           27  +set SORT4TIMEOUT 0
           28  +if {[permutation] == "multithread"} { set SORT4TIMEOUT 300 }
           29  +
           30  +#--------------------------------------------------------------------
           31  +# Set up a table "t1" containing $nRow rows. Each row also
           32  +# contains blob fields that collectively contain at least $nPayload 
           33  +# bytes of content. The table schema is as follows:
           34  +#
           35  +#   CREATE TABLE t1(a INTEGER, <extra-columns>, b INTEGER);
           36  +#
           37  +# For each row, the values of columns "a" and "b" are set to the same
           38  +# pseudo-randomly selected integer. The "extra-columns", of which there
           39  +# are at most eight, are named c0, c1, c2 etc. Column c0 contains a 4
           40  +# byte string. Column c1 an 8 byte string. Field c2 16 bytes, and so on.
           41  +#
           42  +# This table is intended to be used for testing queries of the form: 
           43  +#
           44  +#   SELECT a, <cols>, b FROM t1 ORDER BY a;
           45  +#
           46  +# The test code checks that rows are returned in order, and that the 
           47  +# values of "a" and "b" are the same for each row (the idea being that
           48  +# if field "b" at the end of the sorter record has not been corrupted, 
           49  +# the rest of the record is probably Ok as well).
           50  +#
           51  +proc populate_table {nRow nPayload} {
           52  +  set nCol 0
           53  +
           54  +  set n 0
           55  +  for {set nCol 0} {$n < $nPayload} {incr nCol} {
           56  +    incr n [expr (4 << $nCol)]
           57  +  }
           58  +
           59  +  set cols [lrange [list xxx c0 c1 c2 c3 c4 c5 c6 c7] 1 $nCol]
           60  +  set data [lrange [list xxx \
           61  +      randomblob(4) randomblob(8) randomblob(16) randomblob(32) \
           62  +      randomblob(64) randomblob(128) randomblob(256) randomblob(512) \
           63  +  ] 1 $nCol]
           64  +
           65  +  execsql { DROP TABLE IF EXISTS t1 }
           66  +
           67  +  db transaction {
           68  +    execsql "CREATE TABLE t1(a, [join $cols ,], b);"
           69  +    set insert "INSERT INTO t1 VALUES(:k, [join $data ,], :k)"
           70  +    for {set i 0} {$i < $nRow} {incr i} {
           71  +      set k [expr int(rand()*1000000000)]
           72  +      execsql $insert
           73  +    }
           74  +  }
           75  +}
           76  +
           77  +# Helper for [do_sorter_test]
           78  +#
           79  +proc sorter_test {nRow nRead nPayload} {
           80  +  set res [list]
           81  +
           82  +  set nLoad [expr ($nRow > $nRead) ? $nRead : $nRow]
           83  +
           84  +  set nPayload [expr (($nPayload+3)/4) * 4]
           85  +  set cols [list]
           86  +  foreach {mask col} { 
           87  +    0x04  c0 0x08  c1 0x10  c2 0x20  c3 
           88  +    0x40  c4 0x80  c5 0x100 c6 0x200 c7 
           89  +  } {
           90  +    if {$nPayload & $mask} { lappend cols $col }
           91  +  }
           92  +
           93  +  # Create two SELECT statements. Statement $sql1 uses the sorter to sort
           94  +  # $nRow records of a bit over $nPayload bytes each read from the "t1"
           95  +  # table created by [populate_table] proc above. Rows are sorted in order
           96  +  # of the integer field in each "t1" record.
           97  +  #
           98  +  # The second SQL statement sorts the same set of rows as the first, but
           99  +  # uses a LIMIT clause, causing SQLite to use a temp table instead of the
          100  +  # sorter for sorting.
          101  +  #
          102  +  set sql1 "SELECT a, [join $cols ,], b FROM t1 WHERE rowid<=$nRow ORDER BY a"
          103  +  set sql2 "SELECT a FROM t1 WHERE rowid<=$nRow ORDER BY a LIMIT $nRead"
          104  +
          105  +  # Pass the two SQL statements to a helper command written in C. This
          106  +  # command steps statement $sql1 $nRead times and compares the integer
          107  +  # values in the rows returned with the results of executing $sql2. If
          108  +  # the comparison fails (indicating some bug in the sorter), a Tcl
          109  +  # exception is thrown.
          110  +  #
          111  +  sorter_test_sort4_helper db $sql1 $nRead $sql2
          112  +  set {} {} 
          113  +}
          114  +
          115  +# Usage:
          116  +#
          117  +#   do_sorter_test <testname> <args>...
          118  +#
          119  +# where <args> are any of the following switches:
          120  +#
          121  +#   -rows N          (number of rows to have sorter sort)
          122  +#   -read N          (number of rows to read out of sorter)
          123  +#   -payload N       (bytes of payload to read with each row)
          124  +#   -cachesize N     (Value for "PRAGMA cache_size = ?")
          125  +#   -repeats N       (number of times to repeat test)
          126  +#   -fakeheap BOOL   (true to use separate allocations for in-memory records)
          127  +#
          128  +proc do_sorter_test {tn args} {
          129  +  set a(-rows)      1000
          130  +  set a(-repeats)   1
          131  +  set a(-read)      100
          132  +  set a(-payload)   100
          133  +  set a(-cachesize) 100
          134  +  set a(-fakeheap)  0
          135  +
          136  +  foreach {s val} $args {
          137  +    if {[info exists a($s)]==0} { 
          138  +      unset a(-cachesize)
          139  +      set optlist "[join [array names a] ,] or -cachesize"
          140  +      error "Unknown option $s, expected $optlist"
          141  +    }
          142  +    set a($s) $val
          143  +  }
          144  +  if {[permutation] == "memsys3" || [permutation] == "memsys5"} {
          145  +    set a(-fakeheap) 0
          146  +  }
          147  +  if {$a(-fakeheap)} { sorter_test_fakeheap 1 }
          148  +
          149  +
          150  +  db eval "PRAGMA cache_size = $a(-cachesize)"
          151  +  do_test $tn [subst -nocommands {
          152  +    for {set i 0} {[set i] < $a(-repeats)} {incr i} {
          153  +      sorter_test $a(-rows) $a(-read) $a(-payload)
          154  +    }
          155  +  }] {}
          156  +
          157  +  if {$a(-fakeheap)} { sorter_test_fakeheap 0 }
          158  +}
          159  +
          160  +proc clock_seconds {} {
          161  +  db one {SELECT strftime('%s')}
          162  +}
          163  +
          164  +#-------------------------------------------------------------------------
          165  +# Begin tests here.
          166  +
          167  +# Create a test database.
          168  +do_test 1 {
          169  +  execsql "PRAGMA page_size = 4096"
          170  +  populate_table 100000 500
          171  +} {}
          172  +
          173  +set iTimeLimit [expr [clock_seconds] + $SORT4TIMEOUT]
          174  +
          175  +for {set t 2} {1} {incr t} {
          176  +  do_sorter_test $t.2 -repeats 10 -rows 1000   -read 100
          177  +  do_sorter_test $t.3 -repeats 10 -rows 100000 -read 1000
          178  +  do_sorter_test $t.4 -repeats 10 -rows 100000 -read 1000 -payload 500
          179  +  do_sorter_test $t.5 -repeats 10 -rows 100000 -read 100000 -payload 8
          180  +  do_sorter_test $t.6 -repeats 10 -rows 100000 -read 10 -payload 8
          181  +  do_sorter_test $t.7 -repeats 10 -rows 10000 -read 10000 -payload 8 -fakeheap 1
          182  +  do_sorter_test $t.8 -repeats 10 -rows 100000 -read 10000 -cachesize 250
          183  +
          184  +  set iNow [clock_seconds]
          185  +  if {$iNow>=$iTimeLimit} break
          186  +  do_test "$testprefix-([expr $iTimeLimit-$iNow] seconds remain)" {} {}
          187  +}
          188  +
          189  +finish_test

Added test/sortfault.test.

            1  +# 2014 March 25.
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library. 
           12  +#
           13  +# Specifically, it tests the effects of fault injection on the sorter
           14  +# module (code in vdbesort.c).
           15  +#
           16  +
           17  +set testdir [file dirname $argv0]
           18  +source $testdir/tester.tcl
           19  +set testprefix sortfault
           20  +
           21  +do_execsql_test 1.0 {
           22  +  PRAGMA cache_size = 5;
           23  +}
           24  +
           25  +foreach {tn mmap_limit nWorker tmpstore threadsmode fakeheap lookaside} {
           26  +          1          0       0     file multithread    false     false
           27  +          2     100000       0     file multithread    false     false
           28  +          3     100000       1     file multithread    false     false
           29  +          4    2000000       0     file singlethread   false      true
           30  +} {
           31  +  if {$sqlite_options(threadsafe)} { set threadsmode singlethread }
           32  +
           33  +  db eval "PRAGMA threads=$nWorker"
           34  +  sqlite3_config $threadsmode
           35  +  if { $lookaside } {
           36  +    sqlite3_config_lookaside 100 500
           37  +  } else {
           38  +    sqlite3_config_lookaside 0 0
           39  +  }
           40  +  sqlite3_initialize
           41  +  sorter_test_fakeheap $fakeheap
           42  +
           43  +  set str [string repeat a 1000]
           44  +  puts $threadsmode
           45  +
           46  +  do_faultsim_test 1.$tn -prep {
           47  +    sqlite3 db test.db
           48  +    sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $::mmap_limit
           49  +    execsql { PRAGMA cache_size = 5 }
           50  +  } -body {
           51  +    execsql { 
           52  +      WITH r(x,y) AS (
           53  +          SELECT 1, $::str
           54  +          UNION ALL
           55  +          SELECT x+1, $::str FROM r
           56  +          LIMIT 200
           57  +      )
           58  +      SELECT count(x), length(y) FROM r GROUP BY (x%5)
           59  +    }
           60  +  } -test {
           61  +    faultsim_test_result {0 {40 1000 40 1000 40 1000 40 1000 40 1000}}
           62  +  }
           63  +
           64  +  do_faultsim_test 2.$tn -faults oom* -prep {
           65  +    sqlite3 db test.db
           66  +    sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $::mmap_limit
           67  +    add_test_utf16bin_collate db
           68  +    execsql { PRAGMA cache_size = 5 }
           69  +  } -body {
           70  +    execsql { 
           71  +      WITH r(x,y) AS (
           72  +          SELECT 100, $::str
           73  +          UNION ALL
           74  +          SELECT x-1, $::str FROM r
           75  +          LIMIT 100
           76  +      )
           77  +      SELECT count(x), length(y) FROM r GROUP BY y COLLATE utf16bin, (x%5)
           78  +    }
           79  +  } -test {
           80  +    faultsim_test_result {0 {20 1000 20 1000 20 1000 20 1000 20 1000}}
           81  +  }
           82  +
           83  +  if {$mmap_limit > 1000000} {
           84  +    set str2 [string repeat $str 10]
           85  +
           86  +    sqlite3_memdebug_vfs_oom_test 0
           87  +    sqlite3 db test.db
           88  +    sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $::mmap_limit
           89  +    execsql { PRAGMA cache_size = 5 }
           90  +
           91  +    do_faultsim_test 3.$tn -faults oom-trans* -body {
           92  +      execsql { 
           93  +        WITH r(x,y) AS (
           94  +            SELECT 300, $::str2
           95  +            UNION ALL
           96  +            SELECT x-1, $::str2 FROM r
           97  +            LIMIT 300
           98  +        )
           99  +        SELECT count(x), length(y) FROM r GROUP BY y, (x%5)
          100  +      }
          101  +    } -test {
          102  +      faultsim_test_result {0 {60 10000 60 10000 60 10000 60 10000 60 10000}}
          103  +    }
          104  +
          105  +    sqlite3_memdebug_vfs_oom_test 1
          106  +  }
          107  +}
          108  +
          109  +catch { db close }
          110  +sqlite3_shutdown
          111  +set t(0) singlethread
          112  +set t(1) multithread
          113  +set t(2) serialized
          114  +sqlite3_config $t($sqlite_options(threadsafe))
          115  +sqlite3_config_lookaside 100 500
          116  +sqlite3_initialize
          117  +
          118  +#-------------------------------------------------------------------------
          119  +#
          120  +reset_db
          121  +do_execsql_test 4.0 { 
          122  +  CREATE TABLE t1(a, b, c); 
          123  +  INSERT INTO t1 VALUES(1, 2, 3);
          124  +}
          125  +do_test 4.1 { 
          126  +  for {set i 0} {$i < 256} {incr i} {
          127  +    execsql { 
          128  +      INSERT INTO t1 SELECT
          129  +        ((a<<3) + b) & 2147483647,
          130  +        ((b<<3) + c) & 2147483647,
          131  +        ((c<<3) + a) & 2147483647
          132  +      FROM t1 ORDER BY rowid DESC LIMIT 1;
          133  +    }
          134  +  }
          135  +} {}
          136  +
          137  +faultsim_save_and_close
          138  +
          139  +do_faultsim_test 4.2 -faults oom* -prep {
          140  +  faultsim_restore_and_reopen
          141  +} -body {
          142  +  execsql { CREATE UNIQUE INDEX i1 ON t1(a,b,c) }
          143  +} -test {
          144  +  faultsim_test_result {0 {}}
          145  +}
          146  +
          147  +#-------------------------------------------------------------------------
          148  +#
          149  +reset_db
          150  +set a [string repeat a 500]
          151  +set b [string repeat b 500]
          152  +set c [string repeat c 500]
          153  +do_execsql_test 5.0 { 
          154  +  CREATE TABLE t1(a, b, c); 
          155  +  INSERT INTO t1 VALUES($a, $b, $c); 
          156  +  INSERT INTO t1 VALUES($c, $b, $a); 
          157  +}
          158  +
          159  +do_faultsim_test 5.1 -faults oom* -body {
          160  +  execsql { SELECT * FROM t1 ORDER BY a }
          161  +} -test {
          162  +  faultsim_test_result [list 0 [list $::a $::b $::c $::c $::b $::a]]
          163  +}
          164  +
          165  +finish_test

Changes to test/speedtest1.c.

    23     23     "  --reprepare         Reprepare each statement upon every invocation\n"
    24     24     "  --scratch N SZ      Configure scratch memory for N slots of SZ bytes each\n"
    25     25     "  --sqlonly           No-op.  Only show the SQL that would have been run.\n"
    26     26     "  --size N            Relative test size.  Default=100\n"
    27     27     "  --stats             Show statistics at the end\n"
    28     28     "  --testset T         Run test-set T\n"
    29     29     "  --trace             Turn on SQL tracing\n"
           30  +  "  --threads N         Use up to N threads for sorting\n"
    30     31     "  --utf16be           Set text encoding to UTF-16BE\n"
    31     32     "  --utf16le           Set text encoding to UTF-16LE\n"
    32     33     "  --verify            Run additional verification steps.\n"
    33     34     "  --without-rowid     Use WITHOUT ROWID where appropriate\n"
    34     35   ;
    35     36   
    36     37   
................................................................................
  1137   1138     const char *zKey = 0;         /* Encryption key */
  1138   1139     int nLook = 0, szLook = 0;    /* --lookaside configuration */
  1139   1140     int noSync = 0;               /* True for --nosync */
  1140   1141     int pageSize = 0;             /* Desired page size.  0 means default */
  1141   1142     int nPCache = 0, szPCache = 0;/* --pcache configuration */
  1142   1143     int nScratch = 0, szScratch=0;/* --scratch configuration */
  1143   1144     int showStats = 0;            /* True for --stats */
         1145  +  int nThread = 0;              /* --threads value */
  1144   1146     const char *zTSet = "main";   /* Which --testset to run */
  1145   1147     int doTrace = 0;              /* True for --trace */
  1146   1148     const char *zEncoding = 0;    /* --utf16be or --utf16le */
  1147   1149     const char *zDbName = 0;      /* Name of the test database */
  1148   1150   
  1149   1151     void *pHeap = 0;              /* Allocated heap space */
  1150   1152     void *pLook = 0;              /* Allocated lookaside space */
................................................................................
  1221   1223         }else if( strcmp(z,"stats")==0 ){
  1222   1224           showStats = 1;
  1223   1225         }else if( strcmp(z,"testset")==0 ){
  1224   1226           if( i>=argc-1 ) fatal_error("missing argument on %s\n", argv[i]);
  1225   1227           zTSet = argv[++i];
  1226   1228         }else if( strcmp(z,"trace")==0 ){
  1227   1229           doTrace = 1;
         1230  +      }else if( strcmp(z,"threads")==0 ){
         1231  +        if( i>=argc-1 ) fatal_error("missing argument on %s\n", argv[i]);
         1232  +        nThread = integerValue(argv[++i]);
  1228   1233         }else if( strcmp(z,"utf16le")==0 ){
  1229   1234           zEncoding = "utf16le";
  1230   1235         }else if( strcmp(z,"utf16be")==0 ){
  1231   1236           zEncoding = "utf16be";
  1232   1237         }else if( strcmp(z,"verify")==0 ){
  1233   1238           g.bVerify = 1;
  1234   1239         }else if( strcmp(z,"without-rowid")==0 ){
................................................................................
  1286   1291       rc = sqlite3_db_config(g.db, SQLITE_DBCONFIG_LOOKASIDE, pLook, szLook,nLook);
  1287   1292       if( rc ) fatal_error("lookaside configuration failed: %d\n", rc);
  1288   1293     }
  1289   1294   
  1290   1295     /* Set database connection options */
  1291   1296     sqlite3_create_function(g.db, "random", 0, SQLITE_UTF8, 0, randomFunc, 0, 0);
  1292   1297     if( doTrace ) sqlite3_trace(g.db, traceCallback, 0);
         1298  +  speedtest1_exec("PRAGMA threads=%d", nThread);
  1293   1299     if( zKey ){
  1294   1300       speedtest1_exec("PRAGMA key('%s')", zKey);
  1295   1301     }
  1296   1302     if( zEncoding ){
  1297   1303       speedtest1_exec("PRAGMA encoding=%s", zEncoding);
  1298   1304     }
  1299   1305     if( doAutovac ){

Changes to test/tester.tcl.

  1079   1079       set G ""
  1080   1080       set B ""
  1081   1081       set D ""
  1082   1082     }
  1083   1083     foreach opcode {
  1084   1084         Seek SeekGe SeekGt SeekLe SeekLt NotFound Last Rewind
  1085   1085         NoConflict Next Prev VNext VPrev VFilter
         1086  +      SorterSort SorterNext
  1086   1087     } {
  1087   1088       set color($opcode) $B
  1088   1089     }
  1089   1090     foreach opcode {ResultRow} {
  1090   1091       set color($opcode) $G
  1091   1092     }
  1092   1093     foreach opcode {IdxInsert Insert Delete IdxDelete} {
................................................................................
  1101   1102       if {$opcode == "Goto" && ($bSeenGoto==0 || ($p2 > $addr+10))} {
  1102   1103         set linebreak($p2) 1
  1103   1104         set bSeenGoto 1
  1104   1105       }
  1105   1106   
  1106   1107       if {$opcode=="Next"  || $opcode=="Prev" 
  1107   1108        || $opcode=="VNext" || $opcode=="VPrev"
         1109  +     || $opcode=="SorterNext"
  1108   1110       } {
  1109   1111         for {set i $p2} {$i<$addr} {incr i} {
  1110   1112           incr x($i) 2
  1111   1113         }
  1112   1114       }
  1113   1115   
  1114   1116       if {$opcode == "Goto" && $p2<$addr && $op($p2)=="Yield"} {

Changes to test/whereJ.test.

   366    366        AND t0b.id=2
   367    367        AND t1b.id BETWEEN t0b.minChild AND t0b.maxChild
   368    368        AND t2b.id BETWEEN t1b.minChild AND t1b.maxChild
   369    369        AND t3b.id BETWEEN t2b.minChild AND t2b.maxChild
   370    370        AND t4.id BETWEEN t3a.minChild AND t3b.maxChild
   371    371     ORDER BY t4.x;
   372    372   } {~/SCAN/}
          373  +
          374  +############################################################################
          375  +
          376  +ifcapable stat4 {
          377  +  # Create and populate table.
          378  +  do_execsql_test 3.1 { CREATE TABLE t1(a, b, c) }
          379  +  for {set i 0} {$i < 32} {incr i 2} {
          380  +    for {set x 0} {$x < 100} {incr x} {
          381  +      execsql { INSERT INTO t1 VALUES($i, $x, $c) }
          382  +      incr c
          383  +    }
          384  +    execsql { INSERT INTO t1 VALUES($i+1, 5, $c) }
          385  +    incr c
          386  +  }
          387  +  
          388  +  do_execsql_test 3.2 {
          389  +    SELECT a, count(*) FROM t1 GROUP BY a HAVING a < 8;
          390  +  } {
          391  +    0 100 1 1 2 100 3 1 4 100 5 1 6 100 7 1
          392  +  }
          393  +  
          394  +  do_execsql_test 3.3 {
          395  +    CREATE INDEX idx_ab ON t1(a, b);
          396  +    CREATE INDEX idx_c ON t1(c);
          397  +    ANALYZE;
          398  +  } {}
          399  +  
          400  +  # This one should use index "idx_c".
          401  +  do_eqp_test 3.4 {
          402  +    SELECT * FROM t1 WHERE 
          403  +      a = 4 AND b BETWEEN 20 AND 80           -- Matches 80 rows
          404  +        AND
          405  +      c BETWEEN 150 AND 160                   -- Matches 10 rows
          406  +  } {
          407  +    0 0 0 {SEARCH TABLE t1 USING INDEX idx_c (c>? AND c<?)}
          408  +  }
          409  +  
          410  +  # This one should use index "idx_ab".
          411  +  do_eqp_test 3.5 {
          412  +    SELECT * FROM t1 WHERE 
          413  +      a = 5 AND b BETWEEN 20 AND 80           -- Matches 1 row
          414  +        AND
          415  +      c BETWEEN 150 AND 160                   -- Matches 10 rows
          416  +  } {
          417  +    0 0 0 {SEARCH TABLE t1 USING INDEX idx_ab (a=? AND b>? AND b<?)}
          418  +  }
          419  +}
   373    420   
   374    421   
   375    422   finish_test

Changes to tool/mkpragmatab.tcl.

   290    290     TYPE: HEXKEY
   291    291     IF:   defined(SQLITE_HAS_CODEC)
   292    292   
   293    293     NAME: activate_extensions
   294    294     IF:   defined(SQLITE_HAS_CODEC) || defined(SQLITE_ENABLE_CEROD)
   295    295   
   296    296     NAME: soft_heap_limit
          297  +
          298  +  NAME: threads
   297    299   }
   298    300   fconfigure stdout -translation lf
   299    301   set name {}
   300    302   set type {}
   301    303   set if {}
   302    304   set flags {}
   303    305   set arg 0

Changes to tool/mksqlite3c-noext.tcl.

   235    235      mutex.c
   236    236      mutex_noop.c
   237    237      mutex_unix.c
   238    238      mutex_w32.c
   239    239      malloc.c
   240    240      printf.c
   241    241      random.c
          242  +   threads.c
   242    243      utf.c
   243    244      util.c
   244    245      hash.c
   245    246      opcodes.c
   246    247   
   247    248      os_unix.c
   248    249      os_win.c

Changes to tool/mksqlite3c.tcl.

   251    251      mutex.c
   252    252      mutex_noop.c
   253    253      mutex_unix.c
   254    254      mutex_w32.c
   255    255      malloc.c
   256    256      printf.c
   257    257      random.c
          258  +   threads.c
   258    259      utf.c
   259    260      util.c
   260    261      hash.c
   261    262      opcodes.c
   262    263   
   263    264      os_unix.c
   264    265      os_win.c