/ Check-in [696dc935]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge the latest trunk changes into the apple-osx branch.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | apple-osx
Files: files | file ages | folders
SHA1: 696dc935f78dc883faf98cf446f75ce4028e98f8
User & Date: drh 2014-09-02 15:57:35
Context
2014-09-21
23:08
Merge in all recent changes from trunk. check-in: 3967ebe8 user: drh tags: apple-osx
2014-09-02
15:57
Merge the latest trunk changes into the apple-osx branch. check-in: 696dc935 user: drh tags: apple-osx
2014-09-01
23:06
Update comments in the ANALYZE command that describe how the Stat4Accum object is passed around within the VDBE. No changes to functional code. check-in: 9779c7a9 user: drh tags: trunk
2014-08-26
02:02
Merge recent performance enhancements and the CAST operator enhancements into the apple-osx branch. check-in: 5c273111 user: drh tags: apple-osx
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to Makefile.in.

   173    173            icu.lo insert.lo journal.lo legacy.lo loadext.lo \
   174    174            main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
   175    175            memjournal.lo \
   176    176            mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \
   177    177            notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \
   178    178            pager.lo parse.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \
   179    179            random.lo resolve.lo rowset.lo rtree.lo select.lo status.lo \
   180         -         table.lo tokenize.lo trigger.lo \
          180  +         table.lo threads.lo tokenize.lo trigger.lo \
   181    181            update.lo util.lo vacuum.lo \
   182    182            vdbe.lo vdbeapi.lo vdbeaux.lo vdbeblob.lo vdbemem.lo vdbesort.lo \
   183    183            vdbetrace.lo wal.lo walker.lo where.lo utf.lo vtab.lo
   184    184   
   185    185   # Object files for the amalgamation.
   186    186   #
   187    187   LIBOBJS1 = sqlite3.lo
................................................................................
   259    259     $(TOP)/src/status.c \
   260    260     $(TOP)/src/shell.c \
   261    261     $(TOP)/src/sqlite.h.in \
   262    262     $(TOP)/src/sqlite3ext.h \
   263    263     $(TOP)/src/sqliteInt.h \
   264    264     $(TOP)/src/sqliteLimit.h \
   265    265     $(TOP)/src/table.c \
          266  +  $(TOP)/src/threads.c \
   266    267     $(TOP)/src/tclsqlite.c \
   267    268     $(TOP)/src/tokenize.c \
   268    269     $(TOP)/src/trigger.c \
   269    270     $(TOP)/src/utf.c \
   270    271     $(TOP)/src/update.c \
   271    272     $(TOP)/src/util.c \
   272    273     $(TOP)/src/vacuum.c \
................................................................................
   746    747   
   747    748   status.lo:	$(TOP)/src/status.c $(HDR)
   748    749   	$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/status.c
   749    750   
   750    751   table.lo:	$(TOP)/src/table.c $(HDR)
   751    752   	$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/table.c
   752    753   
          754  +threads.lo:	$(TOP)/src/threads.c $(HDR)
          755  +	$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/threads.c
          756  +
   753    757   tokenize.lo:	$(TOP)/src/tokenize.c keywordhash.h $(HDR)
   754    758   	$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/tokenize.c
   755    759   
   756    760   trigger.lo:	$(TOP)/src/trigger.c $(HDR)
   757    761   	$(LTCOMPILE) $(TEMP_STORE) -c $(TOP)/src/trigger.c
   758    762   
   759    763   update.lo:	$(TOP)/src/update.c $(HDR)

Changes to Makefile.msc.

   632    632            icu.lo insert.lo journal.lo legacy.lo loadext.lo \
   633    633            main.lo malloc.lo mem0.lo mem1.lo mem2.lo mem3.lo mem5.lo \
   634    634            memjournal.lo \
   635    635            mutex.lo mutex_noop.lo mutex_unix.lo mutex_w32.lo \
   636    636            notify.lo opcodes.lo os.lo os_unix.lo os_win.lo \
   637    637            pager.lo pcache.lo pcache1.lo pragma.lo prepare.lo printf.lo \
   638    638            random.lo resolve.lo rowset.lo rtree.lo select.lo status.lo \
   639         -         table.lo tokenize.lo trigger.lo \
          639  +         table.lo threads.lo tokenize.lo trigger.lo \
   640    640            update.lo util.lo vacuum.lo \
   641    641            vdbeapi.lo vdbeaux.lo vdbeblob.lo vdbemem.lo vdbesort.lo \
   642    642            vdbetrace.lo wal.lo walker.lo where.lo utf.lo vtab.lo
   643    643   
   644    644   # Object files for the amalgamation.
   645    645   #
   646    646   LIBOBJS1 = sqlite3.lo
................................................................................
   729    729     $(TOP)\src\status.c \
   730    730     $(TOP)\src\shell.c \
   731    731     $(TOP)\src\sqlite.h.in \
   732    732     $(TOP)\src\sqlite3ext.h \
   733    733     $(TOP)\src\sqliteInt.h \
   734    734     $(TOP)\src\sqliteLimit.h \
   735    735     $(TOP)\src\table.c \
          736  +  $(TOP)\src\threads.c \
   736    737     $(TOP)\src\tclsqlite.c \
   737    738     $(TOP)\src\tokenize.c \
   738    739     $(TOP)\src\trigger.c \
   739    740     $(TOP)\src\utf.c \
   740    741     $(TOP)\src\update.c \
   741    742     $(TOP)\src\util.c \
   742    743     $(TOP)\src\vacuum.c \
................................................................................
  1211   1212   	$(LTCOMPILE) -c $(TOP)\src\select.c
  1212   1213   
  1213   1214   status.lo:	$(TOP)\src\status.c $(HDR)
  1214   1215   	$(LTCOMPILE) -c $(TOP)\src\status.c
  1215   1216   
  1216   1217   table.lo:	$(TOP)\src\table.c $(HDR)
  1217   1218   	$(LTCOMPILE) -c $(TOP)\src\table.c
         1219  +
         1220  +threads.lo:	$(TOP)\src\threads.c $(HDR)
         1221  +	$(LTCOMPILE) -c $(TOP)\src\threads.c
  1218   1222   
  1219   1223   tokenize.lo:	$(TOP)\src\tokenize.c keywordhash.h $(HDR)
  1220   1224   	$(LTCOMPILE) -c $(TOP)\src\tokenize.c
  1221   1225   
  1222   1226   trigger.lo:	$(TOP)\src\trigger.c $(HDR)
  1223   1227   	$(LTCOMPILE) -c $(TOP)\src\trigger.c
  1224   1228   

Changes to main.mk.

    62     62            icu.o insert.o journal.o legacy.o loadext.o \
    63     63            main.o malloc.o mem0.o mem1.o mem2.o mem3.o mem5.o \
    64     64            memjournal.o \
    65     65            mutex.o mutex_noop.o mutex_unix.o mutex_w32.o \
    66     66            notify.o opcodes.o os.o os_unix.o os_win.o \
    67     67            pager.o pcache.o pcache1.o pragma.o prepare.o printf.o \
    68     68            random.o resolve.o rowset.o rtree.o select.o status.o \
    69         -         table.o tokenize.o trigger.o \
           69  +         table.o threads.o tokenize.o trigger.o \
    70     70            update.o util.o vacuum.o \
    71     71            vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \
    72     72   	 vdbetrace.o wal.o walker.o where.o utf.o vtab.o
    73     73   
    74     74   
    75     75   
    76     76   # All of the source code files.
................................................................................
   142    142     $(TOP)/src/shell.c \
   143    143     $(TOP)/src/sqlite.h.in \
   144    144     $(TOP)/src/sqlite3ext.h \
   145    145     $(TOP)/src/sqliteInt.h \
   146    146     $(TOP)/src/sqliteLimit.h \
   147    147     $(TOP)/src/table.c \
   148    148     $(TOP)/src/tclsqlite.c \
          149  +  $(TOP)/src/threads.c \
   149    150     $(TOP)/src/tokenize.c \
   150    151     $(TOP)/src/trigger.c \
   151    152     $(TOP)/src/utf.c \
   152    153     $(TOP)/src/update.c \
   153    154     $(TOP)/src/util.c \
   154    155     $(TOP)/src/vacuum.c \
   155    156     $(TOP)/src/vdbe.c \
................................................................................
   313    314     $(TOP)/src/pragma.c \
   314    315     $(TOP)/src/prepare.c \
   315    316     $(TOP)/src/printf.c \
   316    317     $(TOP)/src/random.c \
   317    318     $(TOP)/src/pcache.c \
   318    319     $(TOP)/src/pcache1.c \
   319    320     $(TOP)/src/select.c \
          321  +  $(TOP)/src/threads.c \
   320    322     $(TOP)/src/tokenize.c \
   321    323     $(TOP)/src/utf.c \
   322    324     $(TOP)/src/util.c \
   323    325     $(TOP)/src/vdbeapi.c \
   324    326     $(TOP)/src/vdbeaux.c \
   325    327     $(TOP)/src/vdbe.c \
   326    328     $(TOP)/src/vdbemem.c \

Changes to src/analyze.c.

   383    383   **
   384    384   ** For indexes on ordinary rowid tables, N==K+1.  But for indexes on
   385    385   ** WITHOUT ROWID tables, N=K+P where P is the number of columns in the
   386    386   ** PRIMARY KEY of the table.  The covering index that implements the
   387    387   ** original WITHOUT ROWID table has N==K as a special case.
   388    388   **
   389    389   ** This routine allocates the Stat4Accum object in heap memory. The return 
   390         -** value is a pointer to the the Stat4Accum object encoded as a blob (i.e. 
   391         -** the size of the blob is sizeof(void*) bytes). 
          390  +** value is a pointer to the Stat4Accum object.  The datatype of the
          391  +** return value is BLOB, but it is really just a pointer to the Stat4Accum
          392  +** object.
   392    393   */
   393    394   static void statInit(
   394    395     sqlite3_context *context,
   395    396     int argc,
   396    397     sqlite3_value **argv
   397    398   ){
   398    399     Stat4Accum *p;
................................................................................
   462    463     
   463    464       for(i=0; i<nCol; i++){
   464    465         p->aBest[i].iCol = i;
   465    466       }
   466    467     }
   467    468   #endif
   468    469   
   469         -  /* Return a pointer to the allocated object to the caller */
   470         -  sqlite3_result_blob(context, p, sizeof(p), stat4Destructor);
          470  +  /* Return a pointer to the allocated object to the caller.  Note that
          471  +  ** only the pointer (the 2nd parameter) matters.  The size of the object
          472  +  ** (given by the 3rd parameter) is never used and can be any positive
          473  +  ** value. */
          474  +  sqlite3_result_blob(context, p, sizeof(*p), stat4Destructor);
   471    475   }
   472    476   static const FuncDef statInitFuncdef = {
   473    477     2+IsStat34,      /* nArg */
   474    478     SQLITE_UTF8,     /* funcFlags */
   475    479     0,               /* pUserData */
   476    480     0,               /* pNext */
   477    481     statInit,        /* xFunc */
................................................................................
   789    793   #define STAT_GET_NLT   3          /* "nlt" column of stat[34] entry */
   790    794   #define STAT_GET_NDLT  4          /* "ndlt" column of stat[34] entry */
   791    795   
   792    796   /*
   793    797   ** Implementation of the stat_get(P,J) SQL function.  This routine is
   794    798   ** used to query statistical information that has been gathered into
   795    799   ** the Stat4Accum object by prior calls to stat_push().  The P parameter
   796         -** is a BLOB which is decoded into a pointer to the Stat4Accum objects.
          800  +** has type BLOB but it is really just a pointer to the Stat4Accum object.
   797    801   ** The content to be returned is determined by the parameter J
   798    802   ** which is one of the STAT_GET_xxxx values defined above.
   799    803   **
   800    804   ** If neither STAT3 nor STAT4 are enabled, then J is always
   801    805   ** STAT_GET_STAT1 and is hence omitted and this routine becomes
   802    806   ** a one-parameter function, stat_get(P), that always returns the
   803    807   ** stat1 table entry information.

Changes to src/btree.c.

  4536   4536   static int moveToRightmost(BtCursor *pCur){
  4537   4537     Pgno pgno;
  4538   4538     int rc = SQLITE_OK;
  4539   4539     MemPage *pPage = 0;
  4540   4540   
  4541   4541     assert( cursorHoldsMutex(pCur) );
  4542   4542     assert( pCur->eState==CURSOR_VALID );
  4543         -  while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){
         4543  +  while( !(pPage = pCur->apPage[pCur->iPage])->leaf ){
  4544   4544       pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
  4545   4545       pCur->aiIdx[pCur->iPage] = pPage->nCell;
  4546   4546       rc = moveToChild(pCur, pgno);
         4547  +    if( rc ) return rc;
  4547   4548     }
  4548         -  if( rc==SQLITE_OK ){
  4549         -    pCur->aiIdx[pCur->iPage] = pPage->nCell-1;
  4550         -    pCur->info.nSize = 0;
  4551         -    pCur->curFlags &= ~BTCF_ValidNKey;
  4552         -  }
  4553         -  return rc;
         4549  +  pCur->aiIdx[pCur->iPage] = pPage->nCell-1;
         4550  +  assert( pCur->info.nSize==0 );
         4551  +  assert( (pCur->curFlags & BTCF_ValidNKey)==0 );
         4552  +  return SQLITE_OK;
  4554   4553   }
  4555   4554   
  4556   4555   /* Move the cursor to the first entry in the table.  Return SQLITE_OK
  4557   4556   ** on success.  Set *pRes to 0 if the cursor actually points to something
  4558   4557   ** or set *pRes to 1 if the table is empty.
  4559   4558   */
  4560   4559   int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
................................................................................
  4677   4676         *pRes = -1;
  4678   4677         return SQLITE_OK;
  4679   4678       }
  4680   4679     }
  4681   4680   
  4682   4681     if( pIdxKey ){
  4683   4682       xRecordCompare = sqlite3VdbeFindCompare(pIdxKey);
  4684         -    pIdxKey->isCorrupt = 0;
         4683  +    pIdxKey->errCode = 0;
  4685   4684       assert( pIdxKey->default_rc==1 
  4686   4685            || pIdxKey->default_rc==0 
  4687   4686            || pIdxKey->default_rc==-1
  4688   4687       );
  4689   4688     }else{
  4690   4689       xRecordCompare = 0; /* All keys are integers */
  4691   4690     }
................................................................................
  4801   4800             if( rc ){
  4802   4801               sqlite3_free(pCellKey);
  4803   4802               goto moveto_finish;
  4804   4803             }
  4805   4804             c = xRecordCompare(nCell, pCellKey, pIdxKey, 0);
  4806   4805             sqlite3_free(pCellKey);
  4807   4806           }
  4808         -        assert( pIdxKey->isCorrupt==0 || c==0 );
         4807  +        assert( 
         4808  +            (pIdxKey->errCode!=SQLITE_CORRUPT || c==0)
         4809  +         && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed)
         4810  +        );
  4809   4811           if( c<0 ){
  4810   4812             lwr = idx+1;
  4811   4813           }else if( c>0 ){
  4812   4814             upr = idx-1;
  4813   4815           }else{
  4814   4816             assert( c==0 );
  4815   4817             *pRes = 0;
  4816   4818             rc = SQLITE_OK;
  4817   4819             pCur->aiIdx[pCur->iPage] = (u16)idx;
  4818         -          if( pIdxKey->isCorrupt ) rc = SQLITE_CORRUPT;
         4820  +          if( pIdxKey->errCode ) rc = SQLITE_CORRUPT;
  4819   4821             goto moveto_finish;
  4820   4822           }
  4821   4823           if( lwr>upr ) break;
  4822   4824           assert( lwr+upr>=0 );
  4823   4825           idx = (lwr+upr)>>1;  /* idx = (lwr+upr)/2 */
  4824   4826         }
  4825   4827       }
................................................................................
  4865   4867   }
  4866   4868   
  4867   4869   /*
  4868   4870   ** Advance the cursor to the next entry in the database.  If
  4869   4871   ** successful then set *pRes=0.  If the cursor
  4870   4872   ** was already pointing to the last entry in the database before
  4871   4873   ** this routine was called, then set *pRes=1.
         4874  +**
         4875  +** The main entry point is sqlite3BtreeNext().  That routine is optimized
         4876  +** for the common case of merely incrementing the cell counter BtCursor.aiIdx
         4877  +** to the next cell on the current page.  The (slower) btreeNext() helper
         4878  +** routine is called when it is necessary to move to a different page or
         4879  +** to restore the cursor.
  4872   4880   **
  4873   4881   ** The calling function will set *pRes to 0 or 1.  The initial *pRes value
  4874   4882   ** will be 1 if the cursor being stepped corresponds to an SQL index and
  4875   4883   ** if this routine could have been skipped if that SQL index had been
  4876   4884   ** a unique index.  Otherwise the caller will have set *pRes to zero.
  4877   4885   ** Zero is the common case. The btree implementation is free to use the
  4878   4886   ** initial *pRes value as a hint to improve performance, but the current
  4879   4887   ** SQLite btree implementation does not. (Note that the comdb2 btree
  4880   4888   ** implementation does use this hint, however.)
  4881   4889   */
  4882         -int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
         4890  +static SQLITE_NOINLINE int btreeNext(BtCursor *pCur, int *pRes){
  4883   4891     int rc;
  4884   4892     int idx;
  4885   4893     MemPage *pPage;
  4886   4894   
  4887   4895     assert( cursorHoldsMutex(pCur) );
  4888         -  assert( pRes!=0 );
  4889         -  assert( *pRes==0 || *pRes==1 );
  4890   4896     assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
         4897  +  assert( *pRes==0 );
  4891   4898     if( pCur->eState!=CURSOR_VALID ){
  4892         -    invalidateOverflowCache(pCur);
         4899  +    assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
  4893   4900       rc = restoreCursorPosition(pCur);
  4894   4901       if( rc!=SQLITE_OK ){
  4895         -      *pRes = 0;
  4896   4902         return rc;
  4897   4903       }
  4898   4904       if( CURSOR_INVALID==pCur->eState ){
  4899   4905         *pRes = 1;
  4900   4906         return SQLITE_OK;
  4901   4907       }
  4902   4908       if( pCur->skipNext ){
  4903   4909         assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT );
  4904   4910         pCur->eState = CURSOR_VALID;
  4905   4911         if( pCur->skipNext>0 ){
  4906   4912           pCur->skipNext = 0;
  4907         -        *pRes = 0;
  4908   4913           return SQLITE_OK;
  4909   4914         }
  4910   4915         pCur->skipNext = 0;
  4911   4916       }
  4912   4917     }
  4913   4918   
  4914   4919     pPage = pCur->apPage[pCur->iPage];
................................................................................
  4918   4923     /* If the database file is corrupt, it is possible for the value of idx 
  4919   4924     ** to be invalid here. This can only occur if a second cursor modifies
  4920   4925     ** the page while cursor pCur is holding a reference to it. Which can
  4921   4926     ** only happen if the database is corrupt in such a way as to link the
  4922   4927     ** page into more than one b-tree structure. */
  4923   4928     testcase( idx>pPage->nCell );
  4924   4929   
  4925         -  pCur->info.nSize = 0;
  4926         -  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
  4927   4930     if( idx>=pPage->nCell ){
  4928   4931       if( !pPage->leaf ){
  4929   4932         rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
  4930         -      if( rc ){
  4931         -        *pRes = 0;
  4932         -        return rc;
  4933         -      }
  4934         -      rc = moveToLeftmost(pCur);
  4935         -      *pRes = 0;
  4936         -      return rc;
         4933  +      if( rc ) return rc;
         4934  +      return moveToLeftmost(pCur);
  4937   4935       }
  4938   4936       do{
  4939   4937         if( pCur->iPage==0 ){
  4940   4938           *pRes = 1;
  4941   4939           pCur->eState = CURSOR_INVALID;
  4942   4940           return SQLITE_OK;
  4943   4941         }
  4944   4942         moveToParent(pCur);
  4945   4943         pPage = pCur->apPage[pCur->iPage];
  4946   4944       }while( pCur->aiIdx[pCur->iPage]>=pPage->nCell );
  4947         -    *pRes = 0;
  4948   4945       if( pPage->intKey ){
  4949         -      rc = sqlite3BtreeNext(pCur, pRes);
         4946  +      return sqlite3BtreeNext(pCur, pRes);
  4950   4947       }else{
  4951         -      rc = SQLITE_OK;
         4948  +      return SQLITE_OK;
  4952   4949       }
  4953         -    return rc;
  4954   4950     }
         4951  +  if( pPage->leaf ){
         4952  +    return SQLITE_OK;
         4953  +  }else{
         4954  +    return moveToLeftmost(pCur);
         4955  +  }
         4956  +}
         4957  +int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
         4958  +  MemPage *pPage;
         4959  +  assert( cursorHoldsMutex(pCur) );
         4960  +  assert( pRes!=0 );
         4961  +  assert( *pRes==0 || *pRes==1 );
         4962  +  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
         4963  +  pCur->info.nSize = 0;
         4964  +  pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
  4955   4965     *pRes = 0;
         4966  +  if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur, pRes);
         4967  +  pPage = pCur->apPage[pCur->iPage];
         4968  +  if( (++pCur->aiIdx[pCur->iPage])>=pPage->nCell ){
         4969  +    pCur->aiIdx[pCur->iPage]--;
         4970  +    return btreeNext(pCur, pRes);
         4971  +  }
  4956   4972     if( pPage->leaf ){
  4957   4973       return SQLITE_OK;
         4974  +  }else{
         4975  +    return moveToLeftmost(pCur);
  4958   4976     }
  4959         -  rc = moveToLeftmost(pCur);
  4960         -  return rc;
  4961   4977   }
  4962         -
  4963   4978   
  4964   4979   /*
  4965   4980   ** Step the cursor back to the previous entry in the database.  If
  4966   4981   ** successful then set *pRes=0.  If the cursor
  4967   4982   ** was already pointing to the first entry in the database before
  4968   4983   ** this routine was called, then set *pRes=1.
         4984  +**
         4985  +** The main entry point is sqlite3BtreePrevious().  That routine is optimized
         4986  +** for the common case of merely decrementing the cell counter BtCursor.aiIdx
         4987  +** to the previous cell on the current page.  The (slower) btreePrevious() helper
         4988  +** routine is called when it is necessary to move to a different page or
         4989  +** to restore the cursor.
  4969   4990   **
  4970   4991   ** The calling function will set *pRes to 0 or 1.  The initial *pRes value
  4971   4992   ** will be 1 if the cursor being stepped corresponds to an SQL index and
  4972   4993   ** if this routine could have been skipped if that SQL index had been
  4973   4994   ** a unique index.  Otherwise the caller will have set *pRes to zero.
  4974   4995   ** Zero is the common case. The btree implementation is free to use the
  4975   4996   ** initial *pRes value as a hint to improve performance, but the current
  4976   4997   ** SQLite btree implementation does not. (Note that the comdb2 btree
  4977   4998   ** implementation does use this hint, however.)
  4978   4999   */
  4979         -int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
         5000  +static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur, int *pRes){
  4980   5001     int rc;
  4981   5002     MemPage *pPage;
  4982   5003   
  4983   5004     assert( cursorHoldsMutex(pCur) );
  4984   5005     assert( pRes!=0 );
  4985         -  assert( *pRes==0 || *pRes==1 );
         5006  +  assert( *pRes==0 );
  4986   5007     assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
  4987         -  pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl);
         5008  +  assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 );
         5009  +  assert( pCur->info.nSize==0 );
  4988   5010     if( pCur->eState!=CURSOR_VALID ){
  4989         -    if( ALWAYS(pCur->eState>=CURSOR_REQUIRESEEK) ){
  4990         -      rc = btreeRestoreCursorPosition(pCur);
  4991         -      if( rc!=SQLITE_OK ){
  4992         -        *pRes = 0;
  4993         -        return rc;
  4994         -      }
         5011  +    assert( pCur->eState>=CURSOR_REQUIRESEEK );
         5012  +    rc = btreeRestoreCursorPosition(pCur);
         5013  +    if( rc!=SQLITE_OK ){
         5014  +      return rc;
  4995   5015       }
  4996   5016       if( CURSOR_INVALID==pCur->eState ){
  4997   5017         *pRes = 1;
  4998   5018         return SQLITE_OK;
  4999   5019       }
  5000   5020       if( pCur->skipNext ){
  5001   5021         assert( pCur->eState==CURSOR_VALID || pCur->eState==CURSOR_SKIPNEXT );
  5002   5022         pCur->eState = CURSOR_VALID;
  5003   5023         if( pCur->skipNext<0 ){
  5004   5024           pCur->skipNext = 0;
  5005         -        *pRes = 0;
  5006   5025           return SQLITE_OK;
  5007   5026         }
  5008   5027         pCur->skipNext = 0;
  5009   5028       }
  5010   5029     }
  5011   5030   
  5012   5031     pPage = pCur->apPage[pCur->iPage];
  5013   5032     assert( pPage->isInit );
  5014   5033     if( !pPage->leaf ){
  5015   5034       int idx = pCur->aiIdx[pCur->iPage];
  5016   5035       rc = moveToChild(pCur, get4byte(findCell(pPage, idx)));
  5017         -    if( rc ){
  5018         -      *pRes = 0;
  5019         -      return rc;
  5020         -    }
         5036  +    if( rc ) return rc;
  5021   5037       rc = moveToRightmost(pCur);
  5022   5038     }else{
  5023   5039       while( pCur->aiIdx[pCur->iPage]==0 ){
  5024   5040         if( pCur->iPage==0 ){
  5025   5041           pCur->eState = CURSOR_INVALID;
  5026   5042           *pRes = 1;
  5027   5043           return SQLITE_OK;
  5028   5044         }
  5029   5045         moveToParent(pCur);
  5030   5046       }
  5031         -    pCur->info.nSize = 0;
  5032         -    pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
         5047  +    assert( pCur->info.nSize==0 );
         5048  +    assert( (pCur->curFlags & (BTCF_ValidNKey|BTCF_ValidOvfl))==0 );
  5033   5049   
  5034   5050       pCur->aiIdx[pCur->iPage]--;
  5035   5051       pPage = pCur->apPage[pCur->iPage];
  5036   5052       if( pPage->intKey && !pPage->leaf ){
  5037   5053         rc = sqlite3BtreePrevious(pCur, pRes);
  5038   5054       }else{
  5039   5055         rc = SQLITE_OK;
  5040   5056       }
  5041   5057     }
         5058  +  return rc;
         5059  +}
         5060  +int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
         5061  +  assert( cursorHoldsMutex(pCur) );
         5062  +  assert( pRes!=0 );
         5063  +  assert( *pRes==0 || *pRes==1 );
         5064  +  assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID );
  5042   5065     *pRes = 0;
  5043         -  return rc;
         5066  +  pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey);
         5067  +  pCur->info.nSize = 0;
         5068  +  if( pCur->eState!=CURSOR_VALID
         5069  +   || pCur->aiIdx[pCur->iPage]==0
         5070  +   || pCur->apPage[pCur->iPage]->leaf==0
         5071  +  ){
         5072  +    return btreePrevious(pCur, pRes);
         5073  +  }
         5074  +  pCur->aiIdx[pCur->iPage]--;
         5075  +  return SQLITE_OK;
  5044   5076   }
  5045   5077   
  5046   5078   /*
  5047   5079   ** Allocate a new page from the database file.
  5048   5080   **
  5049   5081   ** The new page is marked as dirty.  (In other words, sqlite3PagerWrite()
  5050   5082   ** has already been called on the new page.)  The new page has also

Changes to src/build.c.

  2675   2675     }else{
  2676   2676       tnum = pIndex->tnum;
  2677   2677     }
  2678   2678     pKey = sqlite3KeyInfoOfIndex(pParse, pIndex);
  2679   2679   
  2680   2680     /* Open the sorter cursor if we are to use one. */
  2681   2681     iSorter = pParse->nTab++;
  2682         -  sqlite3VdbeAddOp4(v, OP_SorterOpen, iSorter, 0, 0, (char*)
         2682  +  sqlite3VdbeAddOp4(v, OP_SorterOpen, iSorter, 0, pIndex->nKeyCol, (char*)
  2683   2683                       sqlite3KeyInfoRef(pKey), P4_KEYINFO);
  2684   2684   
  2685   2685     /* Open the table. Loop through all rows of the table, inserting index
  2686   2686     ** records into the sorter. */
  2687   2687     sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead);
  2688   2688     addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0); VdbeCoverage(v);
  2689   2689     regRecord = sqlite3GetTempReg(pParse);
................................................................................
  3024   3024       }
  3025   3025       if( j>=pTab->nCol ){
  3026   3026         sqlite3ErrorMsg(pParse, "table %s has no column named %s",
  3027   3027           pTab->zName, zColName);
  3028   3028         pParse->checkSchema = 1;
  3029   3029         goto exit_create_index;
  3030   3030       }
  3031         -    assert( pTab->nCol<=0x7fff && j<=0x7fff );
         3031  +    assert( j<=0x7fff );
  3032   3032       pIndex->aiColumn[i] = (i16)j;
  3033   3033       if( pListItem->pExpr ){
  3034   3034         int nColl;
  3035   3035         assert( pListItem->pExpr->op==TK_COLLATE );
  3036   3036         zColl = pListItem->pExpr->u.zToken;
  3037   3037         nColl = sqlite3Strlen30(zColl) + 1;
  3038   3038         assert( nExtra>=nColl );

Changes to src/expr.c.

  1908   1908         assert( pExpr->op==TK_EXISTS || pExpr->op==TK_SELECT );
  1909   1909   
  1910   1910         assert( ExprHasProperty(pExpr, EP_xIsSelect) );
  1911   1911         pSel = pExpr->x.pSelect;
  1912   1912         sqlite3SelectDestInit(&dest, 0, ++pParse->nMem);
  1913   1913         if( pExpr->op==TK_SELECT ){
  1914   1914           dest.eDest = SRT_Mem;
         1915  +        dest.iSdst = dest.iSDParm;
  1915   1916           sqlite3VdbeAddOp2(v, OP_Null, 0, dest.iSDParm);
  1916   1917           VdbeComment((v, "Init subquery result"));
  1917   1918         }else{
  1918   1919           dest.eDest = SRT_Exists;
  1919   1920           sqlite3VdbeAddOp2(v, OP_Integer, 0, dest.iSDParm);
  1920   1921           VdbeComment((v, "Init EXISTS result"));
  1921   1922         }

Changes to src/main.c.

  2081   2081     SQLITE_MAX_COMPOUND_SELECT,
  2082   2082     SQLITE_MAX_VDBE_OP,
  2083   2083     SQLITE_MAX_FUNCTION_ARG,
  2084   2084     SQLITE_MAX_ATTACHED,
  2085   2085     SQLITE_MAX_LIKE_PATTERN_LENGTH,
  2086   2086     SQLITE_MAX_VARIABLE_NUMBER,      /* IMP: R-38091-32352 */
  2087   2087     SQLITE_MAX_TRIGGER_DEPTH,
         2088  +  SQLITE_MAX_WORKER_THREADS,
  2088   2089   };
  2089   2090   
  2090   2091   /*
  2091   2092   ** Make sure the hard limits are set to reasonable values
  2092   2093   */
  2093   2094   #if SQLITE_MAX_LENGTH<100
  2094   2095   # error SQLITE_MAX_LENGTH must be at least 100
................................................................................
  2116   2117   #endif
  2117   2118   #if SQLITE_MAX_COLUMN>32767
  2118   2119   # error SQLITE_MAX_COLUMN must not exceed 32767
  2119   2120   #endif
  2120   2121   #if SQLITE_MAX_TRIGGER_DEPTH<1
  2121   2122   # error SQLITE_MAX_TRIGGER_DEPTH must be at least 1
  2122   2123   #endif
         2124  +#if SQLITE_MAX_WORKER_THREADS<0 || SQLITE_MAX_WORKER_THREADS>50
         2125  +# error SQLITE_MAX_WORKER_THREADS must be between 0 and 50
         2126  +#endif
  2123   2127   
  2124   2128   
  2125   2129   /*
  2126   2130   ** Change the value of a limit.  Report the old value.
  2127   2131   ** If an invalid limit index is supplied, report -1.
  2128   2132   ** Make no changes but still report the old value if the
  2129   2133   ** new limit is negative.
................................................................................
  2149   2153     assert( aHardLimit[SQLITE_LIMIT_VDBE_OP]==SQLITE_MAX_VDBE_OP );
  2150   2154     assert( aHardLimit[SQLITE_LIMIT_FUNCTION_ARG]==SQLITE_MAX_FUNCTION_ARG );
  2151   2155     assert( aHardLimit[SQLITE_LIMIT_ATTACHED]==SQLITE_MAX_ATTACHED );
  2152   2156     assert( aHardLimit[SQLITE_LIMIT_LIKE_PATTERN_LENGTH]==
  2153   2157                                                  SQLITE_MAX_LIKE_PATTERN_LENGTH );
  2154   2158     assert( aHardLimit[SQLITE_LIMIT_VARIABLE_NUMBER]==SQLITE_MAX_VARIABLE_NUMBER);
  2155   2159     assert( aHardLimit[SQLITE_LIMIT_TRIGGER_DEPTH]==SQLITE_MAX_TRIGGER_DEPTH );
  2156         -  assert( SQLITE_LIMIT_TRIGGER_DEPTH==(SQLITE_N_LIMIT-1) );
         2160  +  assert( aHardLimit[SQLITE_LIMIT_WORKER_THREADS]==SQLITE_MAX_WORKER_THREADS );
         2161  +  assert( SQLITE_LIMIT_WORKER_THREADS==(SQLITE_N_LIMIT-1) );
  2157   2162   
  2158   2163   
  2159   2164     if( limitId<0 || limitId>=SQLITE_N_LIMIT ){
  2160   2165       return -1;
  2161   2166     }
  2162   2167     oldLimit = db->aLimit[limitId];
  2163   2168     if( newLimit>=0 ){                   /* IMP: R-52476-28732 */
................................................................................
  2585   2590     db->errMask = 0xff;
  2586   2591     db->nDb = 2;
  2587   2592     db->magic = SQLITE_MAGIC_BUSY;
  2588   2593     db->aDb = db->aDbStatic;
  2589   2594   
  2590   2595     assert( sizeof(db->aLimit)==sizeof(aHardLimit) );
  2591   2596     memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit));
         2597  +  db->aLimit[SQLITE_LIMIT_WORKER_THREADS] = SQLITE_DEFAULT_WORKER_THREADS;
  2592   2598     db->autoCommit = 1;
  2593   2599     db->nextAutovac = -1;
  2594   2600     db->szMmap = sqlite3GlobalConfig.szMmap;
  2595   2601     db->nextPagesize = 0;
         2602  +  db->nMaxSorterMmap = 0x7FFFFFFF;
  2596   2603     db->flags |= SQLITE_ShortColNames | SQLITE_EnableTrigger | SQLITE_CacheSpill
  2597   2604   #if !defined(SQLITE_DEFAULT_AUTOMATIC_INDEX) || SQLITE_DEFAULT_AUTOMATIC_INDEX
  2598   2605                    | SQLITE_AutoIndex
  2599   2606   #endif
  2600   2607   #if SQLITE_DEFAULT_CKPTFULLFSYNC
  2601   2608                    | SQLITE_CkptFullFSync
  2602   2609   #endif
................................................................................
  3487   3494   #ifdef SQLITE_VDBE_COVERAGE
  3488   3495         typedef void (*branch_callback)(void*,int,u8,u8);
  3489   3496         sqlite3GlobalConfig.xVdbeBranch = va_arg(ap,branch_callback);
  3490   3497         sqlite3GlobalConfig.pVdbeBranchArg = va_arg(ap,void*);
  3491   3498   #endif
  3492   3499         break;
  3493   3500       }
         3501  +
         3502  +    /*   sqlite3_test_control(SQLITE_TESTCTRL_SORTER_MMAP, db, nMax); */
         3503  +    case SQLITE_TESTCTRL_SORTER_MMAP: {
         3504  +      sqlite3 *db = va_arg(ap, sqlite3*);
         3505  +      db->nMaxSorterMmap = va_arg(ap, int);
         3506  +      break;
         3507  +    }
  3494   3508   
  3495   3509       /*   sqlite3_test_control(SQLITE_TESTCTRL_ISINIT);
  3496   3510       **
  3497   3511       ** Return SQLITE_OK if SQLite has been initialized and SQLITE_ERROR if
  3498   3512       ** not.
  3499   3513       */
  3500   3514       case SQLITE_TESTCTRL_ISINIT: {
  3501   3515         if( sqlite3GlobalConfig.isInit==0 ) rc = SQLITE_ERROR;
  3502   3516         break;
  3503   3517       }
  3504         -
  3505   3518     }
  3506   3519     va_end(ap);
  3507   3520   #endif /* SQLITE_OMIT_BUILTIN_TEST */
  3508   3521     return rc;
  3509   3522   }
  3510   3523   
  3511   3524   /*

Changes to src/os_unix.c.

  7069   7069   ){
  7070   7070     int rc = SQLITE_OK;
  7071   7071     UNUSED_PARAMETER(NotUsed);
  7072   7072     SimulateIOError(return SQLITE_IOERR_DELETE);
  7073   7073     if( osUnlink(zPath)==(-1) ){
  7074   7074       if( errno==ENOENT
  7075   7075   #if OS_VXWORKS
  7076         -        || errno==0x380003
         7076  +        || osAccess(zPath,0)!=0
  7077   7077   #endif
  7078   7078       ){
  7079   7079         rc = SQLITE_IOERR_DELETE_NOENT;
  7080   7080       }else{
  7081   7081         rc = unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath);
  7082   7082       }
  7083   7083       return rc;

Changes to src/os_win.c.

   939    939   #else
   940    940     { "WaitForSingleObject",     (SYSCALL)0,                       0 },
   941    941   #endif
   942    942   
   943    943   #define osWaitForSingleObject ((DWORD(WINAPI*)(HANDLE, \
   944    944           DWORD))aSyscall[63].pCurrent)
   945    945   
   946         -#if SQLITE_OS_WINRT
   947    946     { "WaitForSingleObjectEx",   (SYSCALL)WaitForSingleObjectEx,   0 },
   948         -#else
   949         -  { "WaitForSingleObjectEx",   (SYSCALL)0,                       0 },
   950         -#endif
   951    947   
   952    948   #define osWaitForSingleObjectEx ((DWORD(WINAPI*)(HANDLE,DWORD, \
   953    949           BOOL))aSyscall[64].pCurrent)
   954    950   
   955    951   #if SQLITE_OS_WINRT
   956    952     { "SetFilePointerEx",        (SYSCALL)SetFilePointerEx,        0 },
   957    953   #else
................................................................................
  1285   1281     }
  1286   1282     assert( sleepObj!=NULL );
  1287   1283     osWaitForSingleObjectEx(sleepObj, milliseconds, FALSE);
  1288   1284   #else
  1289   1285     osSleep(milliseconds);
  1290   1286   #endif
  1291   1287   }
         1288  +
         1289  +DWORD sqlite3Win32Wait(HANDLE hObject){
         1290  +  DWORD rc;
         1291  +  while( (rc = osWaitForSingleObjectEx(hObject, INFINITE,
         1292  +                                       TRUE))==WAIT_IO_COMPLETION ){}
         1293  +  return rc;
         1294  +}
  1292   1295   
  1293   1296   /*
  1294   1297   ** Return true (non-zero) if we are running under WinNT, Win2K, WinXP,
  1295   1298   ** or WinCE.  Return false (zero) for Win95, Win98, or WinME.
  1296   1299   **
  1297   1300   ** Here is an interesting observation:  Win95, Win98, and WinME lack
  1298   1301   ** the LockFileEx() API.  But we can still statically link against that
................................................................................
  1313   1316   #endif
  1314   1317   
  1315   1318   /*
  1316   1319   ** This function determines if the machine is running a version of Windows
  1317   1320   ** based on the NT kernel.
  1318   1321   */
  1319   1322   int sqlite3_win32_is_nt(void){
  1320         -#if defined(SQLITE_WIN32_GETVERSIONEX) && SQLITE_WIN32_GETVERSIONEX
         1323  +#if SQLITE_OS_WINRT
         1324  +  /*
         1325  +  ** NOTE: The WinRT sub-platform is always assumed to be based on the NT
         1326  +  **       kernel.
         1327  +  */
         1328  +  return 1;
         1329  +#elif defined(SQLITE_WIN32_GETVERSIONEX) && SQLITE_WIN32_GETVERSIONEX
  1321   1330     if( osInterlockedCompareExchange(&sqlite3_os_type, 0, 0)==0 ){
  1322         -#if !SQLITE_OS_WINRT && defined(SQLITE_WIN32_HAS_WIDE) && \
  1323         -        defined(NTDDI_VERSION) && NTDDI_VERSION >= NTDDI_WIN8
  1324         -    OSVERSIONINFOW sInfo;
  1325         -    sInfo.dwOSVersionInfoSize = sizeof(sInfo);
  1326         -    osGetVersionExW(&sInfo);
  1327         -    osInterlockedCompareExchange(&sqlite3_os_type,
  1328         -        (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0);
  1329         -#elif defined(SQLITE_WIN32_HAS_ANSI)
         1331  +#if defined(SQLITE_WIN32_HAS_ANSI)
  1330   1332       OSVERSIONINFOA sInfo;
  1331   1333       sInfo.dwOSVersionInfoSize = sizeof(sInfo);
  1332   1334       osGetVersionExA(&sInfo);
         1335  +    osInterlockedCompareExchange(&sqlite3_os_type,
         1336  +        (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0);
         1337  +#elif defined(SQLITE_WIN32_HAS_WIDE)
         1338  +    OSVERSIONINFOW sInfo;
         1339  +    sInfo.dwOSVersionInfoSize = sizeof(sInfo);
         1340  +    osGetVersionExW(&sInfo);
  1333   1341       osInterlockedCompareExchange(&sqlite3_os_type,
  1334   1342           (sInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) ? 2 : 1, 0);
  1335   1343   #endif
  1336   1344     }
  1337   1345     return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2;
  1338   1346   #elif SQLITE_TEST
  1339   1347     return osInterlockedCompareExchange(&sqlite3_os_type, 2, 2)==2;
  1340   1348   #else
         1349  +  /*
         1350  +  ** NOTE: All sub-platforms where the GetVersionEx[AW] functions are
         1351  +  **       deprecated are always assumed to be based on the NT kernel.
         1352  +  */
  1341   1353     return 1;
  1342   1354   #endif
  1343   1355   }
  1344   1356   
  1345   1357   #ifdef SQLITE_WIN32_MALLOC
  1346   1358   /*
  1347   1359   ** Allocate nBytes of memory.

Changes to src/pager.c.

  3630   3630   
  3631   3631       if( rc==SQLITE_OK ){
  3632   3632         pager_reset(pPager);
  3633   3633         pPager->dbSize = (Pgno)((nByte+pageSize-1)/pageSize);
  3634   3634         pPager->pageSize = pageSize;
  3635   3635         sqlite3PageFree(pPager->pTmpSpace);
  3636   3636         pPager->pTmpSpace = pNew;
  3637         -      sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
         3637  +      rc = sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
  3638   3638       }
  3639   3639     }
  3640   3640   
  3641   3641     *pPageSize = pPager->pageSize;
  3642   3642     if( rc==SQLITE_OK ){
  3643   3643       if( nReserve<0 ) nReserve = pPager->nReserve;
  3644   3644       assert( nReserve>=0 && nReserve<1000 );
................................................................................
  4393   4393     **
  4394   4394     ** The doNotSpill ROLLBACK and OFF bits inhibits all cache spilling
  4395   4395     ** regardless of whether or not a sync is required.  This is set during
  4396   4396     ** a rollback or by user request, respectively.
  4397   4397     **
  4398   4398     ** Spilling is also prohibited when in an error state since that could
  4399   4399     ** lead to database corruption.   In the current implementation it 
  4400         -  ** is impossible for sqlite3PcacheFetch() to be called with createFlag==1
         4400  +  ** is impossible for sqlite3PcacheFetch() to be called with createFlag==3
  4401   4401     ** while in the error state, hence it is impossible for this routine to
  4402   4402     ** be called in the error state.  Nevertheless, we include a NEVER()
  4403   4403     ** test for the error state as a safeguard against future changes.
  4404   4404     */
  4405   4405     if( NEVER(pPager->errCode) ) return SQLITE_OK;
  4406   4406     testcase( pPager->doNotSpill & SPILLFLAG_ROLLBACK );
  4407   4407     testcase( pPager->doNotSpill & SPILLFLAG_OFF );
................................................................................
  4729   4729     */
  4730   4730     if( rc==SQLITE_OK ){
  4731   4731       assert( pPager->memDb==0 );
  4732   4732       rc = sqlite3PagerSetPagesize(pPager, &szPageDflt, -1);
  4733   4733       testcase( rc!=SQLITE_OK );
  4734   4734     }
  4735   4735   
  4736         -  /* If an error occurred in either of the blocks above, free the 
  4737         -  ** Pager structure and close the file.
         4736  +  /* Initialize the PCache object. */
         4737  +  if( rc==SQLITE_OK ){
         4738  +    assert( nExtra<1000 );
         4739  +    nExtra = ROUND8(nExtra);
         4740  +    rc = sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
         4741  +                           !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
         4742  +  }
         4743  +
         4744  +  /* If an error occurred above, free the  Pager structure and close the file.
  4738   4745     */
  4739   4746     if( rc!=SQLITE_OK ){
  4740         -    assert( !pPager->pTmpSpace );
  4741   4747       sqlite3OsClose(pPager->fd);
         4748  +    sqlite3PageFree(pPager->pTmpSpace);
  4742   4749       sqlite3_free(pPager);
  4743   4750       return rc;
  4744   4751     }
  4745   4752   
  4746         -  /* Initialize the PCache object. */
  4747         -  assert( nExtra<1000 );
  4748         -  nExtra = ROUND8(nExtra);
  4749         -  sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
  4750         -                    !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
  4751         -
  4752   4753     PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename));
  4753   4754     IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
  4754   4755   
  4755   4756     pPager->useJournal = (u8)useJournal;
  4756   4757     /* pPager->stmtOpen = 0; */
  4757   4758     /* pPager->stmtInUse = 0; */
  4758   4759     /* pPager->nRef = 0; */
................................................................................
  5304   5305     }
  5305   5306   
  5306   5307     /* If the pager is in the error state, return an error immediately. 
  5307   5308     ** Otherwise, request the page from the PCache layer. */
  5308   5309     if( pPager->errCode!=SQLITE_OK ){
  5309   5310       rc = pPager->errCode;
  5310   5311     }else{
  5311         -
  5312   5312       if( bMmapOk && pagerUseWal(pPager) ){
  5313   5313         rc = sqlite3WalFindFrame(pPager->pWal, pgno, &iFrame);
  5314   5314         if( rc!=SQLITE_OK ) goto pager_acquire_err;
  5315   5315       }
  5316   5316   
  5317   5317       if( bMmapOk && iFrame==0 ){
  5318   5318         void *pData = 0;
................................................................................
  5319   5319   
  5320   5320         rc = sqlite3OsFetch(pPager->fd, 
  5321   5321             (i64)(pgno-1) * pPager->pageSize, pPager->pageSize, &pData
  5322   5322         );
  5323   5323   
  5324   5324         if( rc==SQLITE_OK && pData ){
  5325   5325           if( pPager->eState>PAGER_READER ){
  5326         -          (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
         5326  +          pPg = sqlite3PagerLookup(pPager, pgno);
  5327   5327           }
  5328   5328           if( pPg==0 ){
  5329   5329             rc = pagerAcquireMapPage(pPager, pgno, pData, &pPg);
  5330   5330           }else{
  5331   5331             sqlite3OsUnfetch(pPager->fd, (i64)(pgno-1)*pPager->pageSize, pData);
  5332   5332           }
  5333   5333           if( pPg ){
................................................................................
  5337   5337           }
  5338   5338         }
  5339   5339         if( rc!=SQLITE_OK ){
  5340   5340           goto pager_acquire_err;
  5341   5341         }
  5342   5342       }
  5343   5343   
  5344         -    rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage);
         5344  +    {
         5345  +      sqlite3_pcache_page *pBase;
         5346  +      pBase = sqlite3PcacheFetch(pPager->pPCache, pgno, 3);
         5347  +      if( pBase==0 ){
         5348  +        rc = sqlite3PcacheFetchStress(pPager->pPCache, pgno, &pBase);
         5349  +        if( rc!=SQLITE_OK ) goto pager_acquire_err;
         5350  +      }
         5351  +      pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase);
         5352  +      if( pPg==0 ) rc = SQLITE_NOMEM;
         5353  +    }
  5345   5354     }
  5346   5355   
  5347   5356     if( rc!=SQLITE_OK ){
  5348   5357       /* Either the call to sqlite3PcacheFetch() returned an error or the
  5349   5358       ** pager was already in the error-state when this function was called.
  5350   5359       ** Set pPg to 0 and jump to the exception handler.  */
  5351   5360       pPg = 0;
................................................................................
  5434   5443   ** See also sqlite3PagerGet().  The difference between this routine
  5435   5444   ** and sqlite3PagerGet() is that _get() will go to the disk and read
  5436   5445   ** in the page if the page is not already in cache.  This routine
  5437   5446   ** returns NULL if the page is not in cache or if a disk I/O error 
  5438   5447   ** has ever happened.
  5439   5448   */
  5440   5449   DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
  5441         -  PgHdr *pPg = 0;
         5450  +  sqlite3_pcache_page *pPage;
  5442   5451     assert( pPager!=0 );
  5443   5452     assert( pgno!=0 );
  5444   5453     assert( pPager->pPCache!=0 );
  5445         -  sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
  5446         -  return pPg;
         5454  +  pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0);
         5455  +  return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage);
  5447   5456   }
  5448   5457   
  5449   5458   /*
  5450   5459   ** Release a page reference.
  5451   5460   **
  5452   5461   ** If the number of references to the page drop to zero, then the
  5453   5462   ** page is added to the LRU list.  When all references to all pages

Changes to src/pcache.c.

   139    139     if( p->pCache->bPurgeable ){
   140    140       if( p->pgno==1 ){
   141    141         p->pCache->pPage1 = 0;
   142    142       }
   143    143       sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 0);
   144    144     }
   145    145   }
          146  +
          147  +/*
          148  +** Compute the number of pages of cache requested.
          149  +*/
          150  +static int numberOfCachePages(PCache *p){
          151  +  if( p->szCache>=0 ){
          152  +    return p->szCache;
          153  +  }else{
          154  +    return (int)((-1024*(i64)p->szCache)/(p->szPage+p->szExtra));
          155  +  }
          156  +}
   146    157   
   147    158   /*************************************************** General Interfaces ******
   148    159   **
   149    160   ** Initialize and shutdown the page cache subsystem. Neither of these 
   150    161   ** functions are threadsafe.
   151    162   */
   152    163   int sqlite3PcacheInitialize(void){
................................................................................
   172    183   
   173    184   /*
   174    185   ** Create a new PCache object. Storage space to hold the object
   175    186   ** has already been allocated and is passed in as the p pointer. 
   176    187   ** The caller discovers how much space needs to be allocated by 
   177    188   ** calling sqlite3PcacheSize().
   178    189   */
   179         -void sqlite3PcacheOpen(
          190  +int sqlite3PcacheOpen(
   180    191     int szPage,                  /* Size of every page */
   181    192     int szExtra,                 /* Extra space associated with each page */
   182    193     int bPurgeable,              /* True if pages are on backing store */
   183    194     int (*xStress)(void*,PgHdr*),/* Call to try to make pages clean */
   184    195     void *pStress,               /* Argument to xStress */
   185    196     PCache *p                    /* Preallocated space for the PCache */
   186    197   ){
   187    198     memset(p, 0, sizeof(PCache));
   188         -  p->szPage = szPage;
          199  +  p->szPage = 1;
   189    200     p->szExtra = szExtra;
   190    201     p->bPurgeable = bPurgeable;
   191    202     p->eCreate = 2;
   192    203     p->xStress = xStress;
   193    204     p->pStress = pStress;
   194    205     p->szCache = 100;
          206  +  return sqlite3PcacheSetPageSize(p, szPage);
   195    207   }
   196    208   
   197    209   /*
   198    210   ** Change the page size for PCache object. The caller must ensure that there
   199    211   ** are no outstanding page references when this function is called.
   200    212   */
   201         -void sqlite3PcacheSetPageSize(PCache *pCache, int szPage){
          213  +int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){
   202    214     assert( pCache->nRef==0 && pCache->pDirty==0 );
   203         -  if( pCache->pCache ){
   204         -    sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
   205         -    pCache->pCache = 0;
          215  +  if( pCache->szPage ){
          216  +    sqlite3_pcache *pNew;
          217  +    pNew = sqlite3GlobalConfig.pcache2.xCreate(
          218  +                szPage, pCache->szExtra + sizeof(PgHdr), pCache->bPurgeable
          219  +    );
          220  +    if( pNew==0 ) return SQLITE_NOMEM;
          221  +    sqlite3GlobalConfig.pcache2.xCachesize(pNew, numberOfCachePages(pCache));
          222  +    if( pCache->pCache ){
          223  +      sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
          224  +    }
          225  +    pCache->pCache = pNew;
   206    226       pCache->pPage1 = 0;
          227  +    pCache->szPage = szPage;
   207    228     }
   208         -  pCache->szPage = szPage;
   209         -}
   210         -
   211         -/*
   212         -** Compute the number of pages of cache requested.
   213         -*/
   214         -static int numberOfCachePages(PCache *p){
   215         -  if( p->szCache>=0 ){
   216         -    return p->szCache;
   217         -  }else{
   218         -    return (int)((-1024*(i64)p->szCache)/(p->szPage+p->szExtra));
   219         -  }
          229  +  return SQLITE_OK;
   220    230   }
   221    231   
   222    232   /*
   223    233   ** Try to obtain a page from the cache.
          234  +**
          235  +** This routine returns a pointer to an sqlite3_pcache_page object if
          236  +** such an object is already in cache, or if a new one is created.
          237  +** This routine returns a NULL pointer if the object was not in cache
          238  +** and could not be created.
          239  +**
          240  +** The createFlags should be 0 to check for existing pages and should
          241  +** be 3 (not 1, but 3) to try to create a new page.
          242  +**
          243  +** If the createFlag is 0, then NULL is always returned if the page
          244  +** is not already in the cache.  If createFlag is 1, then a new page
          245  +** is created only if that can be done without spilling dirty pages
          246  +** and without exceeding the cache size limit.
          247  +**
          248  +** The caller needs to invoke sqlite3PcacheFetchFinish() to properly
          249  +** initialize the sqlite3_pcache_page object and convert it into a
          250  +** PgHdr object.  The sqlite3PcacheFetch() and sqlite3PcacheFetchFinish()
          251  +** routines are split this way for performance reasons. When separated
          252  +** they can both (usually) operate without having to push values to
          253  +** the stack on entry and pop them back off on exit, which saves a
          254  +** lot of pushing and popping.
   224    255   */
   225         -int sqlite3PcacheFetch(
          256  +sqlite3_pcache_page *sqlite3PcacheFetch(
   226    257     PCache *pCache,       /* Obtain the page from this cache */
   227    258     Pgno pgno,            /* Page number to obtain */
   228         -  int createFlag,       /* If true, create page if it does not exist already */
   229         -  PgHdr **ppPage        /* Write the page here */
          259  +  int createFlag        /* If true, create page if it does not exist already */
   230    260   ){
   231         -  sqlite3_pcache_page *pPage;
   232         -  PgHdr *pPgHdr = 0;
   233    261     int eCreate;
   234    262   
   235    263     assert( pCache!=0 );
   236         -  assert( createFlag==1 || createFlag==0 );
          264  +  assert( pCache->pCache!=0 );
          265  +  assert( createFlag==3 || createFlag==0 );
   237    266     assert( pgno>0 );
   238    267   
   239         -  /* If the pluggable cache (sqlite3_pcache*) has not been allocated,
   240         -  ** allocate it now.
   241         -  */
   242         -  if( !pCache->pCache ){
   243         -    sqlite3_pcache *p;
   244         -    if( !createFlag ){
   245         -      *ppPage = 0;
   246         -      return SQLITE_OK;
   247         -    }
   248         -    p = sqlite3GlobalConfig.pcache2.xCreate(
   249         -        pCache->szPage, pCache->szExtra + sizeof(PgHdr), pCache->bPurgeable
   250         -    );
   251         -    if( !p ){
   252         -      return SQLITE_NOMEM;
   253         -    }
   254         -    sqlite3GlobalConfig.pcache2.xCachesize(p, numberOfCachePages(pCache));
   255         -    pCache->pCache = p;
   256         -  }
   257         -
   258    268     /* eCreate defines what to do if the page does not exist.
   259    269     **    0     Do not allocate a new page.  (createFlag==0)
   260    270     **    1     Allocate a new page if doing so is inexpensive.
   261    271     **          (createFlag==3 AND bPurgeable AND pDirty)
   262    272     **    2     Allocate a new page even if doing so is difficult.
   263    273     **          (createFlag==3 AND !(bPurgeable AND pDirty))
   264    274     */
   265         -  eCreate = createFlag==0 ? 0 : pCache->eCreate;
   266         -  assert( (createFlag*(1+(!pCache->bPurgeable||!pCache->pDirty)))==eCreate );
   267         -  pPage = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, eCreate);
   268         -  if( !pPage && eCreate==1 ){
   269         -    PgHdr *pPg;
   270         -
   271         -    /* Find a dirty page to write-out and recycle. First try to find a 
   272         -    ** page that does not require a journal-sync (one with PGHDR_NEED_SYNC
   273         -    ** cleared), but if that is not possible settle for any other 
   274         -    ** unreferenced dirty page.
   275         -    */
   276         -    expensive_assert( pcacheCheckSynced(pCache) );
   277         -    for(pPg=pCache->pSynced; 
   278         -        pPg && (pPg->nRef || (pPg->flags&PGHDR_NEED_SYNC)); 
   279         -        pPg=pPg->pDirtyPrev
   280         -    );
   281         -    pCache->pSynced = pPg;
   282         -    if( !pPg ){
   283         -      for(pPg=pCache->pDirtyTail; pPg && pPg->nRef; pPg=pPg->pDirtyPrev);
   284         -    }
   285         -    if( pPg ){
   286         -      int rc;
          275  +  eCreate = createFlag & pCache->eCreate;
          276  +  assert( eCreate==0 || eCreate==1 || eCreate==2 );
          277  +  assert( createFlag==0 || pCache->eCreate==eCreate );
          278  +  assert( createFlag==0 || eCreate==1+(!pCache->bPurgeable||!pCache->pDirty) );
          279  +  return sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, eCreate);
          280  +}
          281  +
          282  +/*
          283  +** If the sqlite3PcacheFetch() routine is unable to allocate a new
          284  +** page because no clean pages are available for reuse and the cache
          285  +** size limit has been reached, then this routine can be invoked to 
          286  +** try harder to allocate a page.  This routine might invoke the stress
          287  +** callback to spill dirty pages to the journal.  It will then try to
          288  +** allocate the new page and will only fail to allocate a new page on
          289  +** an OOM error.
          290  +**
          291  +** This routine should be invoked only after sqlite3PcacheFetch() fails.
          292  +*/
          293  +int sqlite3PcacheFetchStress(
          294  +  PCache *pCache,                 /* Obtain the page from this cache */
          295  +  Pgno pgno,                      /* Page number to obtain */
          296  +  sqlite3_pcache_page **ppPage    /* Write result here */
          297  +){
          298  +  PgHdr *pPg;
          299  +  if( pCache->eCreate==2 ) return 0;
          300  +
          301  +
          302  +  /* Find a dirty page to write-out and recycle. First try to find a 
          303  +  ** page that does not require a journal-sync (one with PGHDR_NEED_SYNC
          304  +  ** cleared), but if that is not possible settle for any other 
          305  +  ** unreferenced dirty page.
          306  +  */
          307  +  expensive_assert( pcacheCheckSynced(pCache) );
          308  +  for(pPg=pCache->pSynced; 
          309  +      pPg && (pPg->nRef || (pPg->flags&PGHDR_NEED_SYNC)); 
          310  +      pPg=pPg->pDirtyPrev
          311  +  );
          312  +  pCache->pSynced = pPg;
          313  +  if( !pPg ){
          314  +    for(pPg=pCache->pDirtyTail; pPg && pPg->nRef; pPg=pPg->pDirtyPrev);
          315  +  }
          316  +  if( pPg ){
          317  +    int rc;
   287    318   #ifdef SQLITE_LOG_CACHE_SPILL
   288         -      sqlite3_log(SQLITE_FULL, 
   289         -                  "spill page %d making room for %d - cache used: %d/%d",
   290         -                  pPg->pgno, pgno,
   291         -                  sqlite3GlobalConfig.pcache.xPagecount(pCache->pCache),
   292         -                  numberOfCachePages(pCache));
          319  +    sqlite3_log(SQLITE_FULL, 
          320  +                "spill page %d making room for %d - cache used: %d/%d",
          321  +                pPg->pgno, pgno,
          322  +                sqlite3GlobalConfig.pcache.xPagecount(pCache->pCache),
          323  +                numberOfCachePages(pCache));
   293    324   #endif
   294         -      rc = pCache->xStress(pCache->pStress, pPg);
   295         -      if( rc!=SQLITE_OK && rc!=SQLITE_BUSY ){
   296         -        return rc;
   297         -      }
   298         -    }
   299         -
   300         -    pPage = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, 2);
   301         -  }
   302         -
   303         -  if( pPage ){
   304         -    pPgHdr = (PgHdr *)pPage->pExtra;
   305         -
   306         -    if( !pPgHdr->pPage ){
   307         -      memset(pPgHdr, 0, sizeof(PgHdr));
   308         -      pPgHdr->pPage = pPage;
   309         -      pPgHdr->pData = pPage->pBuf;
   310         -      pPgHdr->pExtra = (void *)&pPgHdr[1];
   311         -      memset(pPgHdr->pExtra, 0, pCache->szExtra);
   312         -      pPgHdr->pCache = pCache;
   313         -      pPgHdr->pgno = pgno;
   314         -    }
   315         -    assert( pPgHdr->pCache==pCache );
   316         -    assert( pPgHdr->pgno==pgno );
   317         -    assert( pPgHdr->pData==pPage->pBuf );
   318         -    assert( pPgHdr->pExtra==(void *)&pPgHdr[1] );
   319         -
   320         -    if( 0==pPgHdr->nRef ){
   321         -      pCache->nRef++;
   322         -    }
   323         -    pPgHdr->nRef++;
   324         -    if( pgno==1 ){
   325         -      pCache->pPage1 = pPgHdr;
          325  +    rc = pCache->xStress(pCache->pStress, pPg);
          326  +    if( rc!=SQLITE_OK && rc!=SQLITE_BUSY ){
          327  +      return rc;
   326    328       }
   327    329     }
   328         -  *ppPage = pPgHdr;
   329         -  return (pPgHdr==0 && eCreate) ? SQLITE_NOMEM : SQLITE_OK;
          330  +  *ppPage = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, 2);
          331  +  return *ppPage==0 ? SQLITE_NOMEM : SQLITE_OK;
          332  +}
          333  +
          334  +/*
          335  +** This is a helper routine for sqlite3PcacheFetchFinish()
          336  +**
          337  +** In the uncommon case where the page being fetched has not been
          338  +** initialized, this routine is invoked to do the initialization.
          339  +** This routine is broken out into a separate function since it
          340  +** requires extra stack manipulation that can be avoided in the common
          341  +** case.
          342  +*/
          343  +static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit(
          344  +  PCache *pCache,             /* Obtain the page from this cache */
          345  +  Pgno pgno,                  /* Page number obtained */
          346  +  sqlite3_pcache_page *pPage  /* Page obtained by prior PcacheFetch() call */
          347  +){
          348  +  PgHdr *pPgHdr;
          349  +  assert( pPage!=0 );
          350  +  pPgHdr = (PgHdr*)pPage->pExtra;
          351  +  assert( pPgHdr->pPage==0 );
          352  + memset(pPgHdr, 0, sizeof(PgHdr));
          353  +  pPgHdr->pPage = pPage;
          354  +  pPgHdr->pData = pPage->pBuf;
          355  +  pPgHdr->pExtra = (void *)&pPgHdr[1];
          356  +  memset(pPgHdr->pExtra, 0, pCache->szExtra);
          357  +  pPgHdr->pCache = pCache;
          358  +  pPgHdr->pgno = pgno;
          359  +  return sqlite3PcacheFetchFinish(pCache,pgno,pPage);
          360  +}
          361  +
          362  +/*
          363  +** This routine converts the sqlite3_pcache_page object returned by
          364  +** sqlite3PcacheFetch() into an initialized PgHdr object.  This routine
          365  +** must be called after sqlite3PcacheFetch() in order to get a usable
          366  +** result.
          367  +*/
          368  +PgHdr *sqlite3PcacheFetchFinish(
          369  +  PCache *pCache,             /* Obtain the page from this cache */
          370  +  Pgno pgno,                  /* Page number obtained */
          371  +  sqlite3_pcache_page *pPage  /* Page obtained by prior PcacheFetch() call */
          372  +){
          373  +  PgHdr *pPgHdr;
          374  +
          375  +  if( pPage==0 ) return 0;
          376  +  pPgHdr = (PgHdr *)pPage->pExtra;
          377  +
          378  +  if( !pPgHdr->pPage ){
          379  +    return pcacheFetchFinishWithInit(pCache, pgno, pPage);
          380  +  }
          381  +  if( 0==pPgHdr->nRef ){
          382  +    pCache->nRef++;
          383  +  }
          384  +  pPgHdr->nRef++;
          385  +  if( pgno==1 ){
          386  +    pCache->pPage1 = pPgHdr;
          387  +  }
          388  +  return pPgHdr;
   330    389   }
   331    390   
   332    391   /*
   333    392   ** Decrement the reference count on a page. If the page is clean and the
   334    393   ** reference count drops to 0, then it is made eligible for recycling.
   335    394   */
   336    395   void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){
................................................................................
   467    526     }
   468    527   }
   469    528   
   470    529   /*
   471    530   ** Close a cache.
   472    531   */
   473    532   void sqlite3PcacheClose(PCache *pCache){
   474         -  if( pCache->pCache ){
   475         -    sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
   476         -  }
          533  +  assert( pCache->pCache!=0 );
          534  +  sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
   477    535   }
   478    536   
   479    537   /* 
   480    538   ** Discard the contents of the cache.
   481    539   */
   482    540   void sqlite3PcacheClear(PCache *pCache){
   483    541     sqlite3PcacheTruncate(pCache, 0);
................................................................................
   578    636     return p->nRef;
   579    637   }
   580    638   
   581    639   /* 
   582    640   ** Return the total number of pages in the cache.
   583    641   */
   584    642   int sqlite3PcachePagecount(PCache *pCache){
   585         -  int nPage = 0;
   586         -  if( pCache->pCache ){
   587         -    nPage = sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache);
   588         -  }
   589         -  return nPage;
          643  +  assert( pCache->pCache!=0 );
          644  +  return sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache);
   590    645   }
   591    646   
   592    647   #ifdef SQLITE_TEST
   593    648   /*
   594    649   ** Get the suggested cache-size value.
   595    650   */
   596    651   int sqlite3PcacheGetCachesize(PCache *pCache){
................................................................................
   598    653   }
   599    654   #endif
   600    655   
   601    656   /*
   602    657   ** Set the suggested cache-size value.
   603    658   */
   604    659   void sqlite3PcacheSetCachesize(PCache *pCache, int mxPage){
          660  +  assert( pCache->pCache!=0 );
   605    661     pCache->szCache = mxPage;
   606         -  if( pCache->pCache ){
   607         -    sqlite3GlobalConfig.pcache2.xCachesize(pCache->pCache,
   608         -                                           numberOfCachePages(pCache));
   609         -  }
          662  +  sqlite3GlobalConfig.pcache2.xCachesize(pCache->pCache,
          663  +                                         numberOfCachePages(pCache));
   610    664   }
   611    665   
   612    666   /*
   613    667   ** Free up as much memory as possible from the page cache.
   614    668   */
   615    669   void sqlite3PcacheShrink(PCache *pCache){
   616         -  if( pCache->pCache ){
   617         -    sqlite3GlobalConfig.pcache2.xShrink(pCache->pCache);
   618         -  }
          670  +  assert( pCache->pCache!=0 );
          671  +  sqlite3GlobalConfig.pcache2.xShrink(pCache->pCache);
   619    672   }
   620    673   
   621    674   #if defined(SQLITE_CHECK_PAGES) || defined(SQLITE_DEBUG)
   622    675   /*
   623    676   ** For all dirty pages currently in the cache, invoke the specified
   624    677   ** callback. This is only used if the SQLITE_CHECK_PAGES macro is
   625    678   ** defined.

Changes to src/pcache.h.

    64     64   */
    65     65   void sqlite3PCacheBufferSetup(void *, int sz, int n);
    66     66   
    67     67   /* Create a new pager cache.
    68     68   ** Under memory stress, invoke xStress to try to make pages clean.
    69     69   ** Only clean and unpinned pages can be reclaimed.
    70     70   */
    71         -void sqlite3PcacheOpen(
           71  +int sqlite3PcacheOpen(
    72     72     int szPage,                    /* Size of every page */
    73     73     int szExtra,                   /* Extra space associated with each page */
    74     74     int bPurgeable,                /* True if pages are on backing store */
    75     75     int (*xStress)(void*, PgHdr*), /* Call to try to make pages clean */
    76     76     void *pStress,                 /* Argument to xStress */
    77     77     PCache *pToInit                /* Preallocated space for the PCache */
    78     78   );
    79     79   
    80     80   /* Modify the page-size after the cache has been created. */
    81         -void sqlite3PcacheSetPageSize(PCache *, int);
           81  +int sqlite3PcacheSetPageSize(PCache *, int);
    82     82   
    83     83   /* Return the size in bytes of a PCache object.  Used to preallocate
    84     84   ** storage space.
    85     85   */
    86     86   int sqlite3PcacheSize(void);
    87     87   
    88     88   /* One release per successful fetch.  Page is pinned until released.
    89     89   ** Reference counted. 
    90     90   */
    91         -int sqlite3PcacheFetch(PCache*, Pgno, int createFlag, PgHdr**);
           91  +sqlite3_pcache_page *sqlite3PcacheFetch(PCache*, Pgno, int createFlag);
           92  +int sqlite3PcacheFetchStress(PCache*, Pgno, sqlite3_pcache_page**);
           93  +PgHdr *sqlite3PcacheFetchFinish(PCache*, Pgno, sqlite3_pcache_page *pPage);
    92     94   void sqlite3PcacheRelease(PgHdr*);
    93     95   
    94     96   void sqlite3PcacheDrop(PgHdr*);         /* Remove page from cache */
    95     97   void sqlite3PcacheMakeDirty(PgHdr*);    /* Make sure page is marked dirty */
    96     98   void sqlite3PcacheMakeClean(PgHdr*);    /* Mark a single page as clean */
    97     99   void sqlite3PcacheCleanAll(PCache*);    /* Mark all dirty list pages as clean */
    98    100   

Changes to src/pragma.c.

    57     57   #define PragTyp_SHRINK_MEMORY                 26
    58     58   #define PragTyp_SOFT_HEAP_LIMIT               27
    59     59   #define PragTyp_STATS                         28
    60     60   #define PragTyp_SYNCHRONOUS                   29
    61     61   #define PragTyp_TABLE_INFO                    30
    62     62   #define PragTyp_TEMP_STORE                    31
    63     63   #define PragTyp_TEMP_STORE_DIRECTORY          32
    64         -#define PragTyp_WAL_AUTOCHECKPOINT            33
    65         -#define PragTyp_WAL_CHECKPOINT                34
    66         -#define PragTyp_ACTIVATE_EXTENSIONS           35
    67         -#define PragTyp_HEXKEY                        36
    68         -#define PragTyp_KEY                           37
    69         -#define PragTyp_REKEY                         38
    70         -#define PragTyp_LOCK_STATUS                   39
    71         -#define PragTyp_PARSER_TRACE                  40
           64  +#define PragTyp_THREADS                       33
           65  +#define PragTyp_WAL_AUTOCHECKPOINT            34
           66  +#define PragTyp_WAL_CHECKPOINT                35
           67  +#define PragTyp_ACTIVATE_EXTENSIONS           36
           68  +#define PragTyp_HEXKEY                        37
           69  +#define PragTyp_KEY                           38
           70  +#define PragTyp_REKEY                         39
           71  +#define PragTyp_LOCK_STATUS                   40
           72  +#define PragTyp_PARSER_TRACE                  41
    72     73   #define PragFlag_NeedSchema           0x01
    73     74   static const struct sPragmaNames {
    74     75     const char *const zName;  /* Name of pragma */
    75     76     u8 ePragTyp;              /* PragTyp_XXX value */
    76     77     u8 mPragFlag;             /* Zero or more PragFlag_XXX values */
    77     78     u32 iArg;                 /* Extra argument */
    78     79   } aPragmaNames[] = {
................................................................................
   414    415       /* ePragFlag: */ 0,
   415    416       /* iArg:      */ 0 },
   416    417     { /* zName:     */ "temp_store_directory",
   417    418       /* ePragTyp:  */ PragTyp_TEMP_STORE_DIRECTORY,
   418    419       /* ePragFlag: */ 0,
   419    420       /* iArg:      */ 0 },
   420    421   #endif
          422  +  { /* zName:     */ "threads",
          423  +    /* ePragTyp:  */ PragTyp_THREADS,
          424  +    /* ePragFlag: */ 0,
          425  +    /* iArg:      */ 0 },
   421    426   #if !defined(SQLITE_OMIT_SCHEMA_VERSION_PRAGMAS)
   422    427     { /* zName:     */ "user_version",
   423    428       /* ePragTyp:  */ PragTyp_HEADER_VALUE,
   424    429       /* ePragFlag: */ 0,
   425    430       /* iArg:      */ 0 },
   426    431   #endif
   427    432   #if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
................................................................................
   461    466   #if !defined(SQLITE_OMIT_FLAG_PRAGMAS)
   462    467     { /* zName:     */ "writable_schema",
   463    468       /* ePragTyp:  */ PragTyp_FLAG,
   464    469       /* ePragFlag: */ 0,
   465    470       /* iArg:      */ SQLITE_WriteSchema|SQLITE_RecoveryMode },
   466    471   #endif
   467    472   };
   468         -/* Number of pragmas: 56 on by default, 69 total. */
          473  +/* Number of pragmas: 57 on by default, 70 total. */
   469    474   /* End of the automatically generated pragma table.
   470    475   ***************************************************************************/
   471    476   
   472    477   /*
   473    478   ** Interpret the given string as a safety level.  Return 0 for OFF,
   474    479   ** 1 for ON or NORMAL and 2 for FULL.  Return 1 for an empty or 
   475    480   ** unrecognized string argument.  The FULL option is disallowed
................................................................................
  2284   2289       sqlite3_int64 N;
  2285   2290       if( zRight && sqlite3DecOrHexToI64(zRight, &N)==SQLITE_OK ){
  2286   2291         sqlite3_soft_heap_limit64(N);
  2287   2292       }
  2288   2293       returnSingleInt(pParse, "soft_heap_limit",  sqlite3_soft_heap_limit64(-1));
  2289   2294       break;
  2290   2295     }
         2296  +
         2297  +  /*
         2298  +  **   PRAGMA threads
         2299  +  **   PRAGMA threads = N
         2300  +  **
         2301  +  ** Configure the maximum number of worker threads.  Return the new
         2302  +  ** maximum, which might be less than requested.
         2303  +  */
         2304  +  case PragTyp_THREADS: {
         2305  +    sqlite3_int64 N;
         2306  +    if( zRight
         2307  +     && sqlite3DecOrHexToI64(zRight, &N)==SQLITE_OK
         2308  +     && N>=0
         2309  +    ){
         2310  +      sqlite3_limit(db, SQLITE_LIMIT_WORKER_THREADS, (int)(N&0x7fffffff));
         2311  +    }
         2312  +    returnSingleInt(pParse, "threads",
         2313  +                    sqlite3_limit(db, SQLITE_LIMIT_WORKER_THREADS, -1));
         2314  +    break;
         2315  +  }
  2291   2316   
  2292   2317   #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
  2293   2318     /*
  2294   2319     ** Report the current state of file locks for all databases
  2295   2320     */
  2296   2321     case PragTyp_LOCK_STATUS: {
  2297   2322       static const char *const azLockName[] = {

Changes to src/select.c.

   451    451     Parse *pParse,       /* Parsing context */
   452    452     ExprList *pList,     /* Form the KeyInfo object from this ExprList */
   453    453     int iStart,          /* Begin with this column of pList */
   454    454     int nExtra           /* Add this many extra columns to the end */
   455    455   );
   456    456   
   457    457   /*
   458         -** Insert code into "v" that will push the record in register regData
   459         -** into the sorter.
          458  +** Generate code that will push the record in registers regData
          459  +** through regData+nData-1 onto the sorter.
   460    460   */
   461    461   static void pushOntoSorter(
   462    462     Parse *pParse,         /* Parser context */
   463    463     SortCtx *pSort,        /* Information about the ORDER BY clause */
   464    464     Select *pSelect,       /* The whole SELECT statement */
   465         -  int regData            /* Register holding data to be sorted */
   466         -){
   467         -  Vdbe *v = pParse->pVdbe;
   468         -  int nExpr = pSort->pOrderBy->nExpr;
   469         -  int regRecord = ++pParse->nMem;
   470         -  int regBase = pParse->nMem+1;
   471         -  int nOBSat = pSort->nOBSat;
   472         -  int op;
   473         -
   474         -  pParse->nMem += nExpr+2;        /* nExpr+2 registers allocated at regBase */
   475         -  sqlite3ExprCacheClear(pParse);
   476         -  sqlite3ExprCodeExprList(pParse, pSort->pOrderBy, regBase, 0);
   477         -  sqlite3VdbeAddOp2(v, OP_Sequence, pSort->iECursor, regBase+nExpr);
   478         -  sqlite3ExprCodeMove(pParse, regData, regBase+nExpr+1, 1);
   479         -  sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase+nOBSat, nExpr+2-nOBSat,regRecord);
          465  +  int regData,           /* First register holding data to be sorted */
          466  +  int nData,             /* Number of elements in the data array */
          467  +  int nPrefixReg         /* No. of reg prior to regData available for use */
          468  +){
          469  +  Vdbe *v = pParse->pVdbe;                         /* Stmt under construction */
          470  +  int bSeq = ((pSort->sortFlags & SORTFLAG_UseSorter)==0);
          471  +  int nExpr = pSort->pOrderBy->nExpr;              /* No. of ORDER BY terms */
          472  +  int nBase = nExpr + bSeq + nData;                /* Fields in sorter record */
          473  +  int regBase;                                     /* Regs for sorter record */
          474  +  int regRecord = ++pParse->nMem;                  /* Assembled sorter record */
          475  +  int nOBSat = pSort->nOBSat;                      /* ORDER BY terms to skip */
          476  +  int op;                            /* Opcode to add sorter record to sorter */
          477  +
          478  +  assert( bSeq==0 || bSeq==1 );
          479  +  if( nPrefixReg ){
          480  +    assert( nPrefixReg==nExpr+bSeq );
          481  +    regBase = regData - nExpr - bSeq;
          482  +  }else{
          483  +    regBase = pParse->nMem + 1;
          484  +    pParse->nMem += nBase;
          485  +  }
          486  +  sqlite3ExprCodeExprList(pParse, pSort->pOrderBy, regBase, SQLITE_ECEL_DUP);
          487  +  if( bSeq ){
          488  +    sqlite3VdbeAddOp2(v, OP_Sequence, pSort->iECursor, regBase+nExpr);
          489  +  }
          490  +  if( nPrefixReg==0 ){
          491  +    sqlite3VdbeAddOp3(v, OP_Move, regData, regBase+nExpr+bSeq, nData);
          492  +  }
          493  +
          494  +  sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase+nOBSat, nBase-nOBSat, regRecord);
   480    495     if( nOBSat>0 ){
   481    496       int regPrevKey;   /* The first nOBSat columns of the previous row */
   482    497       int addrFirst;    /* Address of the OP_IfNot opcode */
   483    498       int addrJmp;      /* Address of the OP_Jump opcode */
   484    499       VdbeOp *pOp;      /* Opcode that opens the sorter */
   485    500       int nKey;         /* Number of sorting key columns, including OP_Sequence */
   486    501       KeyInfo *pKI;     /* Original KeyInfo on the sorter table */
   487    502   
   488    503       regPrevKey = pParse->nMem+1;
   489    504       pParse->nMem += pSort->nOBSat;
   490         -    nKey = nExpr - pSort->nOBSat + 1;
   491         -    addrFirst = sqlite3VdbeAddOp1(v, OP_IfNot, regBase+nExpr); VdbeCoverage(v);
          505  +    nKey = nExpr - pSort->nOBSat + bSeq;
          506  +    if( bSeq ){
          507  +      addrFirst = sqlite3VdbeAddOp1(v, OP_IfNot, regBase+nExpr); 
          508  +    }else{
          509  +      addrFirst = sqlite3VdbeAddOp1(v, OP_SequenceTest, pSort->iECursor);
          510  +    }
          511  +    VdbeCoverage(v);
   492    512       sqlite3VdbeAddOp3(v, OP_Compare, regPrevKey, regBase, pSort->nOBSat);
   493    513       pOp = sqlite3VdbeGetOp(v, pSort->addrSortIndex);
   494    514       if( pParse->db->mallocFailed ) return;
   495         -    pOp->p2 = nKey + 1;
          515  +    pOp->p2 = nKey + nData;
   496    516       pKI = pOp->p4.pKeyInfo;
   497    517       memset(pKI->aSortOrder, 0, pKI->nField); /* Makes OP_Jump below testable */
   498    518       sqlite3VdbeChangeP4(v, -1, (char*)pKI, P4_KEYINFO);
   499    519       pOp->p4.pKeyInfo = keyInfoFromExprList(pParse, pSort->pOrderBy, nOBSat, 1);
   500    520       addrJmp = sqlite3VdbeCurrentAddr(v);
   501    521       sqlite3VdbeAddOp3(v, OP_Jump, addrJmp+1, 0, addrJmp+1); VdbeCoverage(v);
   502    522       pSort->labelBkOut = sqlite3VdbeMakeLabel(v);
................................................................................
   622    642     Vdbe *v = pParse->pVdbe;
   623    643     int i;
   624    644     int hasDistinct;        /* True if the DISTINCT keyword is present */
   625    645     int regResult;              /* Start of memory holding result set */
   626    646     int eDest = pDest->eDest;   /* How to dispose of results */
   627    647     int iParm = pDest->iSDParm; /* First argument to disposal method */
   628    648     int nResultCol;             /* Number of result columns */
          649  +  int nPrefixReg = 0;         /* Number of extra registers before regResult */
   629    650   
   630    651     assert( v );
   631    652     assert( pEList!=0 );
   632    653     hasDistinct = pDistinct ? pDistinct->eTnctType : WHERE_DISTINCT_NOOP;
   633    654     if( pSort && pSort->pOrderBy==0 ) pSort = 0;
   634    655     if( pSort==0 && !hasDistinct ){
   635    656       assert( iContinue!=0 );
................................................................................
   637    658     }
   638    659   
   639    660     /* Pull the requested columns.
   640    661     */
   641    662     nResultCol = pEList->nExpr;
   642    663   
   643    664     if( pDest->iSdst==0 ){
          665  +    if( pSort ){
          666  +      nPrefixReg = pSort->pOrderBy->nExpr;
          667  +      if( !(pSort->sortFlags & SORTFLAG_UseSorter) ) nPrefixReg++;
          668  +      pParse->nMem += nPrefixReg;
          669  +    }
   644    670       pDest->iSdst = pParse->nMem+1;
   645    671       pParse->nMem += nResultCol;
   646    672     }else if( pDest->iSdst+nResultCol > pParse->nMem ){
   647    673       /* This is an error condition that can result, for example, when a SELECT
   648    674       ** on the right-hand side of an INSERT contains more result columns than
   649    675       ** there are columns in the table on the left.  The error will be caught
   650    676       ** and reported later.  But we need to make sure enough memory is allocated
................................................................................
   753    779   
   754    780       /* Store the result as data using a unique key.
   755    781       */
   756    782       case SRT_Fifo:
   757    783       case SRT_DistFifo:
   758    784       case SRT_Table:
   759    785       case SRT_EphemTab: {
   760         -      int r1 = sqlite3GetTempReg(pParse);
          786  +      int r1 = sqlite3GetTempRange(pParse, nPrefixReg+1);
   761    787         testcase( eDest==SRT_Table );
   762    788         testcase( eDest==SRT_EphemTab );
   763         -      sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1);
          789  +      sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1+nPrefixReg);
   764    790   #ifndef SQLITE_OMIT_CTE
   765    791         if( eDest==SRT_DistFifo ){
   766    792           /* If the destination is DistFifo, then cursor (iParm+1) is open
   767    793           ** on an ephemeral index. If the current row is already present
   768    794           ** in the index, do not write it to the output. If not, add the
   769    795           ** current row to the index and proceed with writing it to the
   770    796           ** output table as well.  */
................................................................................
   771    797           int addr = sqlite3VdbeCurrentAddr(v) + 4;
   772    798           sqlite3VdbeAddOp4Int(v, OP_Found, iParm+1, addr, r1, 0); VdbeCoverage(v);
   773    799           sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm+1, r1);
   774    800           assert( pSort==0 );
   775    801         }
   776    802   #endif
   777    803         if( pSort ){
   778         -        pushOntoSorter(pParse, pSort, p, r1);
          804  +        pushOntoSorter(pParse, pSort, p, r1+nPrefixReg, 1, nPrefixReg);
   779    805         }else{
   780    806           int r2 = sqlite3GetTempReg(pParse);
   781    807           sqlite3VdbeAddOp2(v, OP_NewRowid, iParm, r2);
   782    808           sqlite3VdbeAddOp3(v, OP_Insert, iParm, r1, r2);
   783    809           sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
   784    810           sqlite3ReleaseTempReg(pParse, r2);
   785    811         }
   786         -      sqlite3ReleaseTempReg(pParse, r1);
          812  +      sqlite3ReleaseTempRange(pParse, r1, nPrefixReg+1);
   787    813         break;
   788    814       }
   789    815   
   790    816   #ifndef SQLITE_OMIT_SUBQUERY
   791    817       /* If we are creating a set for an "expr IN (SELECT ...)" construct,
   792    818       ** then there should be a single item on the stack.  Write this
   793    819       ** item into the set table with bogus data.
................................................................................
   797    823         pDest->affSdst =
   798    824                     sqlite3CompareAffinity(pEList->a[0].pExpr, pDest->affSdst);
   799    825         if( pSort ){
   800    826           /* At first glance you would think we could optimize out the
   801    827           ** ORDER BY in this case since the order of entries in the set
   802    828           ** does not matter.  But there might be a LIMIT clause, in which
   803    829           ** case the order does matter */
   804         -        pushOntoSorter(pParse, pSort, p, regResult);
          830  +        pushOntoSorter(pParse, pSort, p, regResult, 1, nPrefixReg);
   805    831         }else{
   806    832           int r1 = sqlite3GetTempReg(pParse);
   807    833           sqlite3VdbeAddOp4(v, OP_MakeRecord, regResult,1,r1, &pDest->affSdst, 1);
   808    834           sqlite3ExprCacheAffinityChange(pParse, regResult, 1);
   809    835           sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm, r1);
   810    836           sqlite3ReleaseTempReg(pParse, r1);
   811    837         }
................................................................................
   823    849       /* If this is a scalar select that is part of an expression, then
   824    850       ** store the results in the appropriate memory cell and break out
   825    851       ** of the scan loop.
   826    852       */
   827    853       case SRT_Mem: {
   828    854         assert( nResultCol==1 );
   829    855         if( pSort ){
   830         -        pushOntoSorter(pParse, pSort, p, regResult);
          856  +        pushOntoSorter(pParse, pSort, p, regResult, 1, nPrefixReg);
   831    857         }else{
   832         -        sqlite3ExprCodeMove(pParse, regResult, iParm, 1);
          858  +        assert( regResult==iParm );
   833    859           /* The LIMIT clause will jump out of the loop for us */
   834    860         }
   835    861         break;
   836    862       }
   837    863   #endif /* #ifndef SQLITE_OMIT_SUBQUERY */
   838    864   
   839    865       case SRT_Coroutine:       /* Send data to a co-routine */
   840    866       case SRT_Output: {        /* Return the results */
   841    867         testcase( eDest==SRT_Coroutine );
   842    868         testcase( eDest==SRT_Output );
   843    869         if( pSort ){
   844         -        int r1 = sqlite3GetTempReg(pParse);
   845         -        sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1);
   846         -        pushOntoSorter(pParse, pSort, p, r1);
   847         -        sqlite3ReleaseTempReg(pParse, r1);
          870  +        pushOntoSorter(pParse, pSort, p, regResult, nResultCol, nPrefixReg);
   848    871         }else if( eDest==SRT_Coroutine ){
   849    872           sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm);
   850    873         }else{
   851    874           sqlite3VdbeAddOp2(v, OP_ResultRow, regResult, nResultCol);
   852    875           sqlite3ExprCacheAffinityChange(pParse, regResult, nResultCol);
   853    876         }
   854    877         break;
................................................................................
  1120   1143   ){
  1121   1144     Vdbe *v = pParse->pVdbe;                     /* The prepared statement */
  1122   1145     int addrBreak = sqlite3VdbeMakeLabel(v);     /* Jump here to exit loop */
  1123   1146     int addrContinue = sqlite3VdbeMakeLabel(v);  /* Jump here for next cycle */
  1124   1147     int addr;
  1125   1148     int addrOnce = 0;
  1126   1149     int iTab;
  1127         -  int pseudoTab = 0;
  1128   1150     ExprList *pOrderBy = pSort->pOrderBy;
  1129   1151     int eDest = pDest->eDest;
  1130   1152     int iParm = pDest->iSDParm;
  1131   1153     int regRow;
  1132   1154     int regRowid;
  1133   1155     int nKey;
         1156  +  int iSortTab;                   /* Sorter cursor to read from */
         1157  +  int nSortData;                  /* Trailing values to read from sorter */
         1158  +  u8 p5;                          /* p5 parameter for 1st OP_Column */
         1159  +  int i;
         1160  +  int bSeq;                       /* True if sorter record includes seq. no. */
         1161  +#ifdef SQLITE_ENABLE_EXPLAIN_COMMENTS
         1162  +  struct ExprList_item *aOutEx = p->pEList->a;
         1163  +#endif
  1134   1164   
  1135   1165     if( pSort->labelBkOut ){
  1136   1166       sqlite3VdbeAddOp2(v, OP_Gosub, pSort->regReturn, pSort->labelBkOut);
  1137   1167       sqlite3VdbeAddOp2(v, OP_Goto, 0, addrBreak);
  1138   1168       sqlite3VdbeResolveLabel(v, pSort->labelBkOut);
  1139         -    addrOnce = sqlite3CodeOnce(pParse); VdbeCoverage(v);
  1140   1169     }
  1141   1170     iTab = pSort->iECursor;
  1142         -  regRow = sqlite3GetTempReg(pParse);
  1143   1171     if( eDest==SRT_Output || eDest==SRT_Coroutine ){
  1144         -    pseudoTab = pParse->nTab++;
  1145         -    sqlite3VdbeAddOp3(v, OP_OpenPseudo, pseudoTab, regRow, nColumn);
  1146   1172       regRowid = 0;
         1173  +    regRow = pDest->iSdst;
         1174  +    nSortData = nColumn;
  1147   1175     }else{
  1148   1176       regRowid = sqlite3GetTempReg(pParse);
         1177  +    regRow = sqlite3GetTempReg(pParse);
         1178  +    nSortData = 1;
  1149   1179     }
  1150   1180     nKey = pOrderBy->nExpr - pSort->nOBSat;
  1151   1181     if( pSort->sortFlags & SORTFLAG_UseSorter ){
  1152   1182       int regSortOut = ++pParse->nMem;
  1153         -    int ptab2 = pParse->nTab++;
  1154         -    sqlite3VdbeAddOp3(v, OP_OpenPseudo, ptab2, regSortOut, nKey+2);
         1183  +    iSortTab = pParse->nTab++;
         1184  +    if( pSort->labelBkOut ){
         1185  +      addrOnce = sqlite3CodeOnce(pParse); VdbeCoverage(v);
         1186  +    }
         1187  +    sqlite3VdbeAddOp3(v, OP_OpenPseudo, iSortTab, regSortOut, nKey+1+nSortData);
  1155   1188       if( addrOnce ) sqlite3VdbeJumpHere(v, addrOnce);
  1156   1189       addr = 1 + sqlite3VdbeAddOp2(v, OP_SorterSort, iTab, addrBreak);
  1157   1190       VdbeCoverage(v);
  1158   1191       codeOffset(v, p->iOffset, addrContinue);
  1159   1192       sqlite3VdbeAddOp2(v, OP_SorterData, iTab, regSortOut);
  1160         -    sqlite3VdbeAddOp3(v, OP_Column, ptab2, nKey+1, regRow);
  1161         -    sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE);
         1193  +    p5 = OPFLAG_CLEARCACHE;
         1194  +    bSeq = 0;
  1162   1195     }else{
  1163         -    if( addrOnce ) sqlite3VdbeJumpHere(v, addrOnce);
  1164   1196       addr = 1 + sqlite3VdbeAddOp2(v, OP_Sort, iTab, addrBreak); VdbeCoverage(v);
  1165   1197       codeOffset(v, p->iOffset, addrContinue);
  1166         -    sqlite3VdbeAddOp3(v, OP_Column, iTab, nKey+1, regRow);
         1198  +    iSortTab = iTab;
         1199  +    p5 = 0;
         1200  +    bSeq = 1;
         1201  +  }
         1202  +  for(i=0; i<nSortData; i++){
         1203  +    sqlite3VdbeAddOp3(v, OP_Column, iSortTab, nKey+bSeq+i, regRow+i);
         1204  +    if( i==0 ) sqlite3VdbeChangeP5(v, p5);
         1205  +    VdbeComment((v, "%s", aOutEx[i].zName ? aOutEx[i].zName : aOutEx[i].zSpan));
  1167   1206     }
  1168   1207     switch( eDest ){
  1169   1208       case SRT_Table:
  1170   1209       case SRT_EphemTab: {
  1171   1210         testcase( eDest==SRT_Table );
  1172   1211         testcase( eDest==SRT_EphemTab );
  1173   1212         sqlite3VdbeAddOp2(v, OP_NewRowid, iParm, regRowid);
................................................................................
  1188   1227         assert( nColumn==1 );
  1189   1228         sqlite3ExprCodeMove(pParse, regRow, iParm, 1);
  1190   1229         /* The LIMIT clause will terminate the loop for us */
  1191   1230         break;
  1192   1231       }
  1193   1232   #endif
  1194   1233       default: {
  1195         -      int i;
  1196   1234         assert( eDest==SRT_Output || eDest==SRT_Coroutine ); 
  1197   1235         testcase( eDest==SRT_Output );
  1198   1236         testcase( eDest==SRT_Coroutine );
  1199         -      for(i=0; i<nColumn; i++){
  1200         -        assert( regRow!=pDest->iSdst+i );
  1201         -        sqlite3VdbeAddOp3(v, OP_Column, pseudoTab, i, pDest->iSdst+i);
  1202         -        if( i==0 ){
  1203         -          sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE);
  1204         -        }
  1205         -      }
  1206   1237         if( eDest==SRT_Output ){
  1207   1238           sqlite3VdbeAddOp2(v, OP_ResultRow, pDest->iSdst, nColumn);
  1208   1239           sqlite3ExprCacheAffinityChange(pParse, pDest->iSdst, nColumn);
  1209   1240         }else{
  1210   1241           sqlite3VdbeAddOp1(v, OP_Yield, pDest->iSDParm);
  1211   1242         }
  1212   1243         break;
  1213   1244       }
  1214   1245     }
  1215         -  sqlite3ReleaseTempReg(pParse, regRow);
  1216         -  sqlite3ReleaseTempReg(pParse, regRowid);
  1217         -
         1246  +  if( regRowid ){
         1247  +    sqlite3ReleaseTempReg(pParse, regRow);
         1248  +    sqlite3ReleaseTempReg(pParse, regRowid);
         1249  +  }
  1218   1250     /* The bottom of the loop
  1219   1251     */
  1220   1252     sqlite3VdbeResolveLabel(v, addrContinue);
  1221   1253     if( pSort->sortFlags & SORTFLAG_UseSorter ){
  1222   1254       sqlite3VdbeAddOp2(v, OP_SorterNext, iTab, addr); VdbeCoverage(v);
  1223   1255     }else{
  1224   1256       sqlite3VdbeAddOp2(v, OP_Next, iTab, addr); VdbeCoverage(v);
................................................................................
  4751   4783     */
  4752   4784     if( sSort.pOrderBy ){
  4753   4785       KeyInfo *pKeyInfo;
  4754   4786       pKeyInfo = keyInfoFromExprList(pParse, sSort.pOrderBy, 0, 0);
  4755   4787       sSort.iECursor = pParse->nTab++;
  4756   4788       sSort.addrSortIndex =
  4757   4789         sqlite3VdbeAddOp4(v, OP_OpenEphemeral,
  4758         -                           sSort.iECursor, sSort.pOrderBy->nExpr+2, 0,
  4759         -                           (char*)pKeyInfo, P4_KEYINFO);
         4790  +          sSort.iECursor, sSort.pOrderBy->nExpr+1+pEList->nExpr, 0,
         4791  +          (char*)pKeyInfo, P4_KEYINFO
         4792  +      );
  4760   4793     }else{
  4761   4794       sSort.addrSortIndex = -1;
  4762   4795     }
  4763   4796   
  4764   4797     /* If the output is destined for a temporary table, open that table.
  4765   4798     */
  4766   4799     if( pDest->eDest==SRT_EphemTab ){
................................................................................
  4883   4916       ** SELECT statement.
  4884   4917       */
  4885   4918       memset(&sNC, 0, sizeof(sNC));
  4886   4919       sNC.pParse = pParse;
  4887   4920       sNC.pSrcList = pTabList;
  4888   4921       sNC.pAggInfo = &sAggInfo;
  4889   4922       sAggInfo.mnReg = pParse->nMem+1;
  4890         -    sAggInfo.nSortingColumn = pGroupBy ? pGroupBy->nExpr+1 : 0;
         4923  +    sAggInfo.nSortingColumn = pGroupBy ? pGroupBy->nExpr : 0;
  4891   4924       sAggInfo.pGroupBy = pGroupBy;
  4892   4925       sqlite3ExprAnalyzeAggList(&sNC, pEList);
  4893   4926       sqlite3ExprAnalyzeAggList(&sNC, sSort.pOrderBy);
  4894   4927       if( pHaving ){
  4895   4928         sqlite3ExprAnalyzeAggregates(&sNC, pHaving);
  4896   4929       }
  4897   4930       sAggInfo.nAccumulator = sAggInfo.nColumn;
................................................................................
  4976   5009   
  4977   5010           explainTempTable(pParse, 
  4978   5011               (sDistinct.isTnct && (p->selFlags&SF_Distinct)==0) ?
  4979   5012                       "DISTINCT" : "GROUP BY");
  4980   5013   
  4981   5014           groupBySort = 1;
  4982   5015           nGroupBy = pGroupBy->nExpr;
  4983         -        nCol = nGroupBy + 1;
  4984         -        j = nGroupBy+1;
         5016  +        nCol = nGroupBy;
         5017  +        j = nGroupBy;
  4985   5018           for(i=0; i<sAggInfo.nColumn; i++){
  4986   5019             if( sAggInfo.aCol[i].iSorterColumn>=j ){
  4987   5020               nCol++;
  4988   5021               j++;
  4989   5022             }
  4990   5023           }
  4991   5024           regBase = sqlite3GetTempRange(pParse, nCol);
  4992   5025           sqlite3ExprCacheClear(pParse);
  4993   5026           sqlite3ExprCodeExprList(pParse, pGroupBy, regBase, 0);
  4994         -        sqlite3VdbeAddOp2(v, OP_Sequence, sAggInfo.sortingIdx,regBase+nGroupBy);
  4995         -        j = nGroupBy+1;
         5027  +        j = nGroupBy;
  4996   5028           for(i=0; i<sAggInfo.nColumn; i++){
  4997   5029             struct AggInfo_col *pCol = &sAggInfo.aCol[i];
  4998   5030             if( pCol->iSorterColumn>=j ){
  4999   5031               int r1 = j + regBase;
  5000   5032               int r2;
  5001   5033   
  5002   5034               r2 = sqlite3ExprCodeGetColumn(pParse, 

Changes to src/shell.c.

   458    458     int cnt;               /* Number of records displayed so far */
   459    459     FILE *out;             /* Write results here */
   460    460     FILE *traceOut;        /* Output for sqlite3_trace() */
   461    461     int nErr;              /* Number of errors seen */
   462    462     int mode;              /* An output mode setting */
   463    463     int writableSchema;    /* True if PRAGMA writable_schema=ON */
   464    464     int showHeader;        /* True to show column names in List or Column mode */
          465  +  unsigned shellFlgs;    /* Various flags */
   465    466     char *zDestTable;      /* Name of destination table when MODE_Insert */
   466    467     char separator[20];    /* Separator character for MODE_List */
   467    468     char newline[20];      /* Record separator in MODE_Csv */
   468    469     int colWidth[100];     /* Requested width of each column when in column mode*/
   469    470     int actualWidth[100];  /* Actual width of each column */
   470    471     char nullvalue[20];    /* The text to print when a NULL comes back from
   471    472                            ** the database */
................................................................................
   477    478     sqlite3_stmt *pStmt;   /* Current statement if any. */
   478    479     FILE *pLog;            /* Write log output here */
   479    480     int *aiIndent;         /* Array of indents used in MODE_Explain */
   480    481     int nIndent;           /* Size of array aiIndent[] */
   481    482     int iIndent;           /* Index of current op in aiIndent[] */
   482    483   };
   483    484   
          485  +/*
          486  +** These are the allowed shellFlgs values
          487  +*/
          488  +#define SHFLG_Scratch     0x00001     /* The --scratch option is used */
          489  +#define SHFLG_Pagecache   0x00002     /* The --pagecache option is used */
          490  +#define SHFLG_Lookaside   0x00004     /* Lookaside memory is used */
          491  +
   484    492   /*
   485    493   ** These are the allowed modes.
   486    494   */
   487    495   #define MODE_Line     0  /* One column per line.  Blank line between records */
   488    496   #define MODE_Column   1  /* One record per line in neat columns */
   489    497   #define MODE_List     2  /* One record per line with a separator */
   490    498   #define MODE_Semi     3  /* Same as MODE_List but append ";" to each line */
................................................................................
  1093   1101       
  1094   1102       iHiwtr = iCur = -1;
  1095   1103       sqlite3_status(SQLITE_STATUS_MEMORY_USED, &iCur, &iHiwtr, bReset);
  1096   1104       fprintf(pArg->out, "Memory Used:                         %d (max %d) bytes\n", iCur, iHiwtr);
  1097   1105       iHiwtr = iCur = -1;
  1098   1106       sqlite3_status(SQLITE_STATUS_MALLOC_COUNT, &iCur, &iHiwtr, bReset);
  1099   1107       fprintf(pArg->out, "Number of Outstanding Allocations:   %d (max %d)\n", iCur, iHiwtr);
  1100         -/*
  1101         -** Not currently used by the CLI.
  1102         -**    iHiwtr = iCur = -1;
  1103         -**    sqlite3_status(SQLITE_STATUS_PAGECACHE_USED, &iCur, &iHiwtr, bReset);
  1104         -**    fprintf(pArg->out, "Number of Pcache Pages Used:         %d (max %d) pages\n", iCur, iHiwtr);
  1105         -*/
         1108  +    if( pArg->shellFlgs & SHFLG_Pagecache ){
         1109  +      iHiwtr = iCur = -1;
         1110  +      sqlite3_status(SQLITE_STATUS_PAGECACHE_USED, &iCur, &iHiwtr, bReset);
         1111  +      fprintf(pArg->out, "Number of Pcache Pages Used:         %d (max %d) pages\n", iCur, iHiwtr);
         1112  +    }
  1106   1113       iHiwtr = iCur = -1;
  1107   1114       sqlite3_status(SQLITE_STATUS_PAGECACHE_OVERFLOW, &iCur, &iHiwtr, bReset);
  1108   1115       fprintf(pArg->out, "Number of Pcache Overflow Bytes:     %d (max %d) bytes\n", iCur, iHiwtr);
  1109         -/*
  1110         -** Not currently used by the CLI.
  1111         -**    iHiwtr = iCur = -1;
  1112         -**    sqlite3_status(SQLITE_STATUS_SCRATCH_USED, &iCur, &iHiwtr, bReset);
  1113         -**    fprintf(pArg->out, "Number of Scratch Allocations Used:  %d (max %d)\n", iCur, iHiwtr);
  1114         -*/
         1116  +    if( pArg->shellFlgs & SHFLG_Scratch ){
         1117  +      iHiwtr = iCur = -1;
         1118  +      sqlite3_status(SQLITE_STATUS_SCRATCH_USED, &iCur, &iHiwtr, bReset);
         1119  +      fprintf(pArg->out, "Number of Scratch Allocations Used:  %d (max %d)\n", iCur, iHiwtr);
         1120  +    }
  1115   1121       iHiwtr = iCur = -1;
  1116   1122       sqlite3_status(SQLITE_STATUS_SCRATCH_OVERFLOW, &iCur, &iHiwtr, bReset);
  1117   1123       fprintf(pArg->out, "Number of Scratch Overflow Bytes:    %d (max %d) bytes\n", iCur, iHiwtr);
  1118   1124       iHiwtr = iCur = -1;
  1119   1125       sqlite3_status(SQLITE_STATUS_MALLOC_SIZE, &iCur, &iHiwtr, bReset);
  1120   1126       fprintf(pArg->out, "Largest Allocation:                  %d bytes\n", iHiwtr);
  1121   1127       iHiwtr = iCur = -1;
................................................................................
  1128   1134       iHiwtr = iCur = -1;
  1129   1135       sqlite3_status(SQLITE_STATUS_PARSER_STACK, &iCur, &iHiwtr, bReset);
  1130   1136       fprintf(pArg->out, "Deepest Parser Stack:                %d (max %d)\n", iCur, iHiwtr);
  1131   1137   #endif
  1132   1138     }
  1133   1139   
  1134   1140     if( pArg && pArg->out && db ){
  1135         -    iHiwtr = iCur = -1;
  1136         -    sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_USED, &iCur, &iHiwtr, bReset);
  1137         -    fprintf(pArg->out, "Lookaside Slots Used:                %d (max %d)\n", iCur, iHiwtr);
  1138         -    sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_HIT, &iCur, &iHiwtr, bReset);
  1139         -    fprintf(pArg->out, "Successful lookaside attempts:       %d\n", iHiwtr);
  1140         -    sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE, &iCur, &iHiwtr, bReset);
  1141         -    fprintf(pArg->out, "Lookaside failures due to size:      %d\n", iHiwtr);
  1142         -    sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL, &iCur, &iHiwtr, bReset);
  1143         -    fprintf(pArg->out, "Lookaside failures due to OOM:       %d\n", iHiwtr);
         1141  +    if( pArg->shellFlgs & SHFLG_Lookaside ){
         1142  +      iHiwtr = iCur = -1;
         1143  +      sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_USED, &iCur, &iHiwtr, bReset);
         1144  +      fprintf(pArg->out, "Lookaside Slots Used:                %d (max %d)\n", iCur, iHiwtr);
         1145  +      sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_HIT, &iCur, &iHiwtr, bReset);
         1146  +      fprintf(pArg->out, "Successful lookaside attempts:       %d\n", iHiwtr);
         1147  +      sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE, &iCur, &iHiwtr, bReset);
         1148  +      fprintf(pArg->out, "Lookaside failures due to size:      %d\n", iHiwtr);
         1149  +      sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL, &iCur, &iHiwtr, bReset);
         1150  +      fprintf(pArg->out, "Lookaside failures due to OOM:       %d\n", iHiwtr);
         1151  +    }
  1144   1152       iHiwtr = iCur = -1;
  1145   1153       sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_USED, &iCur, &iHiwtr, bReset);
  1146   1154       fprintf(pArg->out, "Pager Heap Usage:                    %d bytes\n", iCur);    iHiwtr = iCur = -1;
  1147   1155       sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_HIT, &iCur, &iHiwtr, 1);
  1148   1156       fprintf(pArg->out, "Page cache hits:                     %d\n", iCur);
  1149   1157       iHiwtr = iCur = -1;
  1150   1158       sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_MISS, &iCur, &iHiwtr, 1);
................................................................................
  3775   3783     "   -heap SIZE           Size of heap for memsys3 or memsys5\n"
  3776   3784   #endif
  3777   3785     "   -help                show this message\n"
  3778   3786     "   -html                set output mode to HTML\n"
  3779   3787     "   -interactive         force interactive I/O\n"
  3780   3788     "   -line                set output mode to 'line'\n"
  3781   3789     "   -list                set output mode to 'list'\n"
         3790  +  "   -lookaside SIZE N    use N entries of SZ bytes for lookaside memory\n"
  3782   3791     "   -mmap N              default mmap size set to N\n"
  3783   3792   #ifdef SQLITE_ENABLE_MULTIPLEX
  3784   3793     "   -multiplex           enable the multiplexor VFS\n"
  3785   3794   #endif
  3786   3795     "   -newline SEP         set newline character(s) for CSV\n"
  3787   3796     "   -nullvalue TEXT      set text string for NULL values. Default ''\n"
         3797  +  "   -pagecache SIZE N    use N slots of SZ bytes each for page cache memory\n"
         3798  +  "   -scratch SIZE N      use N slots of SZ bytes each for scratch memory\n"
  3788   3799     "   -separator SEP       set output field separator. Default: '|'\n"
  3789   3800     "   -stats               print memory stats before each finalize\n"
  3790   3801     "   -version             show SQLite version\n"
  3791   3802     "   -vfs NAME            use NAME as the default VFS\n"
  3792   3803   #ifdef SQLITE_ENABLE_VFSTRACE
  3793   3804     "   -vfstrace            enable tracing of all VFS calls\n"
  3794   3805   #endif
................................................................................
  3811   3822   */
  3812   3823   static void main_init(ShellState *data) {
  3813   3824     memset(data, 0, sizeof(*data));
  3814   3825     data->mode = MODE_List;
  3815   3826     memcpy(data->separator,"|", 2);
  3816   3827     memcpy(data->newline,"\r\n", 3);
  3817   3828     data->showHeader = 0;
         3829  +  data->shellFlgs = SHFLG_Lookaside;
  3818   3830     sqlite3_config(SQLITE_CONFIG_URI, 1);
  3819   3831     sqlite3_config(SQLITE_CONFIG_LOG, shellLog, data);
         3832  +  sqlite3_config(SQLITE_CONFIG_MULTITHREAD);
  3820   3833     sqlite3_snprintf(sizeof(mainPrompt), mainPrompt,"sqlite> ");
  3821   3834     sqlite3_snprintf(sizeof(continuePrompt), continuePrompt,"   ...> ");
  3822         -  sqlite3_config(SQLITE_CONFIG_SINGLETHREAD);
  3823   3835   }
  3824   3836   
  3825   3837   /*
  3826   3838   ** Output text to the console in a font that attracts extra attention.
  3827   3839   */
  3828   3840   #ifdef _WIN32
  3829   3841   static void printBold(const char *zText){
................................................................................
  3924   3936         sqlite3_int64 szHeap;
  3925   3937   
  3926   3938         zSize = cmdline_option_value(argc, argv, ++i);
  3927   3939         szHeap = integerValue(zSize);
  3928   3940         if( szHeap>0x7fff0000 ) szHeap = 0x7fff0000;
  3929   3941         sqlite3_config(SQLITE_CONFIG_HEAP, malloc((int)szHeap), (int)szHeap, 64);
  3930   3942   #endif
         3943  +    }else if( strcmp(z,"-scratch")==0 ){
         3944  +      int n, sz;
         3945  +      sz = (int)integerValue(cmdline_option_value(argc,argv,++i));
         3946  +      if( sz>400000 ) sz = 400000;
         3947  +      if( sz<2500 ) sz = 2500;
         3948  +      n = (int)integerValue(cmdline_option_value(argc,argv,++i));
         3949  +      if( n>10 ) n = 10;
         3950  +      if( n<1 ) n = 1;
         3951  +      sqlite3_config(SQLITE_CONFIG_SCRATCH, malloc(n*sz+1), sz, n);
         3952  +      data.shellFlgs |= SHFLG_Scratch;
         3953  +    }else if( strcmp(z,"-pagecache")==0 ){
         3954  +      int n, sz;
         3955  +      sz = (int)integerValue(cmdline_option_value(argc,argv,++i));
         3956  +      if( sz>70000 ) sz = 70000;
         3957  +      if( sz<800 ) sz = 800;
         3958  +      n = (int)integerValue(cmdline_option_value(argc,argv,++i));
         3959  +      if( n<10 ) n = 10;
         3960  +      sqlite3_config(SQLITE_CONFIG_PAGECACHE, malloc(n*sz+1), sz, n);
         3961  +      data.shellFlgs |= SHFLG_Pagecache;
         3962  +    }else if( strcmp(z,"-lookaside")==0 ){
         3963  +      int n, sz;
         3964  +      sz = (int)integerValue(cmdline_option_value(argc,argv,++i));
         3965  +      if( sz<0 ) sz = 0;
         3966  +      n = (int)integerValue(cmdline_option_value(argc,argv,++i));
         3967  +      if( n<0 ) n = 0;
         3968  +      sqlite3_config(SQLITE_CONFIG_LOOKASIDE, sz, n);
         3969  +      if( sz*n==0 ) data.shellFlgs &= ~SHFLG_Lookaside;
  3931   3970   #ifdef SQLITE_ENABLE_VFSTRACE
  3932   3971       }else if( strcmp(z,"-vfstrace")==0 ){
  3933   3972         extern int vfstrace_register(
  3934   3973            const char *zTraceName,
  3935   3974            const char *zOldVfsName,
  3936   3975            int (*xOut)(const char*,void*),
  3937   3976            void *pOutArg,
................................................................................
  4039   4078         return 0;
  4040   4079       }else if( strcmp(z,"-interactive")==0 ){
  4041   4080         stdin_is_interactive = 1;
  4042   4081       }else if( strcmp(z,"-batch")==0 ){
  4043   4082         stdin_is_interactive = 0;
  4044   4083       }else if( strcmp(z,"-heap")==0 ){
  4045   4084         i++;
         4085  +    }else if( strcmp(z,"-scratch")==0 ){
         4086  +      i+=2;
         4087  +    }else if( strcmp(z,"-pagecache")==0 ){
         4088  +      i+=2;
         4089  +    }else if( strcmp(z,"-lookaside")==0 ){
         4090  +      i+=2;
  4046   4091       }else if( strcmp(z,"-mmap")==0 ){
  4047   4092         i++;
  4048   4093       }else if( strcmp(z,"-vfs")==0 ){
  4049   4094         i++;
  4050   4095   #ifdef SQLITE_ENABLE_VFSTRACE
  4051   4096       }else if( strcmp(z,"-vfstrace")==0 ){
  4052   4097         i++;

Changes to src/sqlite.h.in.

  3074   3074   **
  3075   3075   ** [[SQLITE_LIMIT_VARIABLE_NUMBER]]
  3076   3076   ** ^(<dt>SQLITE_LIMIT_VARIABLE_NUMBER</dt>
  3077   3077   ** <dd>The maximum index number of any [parameter] in an SQL statement.)^
  3078   3078   **
  3079   3079   ** [[SQLITE_LIMIT_TRIGGER_DEPTH]] ^(<dt>SQLITE_LIMIT_TRIGGER_DEPTH</dt>
  3080   3080   ** <dd>The maximum depth of recursion for triggers.</dd>)^
         3081  +**
         3082  +** [[SQLITE_LIMIT_WORKER_THREADS]] ^(<dt>SQLITE_LIMIT_WORKER_THREADS</dt>
         3083  +** <dd>The maximum number of auxiliary worker threads that a single
         3084  +** [prepared statement] may start.</dd>)^
  3081   3085   ** </dl>
  3082   3086   */
  3083   3087   #define SQLITE_LIMIT_LENGTH                    0
  3084   3088   #define SQLITE_LIMIT_SQL_LENGTH                1
  3085   3089   #define SQLITE_LIMIT_COLUMN                    2
  3086   3090   #define SQLITE_LIMIT_EXPR_DEPTH                3
  3087   3091   #define SQLITE_LIMIT_COMPOUND_SELECT           4
  3088   3092   #define SQLITE_LIMIT_VDBE_OP                   5
  3089   3093   #define SQLITE_LIMIT_FUNCTION_ARG              6
  3090   3094   #define SQLITE_LIMIT_ATTACHED                  7
  3091   3095   #define SQLITE_LIMIT_LIKE_PATTERN_LENGTH       8
  3092   3096   #define SQLITE_LIMIT_VARIABLE_NUMBER           9
  3093   3097   #define SQLITE_LIMIT_TRIGGER_DEPTH            10
         3098  +#define SQLITE_LIMIT_WORKER_THREADS           11
  3094   3099   
  3095   3100   /*
  3096   3101   ** CAPI3REF: Compiling An SQL Statement
  3097   3102   ** KEYWORDS: {SQL statement compiler}
  3098   3103   **
  3099   3104   ** To execute an SQL query, it must first be compiled into a byte-code
  3100   3105   ** program using one of these routines.
................................................................................
  6166   6171   #define SQLITE_TESTCTRL_SCRATCHMALLOC           17
  6167   6172   #define SQLITE_TESTCTRL_LOCALTIME_FAULT         18
  6168   6173   #define SQLITE_TESTCTRL_EXPLAIN_STMT            19
  6169   6174   #define SQLITE_TESTCTRL_NEVER_CORRUPT           20
  6170   6175   #define SQLITE_TESTCTRL_VDBE_COVERAGE           21
  6171   6176   #define SQLITE_TESTCTRL_BYTEORDER               22
  6172   6177   #define SQLITE_TESTCTRL_ISINIT                  23
  6173         -#define SQLITE_TESTCTRL_LAST                    23
         6178  +#define SQLITE_TESTCTRL_SORTER_MMAP             24
         6179  +#define SQLITE_TESTCTRL_LAST                    24
  6174   6180   
  6175   6181   /*
  6176   6182   ** CAPI3REF: SQLite Runtime Status
  6177   6183   **
  6178   6184   ** ^This interface is used to retrieve runtime status information
  6179   6185   ** about the performance of SQLite, and optionally to reset various
  6180   6186   ** highwater marks.  ^The first argument is an integer code for

Changes to src/sqliteInt.h.

   429    429   ** Provide a default value for SQLITE_TEMP_STORE in case it is not specified
   430    430   ** on the command-line
   431    431   */
   432    432   #ifndef SQLITE_TEMP_STORE
   433    433   # define SQLITE_TEMP_STORE 1
   434    434   # define SQLITE_TEMP_STORE_xc 1  /* Exclude from ctime.c */
   435    435   #endif
          436  +
          437  +/*
          438  +** If SQLITE_TEMP_STORE is 3 (never use temporary files) or the build is
          439  +** not threadsafe (SQLITE_THREADSAFE==0), force SQLITE_MAX_WORKER_THREADS
          440  +** to zero.  Otherwise, if no value has been provided, default it to 8.
          441  +*/
          442  +#if SQLITE_TEMP_STORE==3 || SQLITE_THREADSAFE==0
          443  +# undef SQLITE_MAX_WORKER_THREADS
          444  +# define SQLITE_MAX_WORKER_THREADS 0
          445  +#endif
          446  +#ifndef SQLITE_MAX_WORKER_THREADS
          447  +# define SQLITE_MAX_WORKER_THREADS 8
          448  +#endif
          449  +#ifndef SQLITE_DEFAULT_WORKER_THREADS
          450  +# define SQLITE_DEFAULT_WORKER_THREADS 0
          451  +#endif
          452  +#if SQLITE_DEFAULT_WORKER_THREADS>SQLITE_MAX_WORKER_THREADS
          453  +# undef SQLITE_MAX_WORKER_THREADS
          454  +# define SQLITE_MAX_WORKER_THREADS SQLITE_DEFAULT_WORKER_THREADS
          455  +#endif
          456  +
   436    457   
   437    458   /*
   438    459   ** GCC does not define the offsetof() macro so we'll have to do it
   439    460   ** ourselves.
   440    461   */
   441    462   #ifndef offsetof
   442    463   #define offsetof(STRUCTURE,FIELD) ((int)((char*)&((STRUCTURE*)0)->FIELD))
................................................................................
   813    834   typedef struct Module Module;
   814    835   typedef struct NameContext NameContext;
   815    836   typedef struct Parse Parse;
   816    837   typedef struct PrintfArguments PrintfArguments;
   817    838   typedef struct RowSet RowSet;
   818    839   typedef struct Savepoint Savepoint;
   819    840   typedef struct Select Select;
          841  +typedef struct SQLiteThread SQLiteThread;
   820    842   typedef struct SelectDest SelectDest;
   821    843   typedef struct SrcList SrcList;
   822    844   typedef struct StrAccum StrAccum;
   823    845   typedef struct Table Table;
   824    846   typedef struct TableLock TableLock;
   825    847   typedef struct Token Token;
   826    848   typedef struct Trigger Trigger;
................................................................................
   925    947   #define DB_UnresetViews    0x0002  /* Some views have defined column names */
   926    948   #define DB_Empty           0x0004  /* The file is empty (length 0 bytes) */
   927    949   
   928    950   /*
   929    951   ** The number of different kinds of things that can be limited
   930    952   ** using the sqlite3_limit() interface.
   931    953   */
   932         -#define SQLITE_N_LIMIT (SQLITE_LIMIT_TRIGGER_DEPTH+1)
          954  +#define SQLITE_N_LIMIT (SQLITE_LIMIT_WORKER_THREADS+1)
   933    955   
   934    956   /*
   935    957   ** Lookaside malloc is a set of fixed-size buffers that can be used
   936    958   ** to satisfy small transient memory allocation requests for objects
   937    959   ** associated with a particular database connection.  The use of
   938    960   ** lookaside malloc provides a significant performance enhancement
   939    961   ** (approx 10%) by avoiding numerous malloc/free requests while parsing
................................................................................
  1002   1024     u8 vtabOnConflict;            /* Value to return for s3_vtab_on_conflict() */
  1003   1025     u8 isTransactionSavepoint;    /* True if the outermost savepoint is a TS */
  1004   1026     int nextPagesize;             /* Pagesize after VACUUM if >0 */
  1005   1027     u32 magic;                    /* Magic number for detect library misuse */
  1006   1028     int nChange;                  /* Value returned by sqlite3_changes() */
  1007   1029     int nTotalChange;             /* Value returned by sqlite3_total_changes() */
  1008   1030     int aLimit[SQLITE_N_LIMIT];   /* Limits */
         1031  +  int nMaxSorterMmap;           /* Maximum size of regions mapped by sorter */
  1009   1032     struct sqlite3InitInfo {      /* Information used during initialization */
  1010   1033       int newTnum;                /* Rootpage of table being initialized */
  1011   1034       u8 iDb;                     /* Which db file is being initialized */
  1012   1035       u8 busy;                    /* TRUE if currently initializing */
  1013   1036       u8 orphanTrigger;           /* Last statement is orphaned TEMP trigger */
  1014   1037     } init;
  1015   1038     int nVdbeActive;              /* Number of VDBEs currently running */
................................................................................
  1665   1688   ** The r1 and r2 member variables are only used by the optimized comparison
  1666   1689   ** functions vdbeRecordCompareInt() and vdbeRecordCompareString().
  1667   1690   */
  1668   1691   struct UnpackedRecord {
  1669   1692     KeyInfo *pKeyInfo;  /* Collation and sort-order information */
  1670   1693     u16 nField;         /* Number of entries in apMem[] */
  1671   1694     i8 default_rc;      /* Comparison result if keys are equal */
  1672         -  u8 isCorrupt;       /* Corruption detected by xRecordCompare() */
         1695  +  u8 errCode;         /* Error detected by xRecordCompare (CORRUPT or NOMEM) */
  1673   1696     Mem *aMem;          /* Values */
  1674   1697     int r1;             /* Value to return if (lhs > rhs) */
  1675   1698     int r2;             /* Value to return if (rhs < lhs) */
  1676   1699   };
  1677   1700   
  1678   1701   
  1679   1702   /*
................................................................................
  3709   3732   #endif
  3710   3733   #define MEMTYPE_HEAP       0x01  /* General heap allocations */
  3711   3734   #define MEMTYPE_LOOKASIDE  0x02  /* Might have been lookaside memory */
  3712   3735   #define MEMTYPE_SCRATCH    0x04  /* Scratch allocations */
  3713   3736   #define MEMTYPE_PCACHE     0x08  /* Page cache allocations */
  3714   3737   #define MEMTYPE_DB         0x10  /* Uses sqlite3DbMalloc, not sqlite_malloc */
  3715   3738   
  3716         -
  3717   3739   #if (SQLITE_ENABLE_APPLE_SPI>0) && defined(__APPLE__)
  3718   3740   
  3719   3741   /*
  3720   3742   ** An instance of the following structure is used to hold the process ID
  3721   3743   ** and return-by-reference lockstate value.  The SQLITE_FCNTL_LOCKSTATE_PID
  3722   3744   ** requires the 4th argument to sqlite3_file_control to be a pointer to an
  3723   3745   ** instance of LockstatePID initialized with a LockstatePID.pid value equal
................................................................................
  3732   3754   */
  3733   3755   typedef struct LockstatePID LockstatePID;
  3734   3756   struct LockstatePID {
  3735   3757     pid_t pid;                 /* Process ID to test */
  3736   3758     int state;                 /* The state of the lock (return value) */
  3737   3759   };
  3738   3760   
         3761  +#endif /* (SQLITE_ENABLE_APPLE_SPI>0) && defined(__APPLE__) */
         3762  +
         3763  +/*
         3764  +** Threading interface
         3765  +*/
         3766  +#if SQLITE_MAX_WORKER_THREADS>0
         3767  +int sqlite3ThreadCreate(SQLiteThread**,void*(*)(void*),void*);
         3768  +int sqlite3ThreadJoin(SQLiteThread*, void**);
  3739   3769   #endif
  3740   3770   
  3741   3771   #endif /* _SQLITEINT_H_ */

Changes to src/test1.c.

  2712   2712     return TCL_OK;
  2713   2713   
  2714   2714   bad_args:
  2715   2715     Tcl_AppendResult(interp, "wrong # args: should be \"",
  2716   2716         Tcl_GetStringFromObj(objv[0], 0), " <DB> <utf8> <utf16le> <utf16be>", 0);
  2717   2717     return TCL_ERROR;
  2718   2718   }
         2719  +
         2720  +/*
         2721  +** Usage: add_test_utf16bin_collate <db ptr>
         2722  +**
         2723  +** Add a utf-16 collation sequence named "utf16bin" to the database
         2724  +** handle. This collation sequence compares arguments in the same way as the
         2725  +** built-in collation "binary".
         2726  +*/
         2727  +static int test_utf16bin_collate_func(
         2728  +  void *pCtx, 
         2729  +  int nA, const void *zA,
         2730  +  int nB, const void *zB
         2731  +){
         2732  +  int nCmp = (nA>nB ? nB : nA);
         2733  +  int res = memcmp(zA, zB, nCmp);
         2734  +  if( res==0 ) res = nA - nB;
         2735  +  return res;
         2736  +}
         2737  +static int test_utf16bin_collate(
         2738  +  void * clientData,
         2739  +  Tcl_Interp *interp,
         2740  +  int objc,
         2741  +  Tcl_Obj *CONST objv[]
         2742  +){
         2743  +  sqlite3 *db;
         2744  +  int rc;
         2745  +
         2746  +  if( objc!=2 ) goto bad_args;
         2747  +  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;
         2748  +
         2749  +  rc = sqlite3_create_collation(db, "utf16bin", SQLITE_UTF16, 0, 
         2750  +      test_utf16bin_collate_func
         2751  +  );
         2752  +  if( sqlite3TestErrCode(interp, db, rc) ) return TCL_ERROR;
         2753  +  return TCL_OK;
         2754  +
         2755  +bad_args:
         2756  +  Tcl_WrongNumArgs(interp, 1, objv, "DB");
         2757  +  return TCL_ERROR;
         2758  +}
  2719   2759   
  2720   2760   /*
  2721   2761   ** When the collation needed callback is invoked, record the name of 
  2722   2762   ** the requested collating function here.  The recorded name is linked
  2723   2763   ** to a TCL variable and used to make sure that the requested collation
  2724   2764   ** name is correct.
  2725   2765   */
................................................................................
  6083   6123     Tcl_Obj *CONST objv[]
  6084   6124   ){
  6085   6125     struct Verb {
  6086   6126       const char *zName;
  6087   6127       int i;
  6088   6128     } aVerb[] = {
  6089   6129       { "SQLITE_TESTCTRL_LOCALTIME_FAULT", SQLITE_TESTCTRL_LOCALTIME_FAULT }, 
         6130  +    { "SQLITE_TESTCTRL_SORTER_MMAP", SQLITE_TESTCTRL_SORTER_MMAP }, 
  6090   6131     };
  6091   6132     int iVerb;
  6092   6133     int iFlag;
  6093   6134     int rc;
  6094   6135   
  6095   6136     if( objc<2 ){
  6096   6137       Tcl_WrongNumArgs(interp, 1, objv, "VERB ARGS...");
................................................................................
  6110   6151           Tcl_WrongNumArgs(interp, 2, objv, "ONOFF");
  6111   6152           return TCL_ERROR;
  6112   6153         }
  6113   6154         if( Tcl_GetBooleanFromObj(interp, objv[2], &val) ) return TCL_ERROR;
  6114   6155         sqlite3_test_control(SQLITE_TESTCTRL_LOCALTIME_FAULT, val);
  6115   6156         break;
  6116   6157       }
         6158  +
         6159  +    case SQLITE_TESTCTRL_SORTER_MMAP: {
         6160  +      int val;
         6161  +      sqlite3 *db;
         6162  +      if( objc!=4 ){
         6163  +        Tcl_WrongNumArgs(interp, 2, objv, "DB LIMIT");
         6164  +        return TCL_ERROR;
         6165  +      }
         6166  +      if( getDbPointer(interp, Tcl_GetString(objv[2]), &db) ) return TCL_ERROR;
         6167  +      if( Tcl_GetIntFromObj(interp, objv[3], &val) ) return TCL_ERROR;
         6168  +      sqlite3_test_control(SQLITE_TESTCTRL_SORTER_MMAP, db, val);
         6169  +      break;
         6170  +    }
  6117   6171     }
  6118   6172   
  6119   6173     Tcl_ResetResult(interp);
  6120   6174     return TCL_OK;
  6121   6175   }
  6122   6176   
  6123   6177   #if SQLITE_OS_UNIX
................................................................................
  6523   6577         sqlite3_free(zErrMsg);
  6524   6578         return TCL_ERROR;
  6525   6579       }
  6526   6580     }
  6527   6581     return TCL_OK;
  6528   6582   }
  6529   6583   
         6584  +/*
         6585  +**     sorter_test_fakeheap BOOL
         6586  +**
         6587  +*/
         6588  +static int sorter_test_fakeheap(
         6589  +  void * clientData,
         6590  +  Tcl_Interp *interp,
         6591  +  int objc,
         6592  +  Tcl_Obj *CONST objv[]
         6593  +){
         6594  +  int bArg;
         6595  +  if( objc!=2 ){
         6596  +    Tcl_WrongNumArgs(interp, 1, objv, "BOOL");
         6597  +    return TCL_ERROR;
         6598  +  }
         6599  +
         6600  +  if( Tcl_GetBooleanFromObj(interp, objv[1], &bArg) ){
         6601  +    return TCL_ERROR;
         6602  +  }
         6603  +
         6604  +  if( bArg ){
         6605  +    if( sqlite3GlobalConfig.pHeap==0 ){
         6606  +      sqlite3GlobalConfig.pHeap = SQLITE_INT_TO_PTR(-1);
         6607  +    }
         6608  +  }else{
         6609  +    if( sqlite3GlobalConfig.pHeap==SQLITE_INT_TO_PTR(-1) ){
         6610  +      sqlite3GlobalConfig.pHeap = 0;
         6611  +    }
         6612  +  }
         6613  +
         6614  +  Tcl_ResetResult(interp);
         6615  +  return TCL_OK;
         6616  +}
         6617  +
         6618  +/*
         6619  +**     sorter_test_sort4_helper DB SQL1 NSTEP SQL2
         6620  +**
         6621  +** Compile SQL statement $SQL1 and step it $NSTEP times. For each row, 
         6622  +** check that the leftmost and rightmost columns returned are both integers,
         6623  +** and that both contain the same value.
         6624  +**
         6625  +** Then execute statement $SQL2. Check that the statement returns the same
         6626  +** set of integers in the same order as in the previous step (using $SQL1).
         6627  +*/
         6628  +static int sorter_test_sort4_helper(
         6629  +  void * clientData,
         6630  +  Tcl_Interp *interp,
         6631  +  int objc,
         6632  +  Tcl_Obj *CONST objv[]
         6633  +){
         6634  +  const char *zSql1;
         6635  +  const char *zSql2;
         6636  +  int nStep; 
         6637  +  int iStep; 
         6638  +  int iCksum1 = 0; 
         6639  +  int iCksum2 = 0; 
         6640  +  int rc;
         6641  +  int iB;
         6642  +  sqlite3 *db;
         6643  +  sqlite3_stmt *pStmt;
         6644  +  
         6645  +  if( objc!=5 ){
         6646  +    Tcl_WrongNumArgs(interp, 1, objv, "DB SQL1 NSTEP SQL2");
         6647  +    return TCL_ERROR;
         6648  +  }
         6649  +
         6650  +  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;
         6651  +  zSql1 = Tcl_GetString(objv[2]);
         6652  +  if( Tcl_GetIntFromObj(interp, objv[3], &nStep) ) return TCL_ERROR;
         6653  +  zSql2 = Tcl_GetString(objv[4]);
         6654  +
         6655  +  rc = sqlite3_prepare_v2(db, zSql1, -1, &pStmt, 0);
         6656  +  if( rc!=SQLITE_OK ) goto sql_error;
         6657  +
         6658  +  iB = sqlite3_column_count(pStmt)-1;
         6659  +  for(iStep=0; iStep<nStep && SQLITE_ROW==sqlite3_step(pStmt); iStep++){
         6660  +    int a = sqlite3_column_int(pStmt, 0);
         6661  +    if( a!=sqlite3_column_int(pStmt, iB) ){
         6662  +      Tcl_AppendResult(interp, "data error: (a!=b)", 0);
         6663  +      return TCL_ERROR;
         6664  +    }
         6665  +
         6666  +    iCksum1 += (iCksum1 << 3) + a;
         6667  +  }
         6668  +  rc = sqlite3_finalize(pStmt);
         6669  +  if( rc!=SQLITE_OK ) goto sql_error;
         6670  +
         6671  +  rc = sqlite3_prepare_v2(db, zSql2, -1, &pStmt, 0);
         6672  +  if( rc!=SQLITE_OK ) goto sql_error;
         6673  +  for(iStep=0; SQLITE_ROW==sqlite3_step(pStmt); iStep++){
         6674  +    int a = sqlite3_column_int(pStmt, 0);
         6675  +    iCksum2 += (iCksum2 << 3) + a;
         6676  +  }
         6677  +  rc = sqlite3_finalize(pStmt);
         6678  +  if( rc!=SQLITE_OK ) goto sql_error;
         6679  +
         6680  +  if( iCksum1!=iCksum2 ){
         6681  +    Tcl_AppendResult(interp, "checksum mismatch", 0);
         6682  +    return TCL_ERROR;
         6683  +  }
         6684  +
         6685  +  return TCL_OK;
         6686  + sql_error:
         6687  +  Tcl_AppendResult(interp, "sql error: ", sqlite3_errmsg(db), 0);
         6688  +  return TCL_ERROR;
         6689  +}
         6690  +
  6530   6691   
  6531   6692   /*
  6532   6693   ** Register commands with the TCL interpreter.
  6533   6694   */
  6534   6695   int Sqlitetest1_Init(Tcl_Interp *interp){
  6535   6696     extern int sqlite3_search_count;
  6536   6697     extern int sqlite3_found_count;
................................................................................
  6732   6893        { "path_is_dos",                path_is_dos,  0   },
  6733   6894   
  6734   6895        /* Functions from os.h */
  6735   6896   #ifndef SQLITE_OMIT_UTF16
  6736   6897        { "add_test_collate",        test_collate, 0            },
  6737   6898        { "add_test_collate_needed", test_collate_needed, 0     },
  6738   6899        { "add_test_function",       test_function, 0           },
         6900  +     { "add_test_utf16bin_collate",    test_utf16bin_collate, 0        },
  6739   6901   #endif
  6740   6902        { "sqlite3_test_errstr",     test_errstr, 0             },
  6741   6903        { "tcl_variable_type",       tcl_variable_type, 0       },
  6742   6904   #ifndef SQLITE_OMIT_SHARED_CACHE
  6743   6905        { "sqlite3_enable_shared_cache", test_enable_shared, 0  },
  6744   6906        { "sqlite3_shared_cache_report", sqlite3BtreeSharedCacheReport, 0},
  6745   6907   #endif
................................................................................
  6765   6927        { "print_explain_query_plan", test_print_eqp, 0  },
  6766   6928   #endif
  6767   6929        { "sqlite3_test_control", test_test_control },
  6768   6930   #if SQLITE_OS_UNIX
  6769   6931        { "getrusage", test_getrusage },
  6770   6932   #endif
  6771   6933        { "load_static_extension", tclLoadStaticExtensionCmd },
         6934  +     { "sorter_test_fakeheap", sorter_test_fakeheap },
         6935  +     { "sorter_test_sort4_helper", sorter_test_sort4_helper },
  6772   6936     };
  6773   6937     static int bitmask_size = sizeof(Bitmask)*8;
  6774   6938     int i;
  6775   6939     extern int sqlite3_sync_count, sqlite3_fullsync_count;
  6776   6940     extern int sqlite3_opentemp_count;
  6777   6941     extern int sqlite3_like_count;
  6778   6942     extern int sqlite3_xferopt_count;

Changes to src/test_config.c.

    98     98   #endif
    99     99   
   100    100   #if SQLITE_MAX_MMAP_SIZE>0
   101    101     Tcl_SetVar2(interp, "sqlite_options", "mmap", "1", TCL_GLOBAL_ONLY);
   102    102   #else
   103    103     Tcl_SetVar2(interp, "sqlite_options", "mmap", "0", TCL_GLOBAL_ONLY);
   104    104   #endif
          105  +
          106  +  Tcl_SetVar2(interp, "sqlite_options", "worker_threads", 
          107  +      STRINGVALUE(SQLITE_MAX_WORKER_THREADS), TCL_GLOBAL_ONLY
          108  +  );
   105    109   
   106    110   #if 1 /* def SQLITE_MEMDEBUG */
   107    111     Tcl_SetVar2(interp, "sqlite_options", "memdebug", "1", TCL_GLOBAL_ONLY);
   108    112   #else
   109    113     Tcl_SetVar2(interp, "sqlite_options", "memdebug", "0", TCL_GLOBAL_ONLY);
   110    114   #endif
   111    115   

Changes to src/test_malloc.c.

  1248   1248     }
  1249   1249   
  1250   1250     rc = sqlite3_config(SQLITE_CONFIG_COVERING_INDEX_SCAN, bUseCis);
  1251   1251     Tcl_SetResult(interp, (char *)sqlite3ErrName(rc), TCL_VOLATILE);
  1252   1252   
  1253   1253     return TCL_OK;
  1254   1254   }
         1255  +
  1255   1256   
  1256   1257   /*
  1257   1258   ** Usage:    sqlite3_dump_memsys3  FILENAME
  1258   1259   **           sqlite3_dump_memsys5  FILENAME
  1259   1260   **
  1260   1261   ** Write a summary of unfreed memsys3 allocations to FILENAME.
  1261   1262   */

Added src/threads.c.

            1  +/*
            2  +** 2012 July 21
            3  +**
            4  +** The author disclaims copyright to this source code.  In place of
            5  +** a legal notice, here is a blessing:
            6  +**
            7  +**    May you do good and not evil.
            8  +**    May you find forgiveness for yourself and forgive others.
            9  +**    May you share freely, never taking more than you give.
           10  +**
           11  +******************************************************************************
           12  +**
           13  +** This file presents a simple cross-platform threading interface for
           14  +** use internally by SQLite.
           15  +**
           16  +** A "thread" can be created using sqlite3ThreadCreate().  This thread
           17  +** runs independently of its creator until it is joined using
           18  +** sqlite3ThreadJoin(), at which point it terminates.
           19  +**
           20  +** Threads do not have to be real.  It could be that the work of the
           21  +** "thread" is done by the main thread at either the sqlite3ThreadCreate()
           22  +** or sqlite3ThreadJoin() call.  This is, in fact, what happens in
           23  +** single threaded systems.  Nothing in SQLite requires multiple threads.
           24  +** This interface exists so that applications that want to take advantage
           25  +** of multiple cores can do so, while also allowing applications to stay
           26  +** single-threaded if desired.
           27  +*/
           28  +#include "sqliteInt.h"
           29  +
           30  +#if SQLITE_MAX_WORKER_THREADS>0
           31  +
           32  +/********************************* Unix Pthreads ****************************/
           33  +#if SQLITE_OS_UNIX && defined(SQLITE_MUTEX_PTHREADS) && SQLITE_THREADSAFE>0
           34  +
           35  +#define SQLITE_THREADS_IMPLEMENTED 1  /* Prevent the single-thread code below */
           36  +#include <pthread.h>
           37  +
           38  +/* A running thread */
           39  +struct SQLiteThread {
           40  +  pthread_t tid;                 /* Thread ID */
           41  +  int done;                      /* Set to true when thread finishes */
           42  +  void *pOut;                    /* Result returned by the thread */
           43  +  void *(*xTask)(void*);         /* The thread routine */
           44  +  void *pIn;                     /* Argument to the thread */
           45  +};
           46  +
           47  +/* Create a new thread */
           48  +int sqlite3ThreadCreate(
           49  +  SQLiteThread **ppThread,  /* OUT: Write the thread object here */
           50  +  void *(*xTask)(void*),    /* Routine to run in a separate thread */
           51  +  void *pIn                 /* Argument passed into xTask() */
           52  +){
           53  +  SQLiteThread *p;
           54  +  int rc;
           55  +
           56  +  assert( ppThread!=0 );
           57  +  assert( xTask!=0 );
           58  +  /* This routine is never used in single-threaded mode */
           59  +  assert( sqlite3GlobalConfig.bCoreMutex!=0 );
           60  +
           61  +  *ppThread = 0;
           62  +  p = sqlite3Malloc(sizeof(*p));
           63  +  if( p==0 ) return SQLITE_NOMEM;
           64  +  memset(p, 0, sizeof(*p));
           65  +  p->xTask = xTask;
           66  +  p->pIn = pIn;
           67  +  if( sqlite3FaultSim(200) ){
           68  +    rc = 1;
           69  +  }else{    
           70  +    rc = pthread_create(&p->tid, 0, xTask, pIn);
           71  +  }
           72  +  if( rc ){
           73  +    p->done = 1;
           74  +    p->pOut = xTask(pIn);
           75  +  }
           76  +  *ppThread = p;
           77  +  return SQLITE_OK;
           78  +}
           79  +
           80  +/* Get the results of the thread */
           81  +int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){
           82  +  int rc;
           83  +
           84  +  assert( ppOut!=0 );
           85  +  if( NEVER(p==0) ) return SQLITE_NOMEM;
           86  +  if( p->done ){
           87  +    *ppOut = p->pOut;
           88  +    rc = SQLITE_OK;
           89  +  }else{
           90  +    rc = pthread_join(p->tid, ppOut) ? SQLITE_ERROR : SQLITE_OK;
           91  +  }
           92  +  sqlite3_free(p);
           93  +  return rc;
           94  +}
           95  +
           96  +#endif /* SQLITE_OS_UNIX && defined(SQLITE_MUTEX_PTHREADS) */
           97  +/******************************** End Unix Pthreads *************************/
           98  +
           99  +
          100  +/********************************* Win32 Threads ****************************/
          101  +#if SQLITE_OS_WIN && !SQLITE_OS_WINRT && SQLITE_THREADSAFE>0
          102  +
          103  +#define SQLITE_THREADS_IMPLEMENTED 1  /* Prevent the single-thread code below */
          104  +#include <process.h>
          105  +
          106  +/* A running thread */
          107  +struct SQLiteThread {
          108  +  uintptr_t tid;           /* The thread handle */
          109  +  unsigned id;             /* The thread identifier */
          110  +  void *(*xTask)(void*);   /* The routine to run as a thread */
          111  +  void *pIn;               /* Argument to xTask */
          112  +  void *pResult;           /* Result of xTask */
          113  +};
          114  +
          115  +/* Thread procedure Win32 compatibility shim */
          116  +static unsigned __stdcall sqlite3ThreadProc(
          117  +  void *pArg  /* IN: Pointer to the SQLiteThread structure */
          118  +){
          119  +  SQLiteThread *p = (SQLiteThread *)pArg;
          120  +
          121  +  assert( p!=0 );
          122  +#if 0
          123  +  /*
          124  +  ** This assert appears to trigger spuriously on certain
          125  +  ** versions of Windows, possibly due to _beginthreadex()
          126  +  ** and/or CreateThread() not fully setting their thread
          127  +  ** ID parameter before starting the thread.
          128  +  */
          129  +  assert( p->id==GetCurrentThreadId() );
          130  +#endif
          131  +  assert( p->xTask!=0 );
          132  +  p->pResult = p->xTask(p->pIn);
          133  +
          134  +  _endthreadex(0);
          135  +  return 0; /* NOT REACHED */
          136  +}
          137  +
          138  +/* Create a new thread */
          139  +int sqlite3ThreadCreate(
          140  +  SQLiteThread **ppThread,  /* OUT: Write the thread object here */
          141  +  void *(*xTask)(void*),    /* Routine to run in a separate thread */
          142  +  void *pIn                 /* Argument passed into xTask() */
          143  +){
          144  +  SQLiteThread *p;
          145  +
          146  +  assert( ppThread!=0 );
          147  +  assert( xTask!=0 );
          148  +  *ppThread = 0;
          149  +  p = sqlite3Malloc(sizeof(*p));
          150  +  if( p==0 ) return SQLITE_NOMEM;
          151  +  if( sqlite3GlobalConfig.bCoreMutex==0 ){
          152  +    memset(p, 0, sizeof(*p));
          153  +  }else{
          154  +    p->xTask = xTask;
          155  +    p->pIn = pIn;
          156  +    p->tid = _beginthreadex(0, 0, sqlite3ThreadProc, p, 0, &p->id);
          157  +    if( p->tid==0 ){
          158  +      memset(p, 0, sizeof(*p));
          159  +    }
          160  +  }
          161  +  if( p->xTask==0 ){
          162  +    p->id = GetCurrentThreadId();
          163  +    p->pResult = xTask(pIn);
          164  +  }
          165  +  *ppThread = p;
          166  +  return SQLITE_OK;
          167  +}
          168  +
          169  +DWORD sqlite3Win32Wait(HANDLE hObject); /* os_win.c */
          170  +
          171  +/* Get the results of the thread */
          172  +int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){
          173  +  DWORD rc;
          174  +  BOOL bRc;
          175  +
          176  +  assert( ppOut!=0 );
          177  +  if( NEVER(p==0) ) return SQLITE_NOMEM;
          178  +  if( p->xTask==0 ){
          179  +    assert( p->id==GetCurrentThreadId() );
          180  +    rc = WAIT_OBJECT_0;
          181  +    assert( p->tid==0 );
          182  +  }else{
          183  +    assert( p->id!=0 && p->id!=GetCurrentThreadId() );
          184  +    rc = sqlite3Win32Wait((HANDLE)p->tid);
          185  +    assert( rc!=WAIT_IO_COMPLETION );
          186  +    bRc = CloseHandle((HANDLE)p->tid);
          187  +    assert( bRc );
          188  +  }
          189  +  if( rc==WAIT_OBJECT_0 ) *ppOut = p->pResult;
          190  +  sqlite3_free(p);
          191  +  return (rc==WAIT_OBJECT_0) ? SQLITE_OK : SQLITE_ERROR;
          192  +}
          193  +
          194  +#endif /* SQLITE_OS_WIN && !SQLITE_OS_WINRT */
          195  +/******************************** End Win32 Threads *************************/
          196  +
          197  +
          198  +/********************************* Single-Threaded **************************/
          199  +#ifndef SQLITE_THREADS_IMPLEMENTED
          200  +/*
          201  +** This implementation does not actually create a new thread.  It does the
          202  +** work of the thread in the main thread, when either the thread is created
           203  +** or when it is joined.
          204  +*/
          205  +
          206  +/* A running thread */
          207  +struct SQLiteThread {
          208  +  void *(*xTask)(void*);   /* The routine to run as a thread */
          209  +  void *pIn;               /* Argument to xTask */
          210  +  void *pResult;           /* Result of xTask */
          211  +};
          212  +
          213  +/* Create a new thread */
          214  +int sqlite3ThreadCreate(
          215  +  SQLiteThread **ppThread,  /* OUT: Write the thread object here */
          216  +  void *(*xTask)(void*),    /* Routine to run in a separate thread */
          217  +  void *pIn                 /* Argument passed into xTask() */
          218  +){
          219  +  SQLiteThread *p;
          220  +
          221  +  assert( ppThread!=0 );
          222  +  assert( xTask!=0 );
          223  +  *ppThread = 0;
          224  +  p = sqlite3Malloc(sizeof(*p));
          225  +  if( p==0 ) return SQLITE_NOMEM;
          226  +  if( (SQLITE_PTR_TO_INT(p)/17)&1 ){
          227  +    p->xTask = xTask;
          228  +    p->pIn = pIn;
          229  +  }else{
          230  +    p->xTask = 0;
          231  +    p->pResult = xTask(pIn);
          232  +  }
          233  +  *ppThread = p;
          234  +  return SQLITE_OK;
          235  +}
          236  +
          237  +/* Get the results of the thread */
          238  +int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){
          239  +
          240  +  assert( ppOut!=0 );
          241  +  if( NEVER(p==0) ) return SQLITE_NOMEM;
          242  +  if( p->xTask ){
          243  +    *ppOut = p->xTask(p->pIn);
          244  +  }else{
          245  +    *ppOut = p->pResult;
          246  +  }
          247  +  sqlite3_free(p);
          248  +
          249  +#if defined(SQLITE_TEST)
          250  +  {
          251  +    void *pTstAlloc = sqlite3Malloc(10);
          252  +    if (!pTstAlloc) return SQLITE_NOMEM;
          253  +    sqlite3_free(pTstAlloc);
          254  +  }
          255  +#endif
          256  +
          257  +  return SQLITE_OK;
          258  +}
          259  +
          260  +#endif /* !defined(SQLITE_THREADS_IMPLEMENTED) */
          261  +/****************************** End Single-Threaded *************************/
          262  +#endif /* SQLITE_MAX_WORKER_THREADS>0 */

Changes to src/vdbe.c.

  1161   1161     pIn1 = &aMem[p1];
  1162   1162     pOut = &aMem[p2];
  1163   1163     do{
  1164   1164       assert( pOut<=&aMem[(p->nMem-p->nCursor)] );
  1165   1165       assert( pIn1<=&aMem[(p->nMem-p->nCursor)] );
  1166   1166       assert( memIsValid(pIn1) );
  1167   1167       memAboutToChange(p, pOut);
  1168         -    VdbeMemReleaseExtern(pOut);
         1168  +    sqlite3VdbeMemRelease(pOut);
  1169   1169       zMalloc = pOut->zMalloc;
  1170   1170       memcpy(pOut, pIn1, sizeof(Mem));
  1171   1171   #ifdef SQLITE_DEBUG
  1172   1172       if( pOut->pScopyFrom>=&aMem[p1] && pOut->pScopyFrom<&aMem[p1+pOp->p3] ){
  1173   1173         pOut->pScopyFrom += p1 - pOp->p2;
  1174   1174       }
  1175   1175   #endif
................................................................................
  1541   1541     sqlite3_value **apVal;
  1542   1542     int n;
  1543   1543   
  1544   1544     n = pOp->p5;
  1545   1545     apVal = p->apArg;
  1546   1546     assert( apVal || n==0 );
  1547   1547     assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) );
  1548         -  pOut = &aMem[pOp->p3];
  1549         -  memAboutToChange(p, pOut);
         1548  +  ctx.pOut = &aMem[pOp->p3];
         1549  +  memAboutToChange(p, ctx.pOut);
  1550   1550   
  1551   1551     assert( n==0 || (pOp->p2>0 && pOp->p2+n<=(p->nMem-p->nCursor)+1) );
  1552   1552     assert( pOp->p3<pOp->p2 || pOp->p3>=pOp->p2+n );
  1553   1553     pArg = &aMem[pOp->p2];
  1554   1554     for(i=0; i<n; i++, pArg++){
  1555   1555       assert( memIsValid(pArg) );
  1556   1556       apVal[i] = pArg;
................................................................................
  1558   1558       REGISTER_TRACE(pOp->p2+i, pArg);
  1559   1559     }
  1560   1560   
  1561   1561     assert( pOp->p4type==P4_FUNCDEF );
  1562   1562     ctx.pFunc = pOp->p4.pFunc;
  1563   1563     ctx.iOp = pc;
  1564   1564     ctx.pVdbe = p;
  1565         -
  1566         -  /* The output cell may already have a buffer allocated. Move
  1567         -  ** the pointer to ctx.s so in case the user-function can use
  1568         -  ** the already allocated buffer instead of allocating a new one.
  1569         -  */
  1570         -  memcpy(&ctx.s, pOut, sizeof(Mem));
  1571         -  pOut->flags = MEM_Null;
  1572         -  pOut->xDel = 0;
  1573         -  pOut->zMalloc = 0;
  1574         -  MemSetTypeFlag(&ctx.s, MEM_Null);
         1565  +  MemSetTypeFlag(ctx.pOut, MEM_Null);
  1575   1566   
  1576   1567     ctx.fErrorOrAux = 0;
  1577   1568     if( ctx.pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){
  1578   1569       assert( pOp>aOp );
  1579   1570       assert( pOp[-1].p4type==P4_COLLSEQ );
  1580   1571       assert( pOp[-1].opcode==OP_CollSeq );
  1581   1572       ctx.pColl = pOp[-1].p4.pColl;
  1582   1573     }
  1583   1574     db->lastRowid = lastRowid;
  1584   1575     (*ctx.pFunc->xFunc)(&ctx, n, apVal); /* IMP: R-24505-23230 */
  1585   1576     lastRowid = db->lastRowid;
  1586   1577   
  1587         -  if( db->mallocFailed ){
  1588         -    /* Even though a malloc() has failed, the implementation of the
  1589         -    ** user function may have called an sqlite3_result_XXX() function
  1590         -    ** to return a value. The following call releases any resources
  1591         -    ** associated with such a value.
  1592         -    */
  1593         -    sqlite3VdbeMemRelease(&ctx.s);
  1594         -    goto no_mem;
  1595         -  }
  1596         -
  1597   1578     /* If the function returned an error, throw an exception */
  1598   1579     if( ctx.fErrorOrAux ){
  1599   1580       if( ctx.isError ){
  1600         -      sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(&ctx.s));
         1581  +      sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(ctx.pOut));
  1601   1582         rc = ctx.isError;
  1602   1583       }
  1603   1584       sqlite3VdbeDeleteAuxData(p, pc, pOp->p1);
  1604   1585     }
  1605   1586   
  1606   1587     /* Copy the result of the function into register P3 */
  1607         -  sqlite3VdbeChangeEncoding(&ctx.s, encoding);
  1608         -  assert( pOut->flags==MEM_Null );
  1609         -  memcpy(pOut, &ctx.s, sizeof(Mem));
  1610         -  if( sqlite3VdbeMemTooBig(pOut) ){
         1588  +  sqlite3VdbeChangeEncoding(ctx.pOut, encoding);
         1589  +  if( sqlite3VdbeMemTooBig(ctx.pOut) ){
  1611   1590       goto too_big;
  1612   1591     }
  1613   1592   
  1614         -#if 0
  1615         -  /* The app-defined function has done something that has caused this
  1616         -  ** statement to expire.  (Perhaps the function called sqlite3_exec()
  1617         -  ** with a CREATE TABLE statement.)
  1618         -  */
  1619         -  if( p->expired ) rc = SQLITE_ABORT;
  1620         -#endif
  1621         -
  1622         -  REGISTER_TRACE(pOp->p3, pOut);
  1623         -  UPDATE_MAX_BLOBSIZE(pOut);
         1593  +  REGISTER_TRACE(pOp->p3, ctx.pOut);
         1594  +  UPDATE_MAX_BLOBSIZE(ctx.pOut);
  1624   1595     break;
  1625   1596   }
  1626   1597   
  1627   1598   /* Opcode: BitAnd P1 P2 P3 * *
  1628   1599   ** Synopsis:  r[P3]=r[P1]&r[P2]
  1629   1600   **
  1630   1601   ** Take the bit-wise AND of the values in register P1 and P2 and
................................................................................
  1765   1736     }
  1766   1737     break;
  1767   1738   }
  1768   1739   #endif
  1769   1740   
  1770   1741   #ifndef SQLITE_OMIT_CAST
  1771   1742   /* Opcode: Cast P1 P2 * * *
         1743  +** Synopsis: affinity(r[P1])
  1772   1744   **
  1773   1745   ** Force the value in register P1 to be the type defined by P2.
  1774   1746   ** 
  1775   1747   ** <ul>
  1776   1748   ** <li value="97"> TEXT
  1777   1749   ** <li value="98"> BLOB
  1778   1750   ** <li value="99"> NUMERIC
................................................................................
  3386   3358         pCx->isTable = 1;
  3387   3359       }
  3388   3360     }
  3389   3361     pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED);
  3390   3362     break;
  3391   3363   }
  3392   3364   
  3393         -/* Opcode: SorterOpen P1 P2 * P4 *
         3365  +/* Opcode: SorterOpen P1 P2 P3 P4 *
  3394   3366   **
  3395   3367   ** This opcode works like OP_OpenEphemeral except that it opens
  3396   3368   ** a transient index that is specifically designed to sort large
  3397   3369   ** tables using an external merge-sort algorithm.
         3370  +**
         3371  +** If argument P3 is non-zero, then it indicates that the sorter may
         3372  +** assume that a stable sort considering the first P3 fields of each
         3373  +** key is sufficient to produce the required results.
  3398   3374   */
  3399   3375   case OP_SorterOpen: {
  3400   3376     VdbeCursor *pCx;
  3401   3377   
  3402   3378     assert( pOp->p1>=0 );
  3403   3379     assert( pOp->p2>=0 );
  3404   3380     pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1);
  3405   3381     if( pCx==0 ) goto no_mem;
  3406   3382     pCx->pKeyInfo = pOp->p4.pKeyInfo;
  3407   3383     assert( pCx->pKeyInfo->db==db );
  3408   3384     assert( pCx->pKeyInfo->enc==ENC(db) );
  3409         -  rc = sqlite3VdbeSorterInit(db, pCx);
         3385  +  rc = sqlite3VdbeSorterInit(db, pOp->p3, pCx);
         3386  +  break;
         3387  +}
         3388  +
         3389  +/* Opcode: SequenceTest P1 P2 * * *
         3390  +** Synopsis: if( cursor[P1].ctr++ ) pc = P2
         3391  +**
         3392  +** P1 is a sorter cursor. If the sequence counter is currently zero, jump
         3393  +** to P2. Regardless of whether or not the jump is taken, increment the
         3394  +** the sequence value.
         3395  +*/
         3396  +case OP_SequenceTest: {
         3397  +  VdbeCursor *pC;
         3398  +  assert( pOp->p1>=0 && pOp->p1<p->nCursor );
         3399  +  pC = p->apCsr[pOp->p1];
         3400  +  assert( pC->pSorter );
         3401  +  if( (pC->seqCount++)==0 ){
         3402  +    pc = pOp->p2 - 1;
         3403  +  }
  3410   3404     break;
  3411   3405   }
  3412   3406   
  3413   3407   /* Opcode: OpenPseudo P1 P2 P3 * *
  3414   3408   ** Synopsis: P3 columns in r[P2]
  3415   3409   **
  3416   3410   ** Open a new cursor that points to a fake table that contains a single
................................................................................
  4251   4245     int nKeyCol;
  4252   4246   
  4253   4247     pC = p->apCsr[pOp->p1];
  4254   4248     assert( isSorter(pC) );
  4255   4249     assert( pOp->p4type==P4_INT32 );
  4256   4250     pIn3 = &aMem[pOp->p3];
  4257   4251     nKeyCol = pOp->p4.i;
         4252  +  res = 0;
  4258   4253     rc = sqlite3VdbeSorterCompare(pC, pIn3, nKeyCol, &res);
  4259   4254     VdbeBranchTaken(res!=0,2);
  4260   4255     if( res ){
  4261   4256       pc = pOp->p2-1;
  4262   4257     }
  4263   4258     break;
  4264   4259   };
................................................................................
  4515   4510     assert( pC!=0 );
  4516   4511     assert( isSorter(pC)==(pOp->opcode==OP_SorterSort) );
  4517   4512     res = 1;
  4518   4513   #ifdef SQLITE_DEBUG
  4519   4514     pC->seekOp = OP_Rewind;
  4520   4515   #endif
  4521   4516     if( isSorter(pC) ){
  4522         -    rc = sqlite3VdbeSorterRewind(db, pC, &res);
         4517  +    rc = sqlite3VdbeSorterRewind(pC, &res);
  4523   4518     }else{
  4524   4519       pCrsr = pC->pCursor;
  4525   4520       assert( pCrsr );
  4526   4521       rc = sqlite3BtreeFirst(pCrsr, &res);
  4527   4522       pC->deferredMoveto = 0;
  4528   4523       pC->cacheStatus = CACHE_STALE;
  4529   4524       pC->rowidIsValid = 0;
................................................................................
  4693   4688     pCrsr = pC->pCursor;
  4694   4689     if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++;
  4695   4690     assert( pCrsr!=0 );
  4696   4691     assert( pC->isTable==0 );
  4697   4692     rc = ExpandBlob(pIn2);
  4698   4693     if( rc==SQLITE_OK ){
  4699   4694       if( isSorter(pC) ){
  4700         -      rc = sqlite3VdbeSorterWrite(db, pC, pIn2);
         4695  +      rc = sqlite3VdbeSorterWrite(pC, pIn2);
  4701   4696       }else{
  4702   4697         nKey = pIn2->n;
  4703   4698         zKey = pIn2->z;
  4704   4699         rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, 
  4705   4700             ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0)
  4706   4701             );
  4707   4702         assert( pC->deferredMoveto==0 );
................................................................................
  5606   5601   ** successors.
  5607   5602   */
  5608   5603   case OP_AggStep: {
  5609   5604     int n;
  5610   5605     int i;
  5611   5606     Mem *pMem;
  5612   5607     Mem *pRec;
         5608  +  Mem t;
  5613   5609     sqlite3_context ctx;
  5614   5610     sqlite3_value **apVal;
  5615   5611   
  5616   5612     n = pOp->p5;
  5617   5613     assert( n>=0 );
  5618   5614     pRec = &aMem[pOp->p2];
  5619   5615     apVal = p->apArg;
................................................................................
  5623   5619       apVal[i] = pRec;
  5624   5620       memAboutToChange(p, pRec);
  5625   5621     }
  5626   5622     ctx.pFunc = pOp->p4.pFunc;
  5627   5623     assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) );
  5628   5624     ctx.pMem = pMem = &aMem[pOp->p3];
  5629   5625     pMem->n++;
  5630         -  ctx.s.flags = MEM_Null;
  5631         -  ctx.s.z = 0;
  5632         -  ctx.s.zMalloc = 0;
  5633         -  ctx.s.xDel = 0;
  5634         -  ctx.s.db = db;
         5626  +  t.flags = MEM_Null;
         5627  +  t.z = 0;
         5628  +  t.zMalloc = 0;
         5629  +  t.xDel = 0;
         5630  +  t.db = db;
         5631  +  ctx.pOut = &t;
  5635   5632     ctx.isError = 0;
  5636   5633     ctx.pColl = 0;
  5637   5634     ctx.skipFlag = 0;
  5638   5635     if( ctx.pFunc->funcFlags & SQLITE_FUNC_NEEDCOLL ){
  5639   5636       assert( pOp>p->aOp );
  5640   5637       assert( pOp[-1].p4type==P4_COLLSEQ );
  5641   5638       assert( pOp[-1].opcode==OP_CollSeq );
  5642   5639       ctx.pColl = pOp[-1].p4.pColl;
  5643   5640     }
  5644   5641     (ctx.pFunc->xStep)(&ctx, n, apVal); /* IMP: R-24505-23230 */
  5645   5642     if( ctx.isError ){
  5646         -    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(&ctx.s));
         5643  +    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(&t));
  5647   5644       rc = ctx.isError;
  5648   5645     }
  5649   5646     if( ctx.skipFlag ){
  5650   5647       assert( pOp[-1].opcode==OP_CollSeq );
  5651   5648       i = pOp[-1].p1;
  5652   5649       if( i ) sqlite3VdbeMemSetInt64(&aMem[i], 1);
  5653   5650     }
  5654         -
  5655         -  sqlite3VdbeMemRelease(&ctx.s);
  5656         -
         5651  +  sqlite3VdbeMemRelease(&t);
  5657   5652     break;
  5658   5653   }
  5659   5654   
  5660   5655   /* Opcode: AggFinal P1 P2 * P4 *
  5661   5656   ** Synopsis: accum=r[P1] N=P2
  5662   5657   **
  5663   5658   ** Execute the finalizer function for an aggregate.  P1 is
................................................................................
  6099   6094       sqlite3VdbeMemSetNull(pDest);
  6100   6095       break;
  6101   6096     }
  6102   6097     pVtab = pCur->pVtabCursor->pVtab;
  6103   6098     pModule = pVtab->pModule;
  6104   6099     assert( pModule->xColumn );
  6105   6100     memset(&sContext, 0, sizeof(sContext));
  6106         -
  6107         -  /* The output cell may already have a buffer allocated. Move
  6108         -  ** the current contents to sContext.s so in case the user-function 
  6109         -  ** can use the already allocated buffer instead of allocating a 
  6110         -  ** new one.
  6111         -  */
  6112         -  sqlite3VdbeMemMove(&sContext.s, pDest);
  6113         -  MemSetTypeFlag(&sContext.s, MEM_Null);
  6114         -
         6101  +  sContext.pOut = pDest;
         6102  +  MemSetTypeFlag(pDest, MEM_Null);
  6115   6103     rc = pModule->xColumn(pCur->pVtabCursor, &sContext, pOp->p2);
  6116   6104     sqlite3VtabImportErrmsg(p, pVtab);
  6117   6105     if( sContext.isError ){
  6118   6106       rc = sContext.isError;
  6119   6107     }
  6120         -
  6121         -  /* Copy the result of the function to the P3 register. We
  6122         -  ** do this regardless of whether or not an error occurred to ensure any
  6123         -  ** dynamic allocation in sContext.s (a Mem struct) is  released.
  6124         -  */
  6125         -  sqlite3VdbeChangeEncoding(&sContext.s, encoding);
  6126         -  sqlite3VdbeMemMove(pDest, &sContext.s);
         6108  +  sqlite3VdbeChangeEncoding(pDest, encoding);
  6127   6109     REGISTER_TRACE(pOp->p3, pDest);
  6128   6110     UPDATE_MAX_BLOBSIZE(pDest);
  6129   6111   
  6130   6112     if( sqlite3VdbeMemTooBig(pDest) ){
  6131   6113       goto too_big;
  6132   6114     }
  6133   6115     break;

Changes to src/vdbeInt.h.

   262    262   ** But this file is the only place where the internal details of this
   263    263   ** structure are known.
   264    264   **
   265    265   ** This structure is defined inside of vdbeInt.h because it uses substructures
   266    266   ** (Mem) which are only defined there.
   267    267   */
   268    268   struct sqlite3_context {
          269  +  Mem *pOut;            /* The return value is stored here */
   269    270     FuncDef *pFunc;       /* Pointer to function information.  MUST BE FIRST */
   270         -  Mem s;                /* The return value is stored here */
   271    271     Mem *pMem;            /* Memory cell used to store aggregate context */
   272    272     CollSeq *pColl;       /* Collating sequence */
   273    273     Vdbe *pVdbe;          /* The VM that owns this context */
   274    274     int iOp;              /* Instruction number of OP_Function */
   275    275     int isError;          /* Error code returned by the function. */
   276    276     u8 skipFlag;          /* Skip accumulator loading if true */
   277    277     u8 fErrorOrAux;       /* isError!=0 or pVdbe->pAuxData modified */
................................................................................
   437    437   const char *sqlite3OpcodeName(int);
   438    438   int sqlite3VdbeMemGrow(Mem *pMem, int n, int preserve);
   439    439   int sqlite3VdbeCloseStatement(Vdbe *, int);
   440    440   void sqlite3VdbeFrameDelete(VdbeFrame*);
   441    441   int sqlite3VdbeFrameRestore(VdbeFrame *);
   442    442   int sqlite3VdbeTransferError(Vdbe *p);
   443    443   
   444         -int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *);
          444  +int sqlite3VdbeSorterInit(sqlite3 *, int, VdbeCursor *);
   445    445   void sqlite3VdbeSorterReset(sqlite3 *, VdbeSorter *);
   446    446   void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *);
   447    447   int sqlite3VdbeSorterRowkey(const VdbeCursor *, Mem *);
   448    448   int sqlite3VdbeSorterNext(sqlite3 *, const VdbeCursor *, int *);
   449         -int sqlite3VdbeSorterRewind(sqlite3 *, const VdbeCursor *, int *);
   450         -int sqlite3VdbeSorterWrite(sqlite3 *, const VdbeCursor *, Mem *);
          449  +int sqlite3VdbeSorterRewind(const VdbeCursor *, int *);
          450  +int sqlite3VdbeSorterWrite(const VdbeCursor *, Mem *);
   451    451   int sqlite3VdbeSorterCompare(const VdbeCursor *, Mem *, int, int *);
   452    452   
   453    453   #if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0
   454    454     void sqlite3VdbeEnter(Vdbe*);
   455    455     void sqlite3VdbeLeave(Vdbe*);
   456    456   #else
   457    457   # define sqlite3VdbeEnter(X)

Changes to src/vdbeapi.c.

   235    235   static void setResultStrOrError(
   236    236     sqlite3_context *pCtx,  /* Function context */
   237    237     const char *z,          /* String pointer */
   238    238     int n,                  /* Bytes in string, or negative */
   239    239     u8 enc,                 /* Encoding of z.  0 for BLOBs */
   240    240     void (*xDel)(void*)     /* Destructor function */
   241    241   ){
   242         -  if( sqlite3VdbeMemSetStr(&pCtx->s, z, n, enc, xDel)==SQLITE_TOOBIG ){
          242  +  if( sqlite3VdbeMemSetStr(pCtx->pOut, z, n, enc, xDel)==SQLITE_TOOBIG ){
   243    243       sqlite3_result_error_toobig(pCtx);
   244    244     }
   245    245   }
   246    246   void sqlite3_result_blob(
   247    247     sqlite3_context *pCtx, 
   248    248     const void *z, 
   249    249     int n, 
   250    250     void (*xDel)(void *)
   251    251   ){
   252    252     assert( n>=0 );
   253         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          253  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   254    254     setResultStrOrError(pCtx, z, n, 0, xDel);
   255    255   }
   256    256   void sqlite3_result_double(sqlite3_context *pCtx, double rVal){
   257         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   258         -  sqlite3VdbeMemSetDouble(&pCtx->s, rVal);
          257  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          258  +  sqlite3VdbeMemSetDouble(pCtx->pOut, rVal);
   259    259   }
   260    260   void sqlite3_result_error(sqlite3_context *pCtx, const char *z, int n){
   261         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          261  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   262    262     pCtx->isError = SQLITE_ERROR;
   263    263     pCtx->fErrorOrAux = 1;
   264         -  sqlite3VdbeMemSetStr(&pCtx->s, z, n, SQLITE_UTF8, SQLITE_TRANSIENT);
          264  +  sqlite3VdbeMemSetStr(pCtx->pOut, z, n, SQLITE_UTF8, SQLITE_TRANSIENT);
   265    265   }
   266    266   #ifndef SQLITE_OMIT_UTF16
   267    267   void sqlite3_result_error16(sqlite3_context *pCtx, const void *z, int n){
   268         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          268  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   269    269     pCtx->isError = SQLITE_ERROR;
   270    270     pCtx->fErrorOrAux = 1;
   271         -  sqlite3VdbeMemSetStr(&pCtx->s, z, n, SQLITE_UTF16NATIVE, SQLITE_TRANSIENT);
          271  +  sqlite3VdbeMemSetStr(pCtx->pOut, z, n, SQLITE_UTF16NATIVE, SQLITE_TRANSIENT);
   272    272   }
   273    273   #endif
   274    274   void sqlite3_result_int(sqlite3_context *pCtx, int iVal){
   275         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   276         -  sqlite3VdbeMemSetInt64(&pCtx->s, (i64)iVal);
          275  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          276  +  sqlite3VdbeMemSetInt64(pCtx->pOut, (i64)iVal);
   277    277   }
   278    278   void sqlite3_result_int64(sqlite3_context *pCtx, i64 iVal){
   279         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   280         -  sqlite3VdbeMemSetInt64(&pCtx->s, iVal);
          279  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          280  +  sqlite3VdbeMemSetInt64(pCtx->pOut, iVal);
   281    281   }
   282    282   void sqlite3_result_null(sqlite3_context *pCtx){
   283         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   284         -  sqlite3VdbeMemSetNull(&pCtx->s);
          283  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          284  +  sqlite3VdbeMemSetNull(pCtx->pOut);
   285    285   }
   286    286   void sqlite3_result_text(
   287    287     sqlite3_context *pCtx, 
   288    288     const char *z, 
   289    289     int n,
   290    290     void (*xDel)(void *)
   291    291   ){
   292         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          292  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   293    293     setResultStrOrError(pCtx, z, n, SQLITE_UTF8, xDel);
   294    294   }
   295    295   #ifndef SQLITE_OMIT_UTF16
   296    296   void sqlite3_result_text16(
   297    297     sqlite3_context *pCtx, 
   298    298     const void *z, 
   299    299     int n, 
   300    300     void (*xDel)(void *)
   301    301   ){
   302         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          302  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   303    303     setResultStrOrError(pCtx, z, n, SQLITE_UTF16NATIVE, xDel);
   304    304   }
   305    305   void sqlite3_result_text16be(
   306    306     sqlite3_context *pCtx, 
   307    307     const void *z, 
   308    308     int n, 
   309    309     void (*xDel)(void *)
   310    310   ){
   311         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          311  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   312    312     setResultStrOrError(pCtx, z, n, SQLITE_UTF16BE, xDel);
   313    313   }
   314    314   void sqlite3_result_text16le(
   315    315     sqlite3_context *pCtx, 
   316    316     const void *z, 
   317    317     int n, 
   318    318     void (*xDel)(void *)
   319    319   ){
   320         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          320  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   321    321     setResultStrOrError(pCtx, z, n, SQLITE_UTF16LE, xDel);
   322    322   }
   323    323   #endif /* SQLITE_OMIT_UTF16 */
   324    324   void sqlite3_result_value(sqlite3_context *pCtx, sqlite3_value *pValue){
   325         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   326         -  sqlite3VdbeMemCopy(&pCtx->s, pValue);
          325  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          326  +  sqlite3VdbeMemCopy(pCtx->pOut, pValue);
   327    327   }
   328    328   void sqlite3_result_zeroblob(sqlite3_context *pCtx, int n){
   329         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   330         -  sqlite3VdbeMemSetZeroBlob(&pCtx->s, n);
          329  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          330  +  sqlite3VdbeMemSetZeroBlob(pCtx->pOut, n);
   331    331   }
   332    332   void sqlite3_result_error_code(sqlite3_context *pCtx, int errCode){
   333    333     pCtx->isError = errCode;
   334    334     pCtx->fErrorOrAux = 1;
   335         -  if( pCtx->s.flags & MEM_Null ){
   336         -    sqlite3VdbeMemSetStr(&pCtx->s, sqlite3ErrStr(errCode), -1, 
          335  +  if( pCtx->pOut->flags & MEM_Null ){
          336  +    sqlite3VdbeMemSetStr(pCtx->pOut, sqlite3ErrStr(errCode), -1, 
   337    337                            SQLITE_UTF8, SQLITE_STATIC);
   338    338     }
   339    339   }
   340    340   
   341    341   /* Force an SQLITE_TOOBIG error. */
   342    342   void sqlite3_result_error_toobig(sqlite3_context *pCtx){
   343         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          343  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   344    344     pCtx->isError = SQLITE_TOOBIG;
   345    345     pCtx->fErrorOrAux = 1;
   346         -  sqlite3VdbeMemSetStr(&pCtx->s, "string or blob too big", -1, 
          346  +  sqlite3VdbeMemSetStr(pCtx->pOut, "string or blob too big", -1, 
   347    347                          SQLITE_UTF8, SQLITE_STATIC);
   348    348   }
   349    349   
   350    350   /* An SQLITE_NOMEM error. */
   351    351   void sqlite3_result_error_nomem(sqlite3_context *pCtx){
   352         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
   353         -  sqlite3VdbeMemSetNull(&pCtx->s);
          352  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
          353  +  sqlite3VdbeMemSetNull(pCtx->pOut);
   354    354     pCtx->isError = SQLITE_NOMEM;
   355    355     pCtx->fErrorOrAux = 1;
   356         -  pCtx->s.db->mallocFailed = 1;
          356  +  pCtx->pOut->db->mallocFailed = 1;
   357    357   }
   358    358   
   359    359   /*
   360    360   ** This function is called after a transaction has been committed. It 
   361    361   ** invokes callbacks registered with sqlite3_wal_hook() as required.
   362    362   */
   363    363   static int doWalCallbacks(sqlite3 *db){
................................................................................
   580    580   ** returns a copy of the pointer to the database connection (the 1st
   581    581   ** parameter) of the sqlite3_create_function() and
   582    582   ** sqlite3_create_function16() routines that originally registered the
   583    583   ** application defined function.
   584    584   */
   585    585   sqlite3 *sqlite3_context_db_handle(sqlite3_context *p){
   586    586     assert( p && p->pFunc );
   587         -  return p->s.db;
          587  +  return p->pOut->db;
   588    588   }
   589    589   
   590    590   /*
   591    591   ** Return the current time for a statement
   592    592   */
   593    593   sqlite3_int64 sqlite3StmtCurrentTime(sqlite3_context *p){
   594    594     Vdbe *v = p->pVdbe;
   595    595     int rc;
   596    596     if( v->iCurrentTime==0 ){
   597         -    rc = sqlite3OsCurrentTimeInt64(p->s.db->pVfs, &v->iCurrentTime);
          597  +    rc = sqlite3OsCurrentTimeInt64(p->pOut->db->pVfs, &v->iCurrentTime);
   598    598       if( rc ) v->iCurrentTime = 0;
   599    599     }
   600    600     return v->iCurrentTime;
   601    601   }
   602    602   
   603    603   /*
   604    604   ** The following is the implementation of an SQL function that always
................................................................................
   647    647   /*
   648    648   ** Allocate or return the aggregate context for a user function.  A new
   649    649   ** context is allocated on the first call.  Subsequent calls return the
   650    650   ** same context that was returned on prior calls.
   651    651   */
   652    652   void *sqlite3_aggregate_context(sqlite3_context *p, int nByte){
   653    653     assert( p && p->pFunc && p->pFunc->xStep );
   654         -  assert( sqlite3_mutex_held(p->s.db->mutex) );
          654  +  assert( sqlite3_mutex_held(p->pOut->db->mutex) );
   655    655     testcase( nByte<0 );
   656    656     if( (p->pMem->flags & MEM_Agg)==0 ){
   657    657       return createAggContext(p, nByte);
   658    658     }else{
   659    659       return (void*)p->pMem->z;
   660    660     }
   661    661   }
................................................................................
   663    663   /*
   664    664   ** Return the auxiliary data pointer, if any, for the iArg'th argument to
   665    665   ** the user-function defined by pCtx.
   666    666   */
   667    667   void *sqlite3_get_auxdata(sqlite3_context *pCtx, int iArg){
   668    668     AuxData *pAuxData;
   669    669   
   670         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          670  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   671    671     for(pAuxData=pCtx->pVdbe->pAuxData; pAuxData; pAuxData=pAuxData->pNext){
   672    672       if( pAuxData->iOp==pCtx->iOp && pAuxData->iArg==iArg ) break;
   673    673     }
   674    674   
   675    675     return (pAuxData ? pAuxData->pAux : 0);
   676    676   }
   677    677   
................................................................................
   685    685     int iArg, 
   686    686     void *pAux, 
   687    687     void (*xDelete)(void*)
   688    688   ){
   689    689     AuxData *pAuxData;
   690    690     Vdbe *pVdbe = pCtx->pVdbe;
   691    691   
   692         -  assert( sqlite3_mutex_held(pCtx->s.db->mutex) );
          692  +  assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) );
   693    693     if( iArg<0 ) goto failed;
   694    694   
   695    695     for(pAuxData=pVdbe->pAuxData; pAuxData; pAuxData=pAuxData->pNext){
   696    696       if( pAuxData->iOp==pCtx->iOp && pAuxData->iArg==iArg ) break;
   697    697     }
   698    698     if( pAuxData==0 ){
   699    699       pAuxData = sqlite3DbMallocZero(pVdbe->db, sizeof(AuxData));

Changes to src/vdbeaux.c.

  3183   3183   /*
  3184   3184   ** This function compares two index or table record keys in the same way
  3185   3185   ** as the sqlite3VdbeRecordCompare() routine. Unlike VdbeRecordCompare(),
  3186   3186   ** this function deserializes and compares values using the
  3187   3187   ** sqlite3VdbeSerialGet() and sqlite3MemCompare() functions. It is used
  3188   3188   ** in assert() statements to ensure that the optimized code in
  3189   3189   ** sqlite3VdbeRecordCompare() returns results with these two primitives.
         3190  +**
         3191  +** Return true if the result of comparison is equivalent to desiredResult.
         3192  +** Return false if there is a disagreement.
  3190   3193   */
  3191   3194   static int vdbeRecordCompareDebug(
  3192   3195     int nKey1, const void *pKey1, /* Left key */
  3193         -  const UnpackedRecord *pPKey2  /* Right key */
         3196  +  const UnpackedRecord *pPKey2, /* Right key */
         3197  +  int desiredResult             /* Correct answer */
  3194   3198   ){
  3195   3199     u32 d1;            /* Offset into aKey[] of next data element */
  3196   3200     u32 idx1;          /* Offset into aKey[] of next header element */
  3197   3201     u32 szHdr1;        /* Number of bytes in header */
  3198   3202     int i = 0;
  3199   3203     int rc = 0;
  3200   3204     const unsigned char *aKey1 = (const unsigned char *)pKey1;
  3201   3205     KeyInfo *pKeyInfo;
  3202   3206     Mem mem1;
  3203   3207   
  3204   3208     pKeyInfo = pPKey2->pKeyInfo;
         3209  +  if( pKeyInfo->db==0 ) return 1;
  3205   3210     mem1.enc = pKeyInfo->enc;
  3206   3211     mem1.db = pKeyInfo->db;
  3207   3212     /* mem1.flags = 0;  // Will be initialized by sqlite3VdbeSerialGet() */
  3208   3213     VVA_ONLY( mem1.zMalloc = 0; ) /* Only needed by assert() statements */
  3209   3214   
  3210   3215     /* Compilers may complain that mem1.u.i is potentially uninitialized.
  3211   3216     ** We could initialize it, as shown here, to silence those complaints.
................................................................................
  3248   3253       */
  3249   3254       rc = sqlite3MemCompare(&mem1, &pPKey2->aMem[i], pKeyInfo->aColl[i]);
  3250   3255       if( rc!=0 ){
  3251   3256         assert( mem1.zMalloc==0 );  /* See comment below */
  3252   3257         if( pKeyInfo->aSortOrder[i] ){
  3253   3258           rc = -rc;  /* Invert the result for DESC sort order. */
  3254   3259         }
  3255         -      return rc;
         3260  +      goto debugCompareEnd;
  3256   3261       }
  3257   3262       i++;
  3258   3263     }while( idx1<szHdr1 && i<pPKey2->nField );
  3259   3264   
  3260   3265     /* No memory allocation is ever used on mem1.  Prove this using
  3261   3266     ** the following assert().  If the assert() fails, it indicates a
  3262   3267     ** memory leak and a need to call sqlite3VdbeMemRelease(&mem1).
  3263   3268     */
  3264   3269     assert( mem1.zMalloc==0 );
  3265   3270   
  3266   3271     /* rc==0 here means that one of the keys ran out of fields and
  3267   3272     ** all the fields up to that point were equal. Return the default_rc
  3268   3273     ** value.  */
  3269         -  return pPKey2->default_rc;
         3274  +  rc = pPKey2->default_rc;
         3275  +
         3276  +debugCompareEnd:
         3277  +  if( desiredResult==0 && rc==0 ) return 1;
         3278  +  if( desiredResult<0 && rc<0 ) return 1;
         3279  +  if( desiredResult>0 && rc>0 ) return 1;
         3280  +  if( CORRUPT_DB ) return 1;
         3281  +  if( pKeyInfo->db->mallocFailed ) return 1;
         3282  +  return 0;
  3270   3283   }
  3271   3284   #endif
  3272   3285   
  3273   3286   /*
  3274   3287   ** Both *pMem1 and *pMem2 contain string values. Compare the two values
  3275   3288   ** using the collation sequence pColl. As usual, return a negative, zero
  3276   3289   ** or positive value if *pMem1 is less than, equal to or greater than 
  3277   3290   ** *pMem2, respectively. Similar in spirit to "rc = (*pMem1) - (*pMem2);".
  3278   3291   */
  3279   3292   static int vdbeCompareMemString(
  3280   3293     const Mem *pMem1,
  3281   3294     const Mem *pMem2,
  3282         -  const CollSeq *pColl
         3295  +  const CollSeq *pColl,
         3296  +  u8 *prcErr                      /* If an OOM occurs, set to SQLITE_NOMEM */
  3283   3297   ){
  3284   3298     if( pMem1->enc==pColl->enc ){
  3285   3299       /* The strings are already in the correct encoding.  Call the
  3286   3300        ** comparison function directly */
  3287   3301       return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z);
  3288   3302     }else{
  3289   3303       int rc;
................................................................................
  3298   3312       v1 = sqlite3ValueText((sqlite3_value*)&c1, pColl->enc);
  3299   3313       n1 = v1==0 ? 0 : c1.n;
  3300   3314       v2 = sqlite3ValueText((sqlite3_value*)&c2, pColl->enc);
  3301   3315       n2 = v2==0 ? 0 : c2.n;
  3302   3316       rc = pColl->xCmp(pColl->pUser, n1, v1, n2, v2);
  3303   3317       sqlite3VdbeMemRelease(&c1);
  3304   3318       sqlite3VdbeMemRelease(&c2);
         3319  +    if( (v1==0 || v2==0) && prcErr ) *prcErr = SQLITE_NOMEM;
  3305   3320       return rc;
  3306   3321     }
  3307   3322   }
  3308   3323   
  3309   3324   /*
  3310   3325   ** Compare the values contained by the two memory cells, returning
  3311   3326   ** negative, zero or positive if pMem1 is less than, equal to, or greater
................................................................................
  3380   3395       /* The collation sequence must be defined at this point, even if
  3381   3396       ** the user deletes the collation sequence after the vdbe program is
  3382   3397       ** compiled (this was not always the case).
  3383   3398       */
  3384   3399       assert( !pColl || pColl->xCmp );
  3385   3400   
  3386   3401       if( pColl ){
  3387         -      return vdbeCompareMemString(pMem1, pMem2, pColl);
         3402  +      return vdbeCompareMemString(pMem1, pMem2, pColl, 0);
  3388   3403       }
  3389   3404       /* If a NULL pointer was passed as the collate function, fall through
  3390   3405       ** to the blob case and use memcmp().  */
  3391   3406     }
  3392   3407    
  3393   3408     /* Both values must be blobs.  Compare using memcmp().  */
  3394   3409     rc = memcmp(pMem1->z, pMem2->z, (pMem1->n>pMem2->n)?pMem2->n:pMem1->n);
................................................................................
  3452   3467   ** If argument bSkip is non-zero, it is assumed that the caller has already
  3453   3468   ** determined that the first fields of the keys are equal.
  3454   3469   **
  3455   3470   ** Key1 and Key2 do not have to contain the same number of fields. If all 
  3456   3471   ** fields that appear in both keys are equal, then pPKey2->default_rc is 
  3457   3472   ** returned.
  3458   3473   **
  3459         -** If database corruption is discovered, set pPKey2->isCorrupt to non-zero
  3460         -** and return 0.
         3474  +** If database corruption is discovered, set pPKey2->errCode to 
         3475  +** SQLITE_CORRUPT and return 0. If an OOM error is encountered, 
         3476  +** pPKey2->errCode is set to SQLITE_NOMEM and, if it is not NULL, the
         3477  +** malloc-failed flag is set on the database handle (pPKey2->pKeyInfo->db).
  3461   3478   */
  3462   3479   int sqlite3VdbeRecordCompare(
  3463   3480     int nKey1, const void *pKey1,   /* Left key */
  3464   3481     UnpackedRecord *pPKey2,         /* Right key */
  3465   3482     int bSkip                       /* If true, skip the first field */
  3466   3483   ){
  3467   3484     u32 d1;                         /* Offset into aKey[] of next data element */
................................................................................
  3484   3501       d1 = szHdr1 + sqlite3VdbeSerialTypeLen(s1);
  3485   3502       i = 1;
  3486   3503       pRhs++;
  3487   3504     }else{
  3488   3505       idx1 = getVarint32(aKey1, szHdr1);
  3489   3506       d1 = szHdr1;
  3490   3507       if( d1>(unsigned)nKey1 ){ 
  3491         -      pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT;
         3508  +      pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
  3492   3509         return 0;  /* Corruption */
  3493   3510       }
  3494   3511       i = 0;
  3495   3512     }
  3496   3513   
  3497   3514     VVA_ONLY( mem1.zMalloc = 0; ) /* Only needed by assert() statements */
  3498   3515     assert( pPKey2->pKeyInfo->nField+pPKey2->pKeyInfo->nXField>=pPKey2->nField 
................................................................................
  3563   3580         }else if( !(serial_type & 0x01) ){
  3564   3581           rc = +1;
  3565   3582         }else{
  3566   3583           mem1.n = (serial_type - 12) / 2;
  3567   3584           testcase( (d1+mem1.n)==(unsigned)nKey1 );
  3568   3585           testcase( (d1+mem1.n+1)==(unsigned)nKey1 );
  3569   3586           if( (d1+mem1.n) > (unsigned)nKey1 ){
  3570         -          pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT;
         3587  +          pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
  3571   3588             return 0;                /* Corruption */
  3572   3589           }else if( pKeyInfo->aColl[i] ){
  3573   3590             mem1.enc = pKeyInfo->enc;
  3574   3591             mem1.db = pKeyInfo->db;
  3575   3592             mem1.flags = MEM_Str;
  3576   3593             mem1.z = (char*)&aKey1[d1];
  3577         -          rc = vdbeCompareMemString(&mem1, pRhs, pKeyInfo->aColl[i]);
         3594  +          rc = vdbeCompareMemString(
         3595  +              &mem1, pRhs, pKeyInfo->aColl[i], &pPKey2->errCode
         3596  +          );
  3578   3597           }else{
  3579   3598             int nCmp = MIN(mem1.n, pRhs->n);
  3580   3599             rc = memcmp(&aKey1[d1], pRhs->z, nCmp);
  3581   3600             if( rc==0 ) rc = mem1.n - pRhs->n; 
  3582   3601           }
  3583   3602         }
  3584   3603       }
................................................................................
  3590   3609         if( serial_type<12 || (serial_type & 0x01) ){
  3591   3610           rc = -1;
  3592   3611         }else{
  3593   3612           int nStr = (serial_type - 12) / 2;
  3594   3613           testcase( (d1+nStr)==(unsigned)nKey1 );
  3595   3614           testcase( (d1+nStr+1)==(unsigned)nKey1 );
  3596   3615           if( (d1+nStr) > (unsigned)nKey1 ){
  3597         -          pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT;
         3616  +          pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
  3598   3617             return 0;                /* Corruption */
  3599   3618           }else{
  3600   3619             int nCmp = MIN(nStr, pRhs->n);
  3601   3620             rc = memcmp(&aKey1[d1], pRhs->z, nCmp);
  3602   3621             if( rc==0 ) rc = nStr - pRhs->n;
  3603   3622           }
  3604   3623         }
................................................................................
  3610   3629         rc = (serial_type!=0);
  3611   3630       }
  3612   3631   
  3613   3632       if( rc!=0 ){
  3614   3633         if( pKeyInfo->aSortOrder[i] ){
  3615   3634           rc = -rc;
  3616   3635         }
  3617         -      assert( CORRUPT_DB
  3618         -          || (rc<0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)<0)
  3619         -          || (rc>0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)>0)
  3620         -          || pKeyInfo->db->mallocFailed
  3621         -      );
         3636  +      assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, rc) );
  3622   3637         assert( mem1.zMalloc==0 );  /* See comment below */
  3623   3638         return rc;
  3624   3639       }
  3625   3640   
  3626   3641       i++;
  3627   3642       pRhs++;
  3628   3643       d1 += sqlite3VdbeSerialTypeLen(serial_type);
................................................................................
  3634   3649     ** memory leak and a need to call sqlite3VdbeMemRelease(&mem1).  */
  3635   3650     assert( mem1.zMalloc==0 );
  3636   3651   
  3637   3652     /* rc==0 here means that one or both of the keys ran out of fields and
  3638   3653     ** all the fields up to that point were equal. Return the default_rc
  3639   3654     ** value.  */
  3640   3655     assert( CORRUPT_DB 
  3641         -       || pPKey2->default_rc==vdbeRecordCompareDebug(nKey1, pKey1, pPKey2) 
         3656  +       || vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, pPKey2->default_rc) 
  3642   3657          || pKeyInfo->db->mallocFailed
  3643   3658     );
  3644   3659     return pPKey2->default_rc;
  3645   3660   }
  3646   3661   
  3647   3662   /*
  3648   3663   ** This function is an optimized version of sqlite3VdbeRecordCompare() 
................................................................................
  3733   3748       res = sqlite3VdbeRecordCompare(nKey1, pKey1, pPKey2, 1);
  3734   3749     }else{
  3735   3750       /* The first fields of the two keys are equal and there are no trailing
  3736   3751       ** fields. Return pPKey2->default_rc in this case. */
  3737   3752       res = pPKey2->default_rc;
  3738   3753     }
  3739   3754   
  3740         -  assert( (res==0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)==0)
  3741         -       || (res<0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)<0)
  3742         -       || (res>0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)>0)
  3743         -       || CORRUPT_DB
  3744         -  );
         3755  +  assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res) );
  3745   3756     return res;
  3746   3757   }
  3747   3758   
  3748   3759   /*
  3749   3760   ** This function is an optimized version of sqlite3VdbeRecordCompare() 
  3750   3761   ** that (a) the first field of pPKey2 is a string, that (b) the first field
  3751   3762   ** uses the collation sequence BINARY and (c) that the size-of-header varint 
................................................................................
  3771   3782     }else{
  3772   3783       int nCmp;
  3773   3784       int nStr;
  3774   3785       int szHdr = aKey1[0];
  3775   3786   
  3776   3787       nStr = (serial_type-12) / 2;
  3777   3788       if( (szHdr + nStr) > nKey1 ){
  3778         -      pPKey2->isCorrupt = (u8)SQLITE_CORRUPT_BKPT;
         3789  +      pPKey2->errCode = (u8)SQLITE_CORRUPT_BKPT;
  3779   3790         return 0;    /* Corruption */
  3780   3791       }
  3781   3792       nCmp = MIN( pPKey2->aMem[0].n, nStr );
  3782   3793       res = memcmp(&aKey1[szHdr], pPKey2->aMem[0].z, nCmp);
  3783   3794   
  3784   3795       if( res==0 ){
  3785   3796         res = nStr - pPKey2->aMem[0].n;
................................................................................
  3797   3808       }else if( res>0 ){
  3798   3809         res = pPKey2->r2;
  3799   3810       }else{
  3800   3811         res = pPKey2->r1;
  3801   3812       }
  3802   3813     }
  3803   3814   
  3804         -  assert( (res==0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)==0)
  3805         -       || (res<0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)<0)
  3806         -       || (res>0 && vdbeRecordCompareDebug(nKey1, pKey1, pPKey2)>0)
         3815  +  assert( vdbeRecordCompareDebug(nKey1, pKey1, pPKey2, res)
  3807   3816          || CORRUPT_DB
  3808   3817          || pPKey2->pKeyInfo->db->mallocFailed
  3809   3818     );
  3810   3819     return res;
  3811   3820   }
  3812   3821   
  3813   3822   /*

Changes to src/vdbemem.c.

   196    196       pMem->n += pMem->u.nZero;
   197    197       pMem->flags &= ~(MEM_Zero|MEM_Term);
   198    198     }
   199    199     return SQLITE_OK;
   200    200   }
   201    201   #endif
   202    202   
   203         -
   204    203   /*
   205         -** Make sure the given Mem is \u0000 terminated.
          204  +** It is already known that pMem contains an unterminated string.
          205  +** Add the zero terminator.
   206    206   */
   207         -int sqlite3VdbeMemNulTerminate(Mem *pMem){
   208         -  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
   209         -  if( (pMem->flags & MEM_Term)!=0 || (pMem->flags & MEM_Str)==0 ){
   210         -    return SQLITE_OK;   /* Nothing to do */
   211         -  }
          207  +static SQLITE_NOINLINE int vdbeMemAddTerminator(Mem *pMem){
   212    208     if( sqlite3VdbeMemGrow(pMem, pMem->n+2, 1) ){
   213    209       return SQLITE_NOMEM;
   214    210     }
   215    211     pMem->z[pMem->n] = 0;
   216    212     pMem->z[pMem->n+1] = 0;
   217    213     pMem->flags |= MEM_Term;
   218    214     return SQLITE_OK;
   219    215   }
          216  +
          217  +/*
          218  +** Make sure the given Mem is \u0000 terminated.
          219  +*/
          220  +int sqlite3VdbeMemNulTerminate(Mem *pMem){
          221  +  assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
          222  +  testcase( (pMem->flags & (MEM_Term|MEM_Str))==(MEM_Term|MEM_Str) );
          223  +  testcase( (pMem->flags & (MEM_Term|MEM_Str))==0 );
          224  +  if( (pMem->flags & (MEM_Term|MEM_Str))!=MEM_Str ){
          225  +    return SQLITE_OK;   /* Nothing to do */
          226  +  }else{
          227  +    return vdbeMemAddTerminator(pMem);
          228  +  }
          229  +}
   220    230   
   221    231   /*
   222    232   ** Add MEM_Str to the set of representations for the given Mem.  Numbers
   223    233   ** are converted using sqlite3_snprintf().  Converting a BLOB to a string
   224    234   ** is a no-op.
   225    235   **
   226    236   ** Existing representations MEM_Int and MEM_Real are invalidated if
................................................................................
   276    286   ** Return SQLITE_ERROR if the finalizer reports an error.  SQLITE_OK
   277    287   ** otherwise.
   278    288   */
   279    289   int sqlite3VdbeMemFinalize(Mem *pMem, FuncDef *pFunc){
   280    290     int rc = SQLITE_OK;
   281    291     if( ALWAYS(pFunc && pFunc->xFinalize) ){
   282    292       sqlite3_context ctx;
          293  +    Mem t;
   283    294       assert( (pMem->flags & MEM_Null)!=0 || pFunc==pMem->u.pDef );
   284    295       assert( pMem->db==0 || sqlite3_mutex_held(pMem->db->mutex) );
   285    296       memset(&ctx, 0, sizeof(ctx));
   286         -    ctx.s.flags = MEM_Null;
   287         -    ctx.s.db = pMem->db;
          297  +    memset(&t, 0, sizeof(t));
          298  +    t.flags = MEM_Null;
          299  +    t.db = pMem->db;
          300  +    ctx.pOut = &t;
   288    301       ctx.pMem = pMem;
   289    302       ctx.pFunc = pFunc;
   290    303       pFunc->xFinalize(&ctx); /* IMP: R-24505-23230 */
   291    304       assert( 0==(pMem->flags&MEM_Dyn) && !pMem->xDel );
   292    305       sqlite3DbFree(pMem->db, pMem->zMalloc);
   293         -    memcpy(pMem, &ctx.s, sizeof(ctx.s));
          306  +    memcpy(pMem, &t, sizeof(t));
   294    307       rc = ctx.isError;
   295    308     }
   296    309     return rc;
   297    310   }
   298    311   
   299    312   /*
   300    313   ** If the memory cell contains a string value that must be freed by
................................................................................
   600    613     sqlite3VdbeMemGrow(pMem, n, 0);
   601    614     if( pMem->z ){
   602    615       pMem->n = n;
   603    616       memset(pMem->z, 0, n);
   604    617     }
   605    618   #endif
   606    619   }
          620  +
          621  +/*
          622  +** The pMem is known to contain content that needs to be destroyed prior
          623  +** to a value change.  So invoke the destructor, then set the value to
          624  +** a 64-bit integer.
          625  +*/
          626  +static SQLITE_NOINLINE void vdbeReleaseAndSetInt64(Mem *pMem, i64 val){
          627  +  sqlite3VdbeMemReleaseExternal(pMem);
          628  +  pMem->u.i = val;
          629  +  pMem->flags = MEM_Int;
          630  +}
   607    631   
   608    632   /*
   609    633   ** Delete any previous value and set the value stored in *pMem to val,
   610    634   ** manifest type INTEGER.
   611    635   */
   612    636   void sqlite3VdbeMemSetInt64(Mem *pMem, i64 val){
   613         -  sqlite3VdbeMemRelease(pMem);
   614         -  pMem->u.i = val;
   615         -  pMem->flags = MEM_Int;
          637  +  if( VdbeMemDynamic(pMem) ){
          638  +    vdbeReleaseAndSetInt64(pMem, val);
          639  +  }else{
          640  +    pMem->u.i = val;
          641  +    pMem->flags = MEM_Int;
          642  +  }
   616    643   }
   617    644   
   618    645   #ifndef SQLITE_OMIT_FLOATING_POINT
   619    646   /*
   620    647   ** Delete any previous value and set the value stored in *pMem to val,
   621    648   ** manifest type REAL.
   622    649   */
................................................................................
   904    931       }else{
   905    932         sqlite3VdbeMemRelease(pMem);
   906    933       }
   907    934     }
   908    935   
   909    936     return rc;
   910    937   }
          938  +
          939  +/*
          940  +** The pVal argument is known to be a value other than NULL.
          941  +** Convert it into a string with encoding enc and return a pointer
          942  +** to a zero-terminated version of that string.
          943  +*/
          944  +SQLITE_NOINLINE const void *valueToText(sqlite3_value* pVal, u8 enc){
          945  +  assert( pVal!=0 );
          946  +  assert( pVal->db==0 || sqlite3_mutex_held(pVal->db->mutex) );
          947  +  assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) );
          948  +  assert( (pVal->flags & MEM_RowSet)==0 );
          949  +  assert( (pVal->flags & (MEM_Null))==0 );
          950  +  if( pVal->flags & (MEM_Blob|MEM_Str) ){
          951  +    pVal->flags |= MEM_Str;
          952  +    if( pVal->flags & MEM_Zero ){
          953  +      sqlite3VdbeMemExpandBlob(pVal);
          954  +    }
          955  +    if( pVal->enc != (enc & ~SQLITE_UTF16_ALIGNED) ){
          956  +      sqlite3VdbeChangeEncoding(pVal, enc & ~SQLITE_UTF16_ALIGNED);
          957  +    }
          958  +    if( (enc & SQLITE_UTF16_ALIGNED)!=0 && 1==(1&SQLITE_PTR_TO_INT(pVal->z)) ){
          959  +      assert( (pVal->flags & (MEM_Ephem|MEM_Static))!=0 );
          960  +      if( sqlite3VdbeMemMakeWriteable(pVal)!=SQLITE_OK ){
          961  +        return 0;
          962  +      }
          963  +    }
          964  +    sqlite3VdbeMemNulTerminate(pVal); /* IMP: R-31275-44060 */
          965  +  }else{
          966  +    sqlite3VdbeMemStringify(pVal, enc, 0);
          967  +    assert( 0==(1&SQLITE_PTR_TO_INT(pVal->z)) );
          968  +  }
          969  +  assert(pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) || pVal->db==0
          970  +              || pVal->db->mallocFailed );
          971  +  if( pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) ){
          972  +    return pVal->z;
          973  +  }else{
          974  +    return 0;
          975  +  }
          976  +}
   911    977   
   912    978   /* This function is only available internally, it is not part of the
   913    979   ** external API. It works in a similar way to sqlite3_value_text(),
   914    980   ** except the data returned is in the encoding specified by the second
   915    981   ** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or
   916    982   ** SQLITE_UTF8.
   917    983   **
   918    984   ** (2006-02-16:)  The enc value can be or-ed with SQLITE_UTF16_ALIGNED.
   919    985   ** If that is the case, then the result must be aligned on an even byte
   920    986   ** boundary.
   921    987   */
   922    988   const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){
   923    989     if( !pVal ) return 0;
   924         -
   925    990     assert( pVal->db==0 || sqlite3_mutex_held(pVal->db->mutex) );
   926    991     assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) );
   927    992     assert( (pVal->flags & MEM_RowSet)==0 );
   928         -
          993  +  if( (pVal->flags&(MEM_Str|MEM_Term))==(MEM_Str|MEM_Term) && pVal->enc==enc ){
          994  +    return pVal->z;
          995  +  }
   929    996     if( pVal->flags&MEM_Null ){
   930    997       return 0;
   931    998     }
   932         -  assert( (MEM_Blob>>3) == MEM_Str );
   933         -  pVal->flags |= (pVal->flags & MEM_Blob)>>3;
   934         -  ExpandBlob(pVal);
   935         -  if( pVal->flags&MEM_Str ){
   936         -    sqlite3VdbeChangeEncoding(pVal, enc & ~SQLITE_UTF16_ALIGNED);
   937         -    if( (enc & SQLITE_UTF16_ALIGNED)!=0 && 1==(1&SQLITE_PTR_TO_INT(pVal->z)) ){
   938         -      assert( (pVal->flags & (MEM_Ephem|MEM_Static))!=0 );
   939         -      if( sqlite3VdbeMemMakeWriteable(pVal)!=SQLITE_OK ){
   940         -        return 0;
   941         -      }
   942         -    }
   943         -    sqlite3VdbeMemNulTerminate(pVal); /* IMP: R-31275-44060 */
   944         -  }else{
   945         -    assert( (pVal->flags&MEM_Blob)==0 );
   946         -    sqlite3VdbeMemStringify(pVal, enc, 0);
   947         -    assert( 0==(1&SQLITE_PTR_TO_INT(pVal->z)) );
   948         -  }
   949         -  assert(pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) || pVal->db==0
   950         -              || pVal->db->mallocFailed );
   951         -  if( pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) ){
   952         -    return pVal->z;
   953         -  }else{
   954         -    return 0;
   955         -  }
          999  +  return valueToText(pVal, enc);
   956   1000   }
   957   1001   
   958   1002   /*
   959   1003   ** Create a new sqlite3_value object.
   960   1004   */
   961   1005   sqlite3_value *sqlite3ValueNew(sqlite3 *db){
   962   1006     Mem *p = sqlite3DbMallocZero(db, sizeof(*p));

Changes to src/vdbesort.c.

     1      1   /*
     2         -** 2011 July 9
            2  +** 2011-07-09
     3      3   **
     4      4   ** The author disclaims copyright to this source code.  In place of
     5      5   ** a legal notice, here is a blessing:
     6      6   **
     7      7   **    May you do good and not evil.
     8      8   **    May you find forgiveness for yourself and forgive others.
     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   *************************************************************************
    12     12   ** This file contains code for the VdbeSorter object, used in concert with
    13         -** a VdbeCursor to sort large numbers of keys (as may be required, for
    14         -** example, by CREATE INDEX statements on tables too large to fit in main
    15         -** memory).
    16         -*/
    17         -
           13  +** a VdbeCursor to sort large numbers of keys for CREATE INDEX statements
           14  +** or by SELECT statements with ORDER BY clauses that cannot be satisfied
           15  +** using indexes and without LIMIT clauses.
           16  +**
           17  +** The VdbeSorter object implements a multi-threaded external merge sort
           18  +** algorithm that is efficient even if the number of elements being sorted
           19  +** exceeds the available memory.
           20  +**
           21  +** Here is the (internal, non-API) interface between this module and the
           22  +** rest of the SQLite system:
           23  +**
           24  +**    sqlite3VdbeSorterInit()       Create a new VdbeSorter object.
           25  +**
           26  +**    sqlite3VdbeSorterWrite()      Add a single new row to the VdbeSorter
           27  +**                                  object.  The row is a binary blob in the
           28  +**                                  OP_MakeRecord format that contains both
           29  +**                                  the ORDER BY key columns and result columns
           30  +**                                  in the case of a SELECT w/ ORDER BY, or
           31  +**                                  the complete record for an index entry
           32  +**                                  in the case of a CREATE INDEX.
           33  +**
           34  +**    sqlite3VdbeSorterRewind()     Sort all content previously added.
           35  +**                                  Position the read cursor on the
           36  +**                                  first sorted element.
           37  +**
           38  +**    sqlite3VdbeSorterNext()       Advance the read cursor to the next sorted
           39  +**                                  element.
           40  +**
           41  +**    sqlite3VdbeSorterRowkey()     Return the complete binary blob for the
           42  +**                                  row currently under the read cursor.
           43  +**
           44  +**    sqlite3VdbeSorterCompare()    Compare the binary blob for the row
           45  +**                                  currently under the read cursor against
           46  +**                                  another binary blob X and report if
           47  +**                                  X is strictly less than the read cursor.
           48  +**                                  Used to enforce uniqueness in a
           49  +**                                  CREATE UNIQUE INDEX statement.
           50  +**
           51  +**    sqlite3VdbeSorterClose()      Close the VdbeSorter object and reclaim
           52  +**                                  all resources.
           53  +**
           54  +**    sqlite3VdbeSorterReset()      Refurbish the VdbeSorter for reuse.  This
           55  +**                                  is like Close() followed by Init() only
           56  +**                                  much faster.
           57  +**
           58  +** The interfaces above must be called in a particular order.  Write() can 
           59  +** only occur in between Init()/Reset() and Rewind().  Next(), Rowkey(), and
           60  +** Compare() can only occur in between Rewind() and Close()/Reset(). i.e.
           61  +**
           62  +**   Init()
           63  +**   for each record: Write()
           64  +**   Rewind()
           65  +**     Rowkey()/Compare()
           66  +**   Next() 
           67  +**   Close()
           68  +**
           69  +** Algorithm:
           70  +**
           71  +** Records passed to the sorter via calls to Write() are initially held 
           72  +** unsorted in main memory. Assuming the amount of memory used never exceeds
           73  +** a threshold, when Rewind() is called the set of records is sorted using
           74  +** an in-memory merge sort. In this case, no temporary files are required
           75  +** and subsequent calls to Rowkey(), Next() and Compare() read records 
           76  +** directly from main memory.
           77  +**
           78  +** If the amount of space used to store records in main memory exceeds the
           79  +** threshold, then the set of records currently in memory are sorted and
           80  +** written to a temporary file in "Packed Memory Array" (PMA) format.
           81  +** A PMA created at this point is known as a "level-0 PMA". Higher levels
           82  +** of PMAs may be created by merging existing PMAs together - for example
           83  +** merging two or more level-0 PMAs together creates a level-1 PMA.
           84  +**
           85  +** The threshold for the amount of main memory to use before flushing 
           86  +** records to a PMA is roughly the same as the limit configured for the
           87  +** page-cache of the main database. Specifically, the threshold is set to 
           88  +** the value returned by "PRAGMA main.page_size" multiplied by 
           89  +** that returned by "PRAGMA main.cache_size", in bytes.
           90  +**
           91  +** If the sorter is running in single-threaded mode, then all PMAs generated
           92  +** are appended to a single temporary file. Or, if the sorter is running in
           93  +** multi-threaded mode then up to (N+1) temporary files may be opened, where
           94  +** N is the configured number of worker threads. In this case, instead of
           95  +** sorting the records and writing the PMA to a temporary file itself, the
           96  +** calling thread usually launches a worker thread to do so. Except, if
           97  +** there are already N worker threads running, the main thread does the work
           98  +** itself.
           99  +**
          100  +** The sorter is running in multi-threaded mode if (a) the library was built
          101  +** with pre-processor symbol SQLITE_MAX_WORKER_THREADS set to a value greater
          102  +** than zero, and (b) worker threads have been enabled at runtime by calling
          103  +** sqlite3_config(SQLITE_CONFIG_WORKER_THREADS, ...).
          104  +**
          105  +** When Rewind() is called, any data remaining in memory is flushed to a 
          106  +** final PMA. So at this point the data is stored in some number of sorted
          107  +** PMAs within temporary files on disk.
          108  +**
          109  +** If there are fewer than SORTER_MAX_MERGE_COUNT PMAs in total and the
          110  +** sorter is running in single-threaded mode, then these PMAs are merged
          111  +** incrementally as keys are retrieved from the sorter by the VDBE.  The
          112  +** MergeEngine object, described in further detail below, performs this
          113  +** merge.
          114  +**
          115  +** Or, if running in multi-threaded mode, then a background thread is
          116  +** launched to merge the existing PMAs. Once the background thread has
          117  +** merged T bytes of data into a single sorted PMA, the main thread 
          118  +** begins reading keys from that PMA while the background thread proceeds
          119  +** with merging the next T bytes of data. And so on.
          120  +**
          121  +** Parameter T is set to half the value of the memory threshold used 
          122  +** by Write() above to determine when to create a new PMA.
          123  +**
          124  +** If there are more than SORTER_MAX_MERGE_COUNT PMAs in total when 
          125  +** Rewind() is called, then a hierarchy of incremental-merges is used. 
          126  +** First, T bytes of data from the first SORTER_MAX_MERGE_COUNT PMAs on 
          127  +** disk are merged together. Then T bytes of data from the second set, and
          128  +** so on, such that no operation ever merges more than SORTER_MAX_MERGE_COUNT
          129  +** PMAs at a time. This is done to improve locality.
          130  +**
          131  +** If running in multi-threaded mode and there are more than
          132  +** SORTER_MAX_MERGE_COUNT PMAs on disk when Rewind() is called, then more
          133  +** than one background thread may be created. Specifically, there may be
          134  +** one background thread for each temporary file on disk, and one background
          135  +** thread to merge the output of each of the others to a single PMA for
          136  +** the main thread to read from.
          137  +*/
    18    138   #include "sqliteInt.h"
    19    139   #include "vdbeInt.h"
    20    140   
    21         -
    22         -typedef struct VdbeSorterIter VdbeSorterIter;
    23         -typedef struct SorterRecord SorterRecord;
    24         -typedef struct FileWriter FileWriter;
    25         -
    26         -/*
    27         -** NOTES ON DATA STRUCTURE USED FOR N-WAY MERGES:
    28         -**
    29         -** As keys are added to the sorter, they are written to disk in a series
    30         -** of sorted packed-memory-arrays (PMAs). The size of each PMA is roughly
    31         -** the same as the cache-size allowed for temporary databases. In order
    32         -** to allow the caller to extract keys from the sorter in sorted order,
    33         -** all PMAs currently stored on disk must be merged together. This comment
    34         -** describes the data structure used to do so. The structure supports 
    35         -** merging any number of arrays in a single pass with no redundant comparison 
    36         -** operations.
    37         -**
    38         -** The aIter[] array contains an iterator for each of the PMAs being merged.
    39         -** An aIter[] iterator either points to a valid key or else is at EOF. For 
    40         -** the purposes of the paragraphs below, we assume that the array is actually 
    41         -** N elements in size, where N is the smallest power of 2 greater to or equal 
    42         -** to the number of iterators being merged. The extra aIter[] elements are 
    43         -** treated as if they are empty (always at EOF).
          141  +/* 
          142  +** If SQLITE_DEBUG_SORTER_THREADS is defined, this module outputs various
          143  +** messages to stderr that may be helpful in understanding the performance
          144  +** characteristics of the sorter in multi-threaded mode.
          145  +*/
          146  +#if 0
          147  +# define SQLITE_DEBUG_SORTER_THREADS 1
          148  +#endif
          149  +
          150  +/*
          151  +** Private objects used by the sorter
          152  +*/
          153  +typedef struct MergeEngine MergeEngine;     /* Merge PMAs together */
          154  +typedef struct PmaReader PmaReader;         /* Incrementally read one PMA */
          155  +typedef struct PmaWriter PmaWriter;         /* Incrementally write one PMA */
          156  +typedef struct SorterRecord SorterRecord;   /* A record being sorted */
          157  +typedef struct SortSubtask SortSubtask;     /* A sub-task in the sort process */
          158  +typedef struct SorterFile SorterFile;       /* Temporary file object wrapper */
          159  +typedef struct SorterList SorterList;       /* In-memory list of records */
          160  +typedef struct IncrMerger IncrMerger;       /* Read & merge multiple PMAs */
          161  +
          162  +/*
          163  +** A container for a temp file handle and the current amount of data 
          164  +** stored in the file.
          165  +*/
          166  +struct SorterFile {
          167  +  sqlite3_file *pFd;              /* File handle */
          168  +  i64 iEof;                       /* Bytes of data stored in pFd */
          169  +};
          170  +
          171  +/*
          172  +** An in-memory list of objects to be sorted.
          173  +**
          174  +** If aMemory==0 then each object is allocated separately and the objects
          175  +** are connected using SorterRecord.u.pNext.  If aMemory!=0 then all objects
          176  +** are stored in the aMemory[] bulk memory, one right after the other, and
          177  +** are connected using SorterRecord.u.iNext.
          178  +*/
          179  +struct SorterList {
          180  +  SorterRecord *pList;            /* Linked list of records */
          181  +  u8 *aMemory;                    /* If non-NULL, bulk memory to hold pList */
          182  +  int szPMA;                      /* Size of pList as PMA in bytes */
          183  +};
          184  +
          185  +/*
          186  +** The MergeEngine object is used to combine two or more smaller PMAs into
          187  +** one big PMA using a merge operation.  Separate PMAs all need to be
          188  +** combined into one big PMA in order to be able to step through the sorted
          189  +** records in order.
          190  +**
          191  +** The aReadr[] array contains a PmaReader object for each of the PMAs being
          192  +** merged.  An aReadr[] object either points to a valid key or else is at EOF.
          193  +** ("EOF" means "End Of File".  When aReadr[] is at EOF there is no more data.)
          194  +** For the purposes of the paragraphs below, we assume that the array is
          195  +** actually N elements in size, where N is the smallest power of 2 greater
          196  +** than or equal to the number of PMAs being merged. The extra aReadr[] elements
          197  +** are treated as if they are empty (always at EOF).
    44    198   **
    45    199   ** The aTree[] array is also N elements in size. The value of N is stored in
    46         -** the VdbeSorter.nTree variable.
          200  +** the MergeEngine.nTree variable.
    47    201   **
    48    202   ** The final (N/2) elements of aTree[] contain the results of comparing
    49         -** pairs of iterator keys together. Element i contains the result of 
    50         -** comparing aIter[2*i-N] and aIter[2*i-N+1]. Whichever key is smaller, the
          203  +** pairs of PMA keys together. Element i contains the result of 
          204  +** comparing aReadr[2*i-N] and aReadr[2*i-N+1]. Whichever key is smaller, the
    51    205   ** aTree element is set to the index of it. 
    52    206   **
    53    207   ** For the purposes of this comparison, EOF is considered greater than any
    54    208   ** other key value. If the keys are equal (only possible with two EOF
    55    209   ** values), it doesn't matter which index is stored.
    56    210   **
    57    211   ** The (N/4) elements of aTree[] that precede the final (N/2) described 
    58         -** above contains the index of the smallest of each block of 4 iterators.
    59         -** And so on. So that aTree[1] contains the index of the iterator that 
          212  +** above contain the index of the smallest of each block of 4 PmaReaders.
          213  +** And so on. So that aTree[1] contains the index of the PmaReader that 
    60    214   ** currently points to the smallest key value. aTree[0] is unused.
    61    215   **
    62    216   ** Example:
    63    217   **
    64         -**     aIter[0] -> Banana
    65         -**     aIter[1] -> Feijoa
    66         -**     aIter[2] -> Elderberry
    67         -**     aIter[3] -> Currant
    68         -**     aIter[4] -> Grapefruit
    69         -**     aIter[5] -> Apple
    70         -**     aIter[6] -> Durian
    71         -**     aIter[7] -> EOF
          218  +**     aReadr[0] -> Banana
          219  +**     aReadr[1] -> Feijoa
          220  +**     aReadr[2] -> Elderberry
          221  +**     aReadr[3] -> Currant
          222  +**     aReadr[4] -> Grapefruit
          223  +**     aReadr[5] -> Apple
          224  +**     aReadr[6] -> Durian
          225  +**     aReadr[7] -> EOF
    72    226   **
    73    227   **     aTree[] = { X, 5   0, 5    0, 3, 5, 6 }
    74    228   **
    75    229   ** The current element is "Apple" (the value of the key indicated by 
    76         -** iterator 5). When the Next() operation is invoked, iterator 5 will
          230  +** PmaReader 5). When the Next() operation is invoked, PmaReader 5 will
    77    231   ** be advanced to the next key in its segment. Say the next key is
    78    232   ** "Eggplant":
    79    233   **
    80         -**     aIter[5] -> Eggplant
          234  +**     aReadr[5] -> Eggplant
    81    235   **
    82         -** The contents of aTree[] are updated first by comparing the new iterator
    83         -** 5 key to the current key of iterator 4 (still "Grapefruit"). The iterator
          236  +** The contents of aTree[] are updated first by comparing the new PmaReader
          237  +** 5 key to the current key of PmaReader 4 (still "Grapefruit"). The PmaReader
    84    238   ** 5 value is still smaller, so aTree[6] is set to 5. And so on up the tree.
    85         -** The value of iterator 6 - "Durian" - is now smaller than that of iterator
          239  +** The value of PmaReader 6 - "Durian" - is now smaller than that of PmaReader
    86    240   ** 5, so aTree[3] is set to 6. Key 0 is smaller than key 6 (Banana<Durian),
    87    241   ** so the value written into element 1 of the array is 0. As follows:
    88    242   **
    89    243   **     aTree[] = { X, 0   0, 6    0, 3, 5, 6 }
    90    244   **
    91    245   ** In other words, each time we advance to the next sorter element, log2(N)
    92    246   ** key comparison operations are required, where N is the number of segments
    93    247   ** being merged (rounded up to the next power of 2).
    94    248   */
          249  +struct MergeEngine {
          250  +  int nTree;                 /* Used size of aTree/aReadr (power of 2) */
          251  +  SortSubtask *pTask;        /* Used by this thread only */
          252  +  int *aTree;                /* Current state of incremental merge */
          253  +  PmaReader *aReadr;         /* Array of PmaReaders to merge data from */
          254  +};
          255  +
          256  +/*
          257  +** This object represents a single thread of control in a sort operation.
          258  +** Exactly VdbeSorter.nTask instances of this object are allocated
          259  +** as part of each VdbeSorter object. Instances are never allocated any
          260  +** other way. VdbeSorter.nTask is set to the number of worker threads allowed
          261  +** (see SQLITE_CONFIG_WORKER_THREADS) plus one (the main thread).  Thus for
          262  +** single-threaded operation, there is exactly one instance of this object
          263  +** and for multi-threaded operation there are two or more instances.
          264  +**
          265  +** Essentially, this structure contains all those fields of the VdbeSorter
          266  +** structure for which each thread requires a separate instance. For example,
          267  +** each thread requires its own UnpackedRecord object to unpack records into
          268  +** as part of comparison operations.
          269  +**
          270  +** Before a background thread is launched, variable bDone is set to 0. Then, 
          271  +** right before it exits, the thread itself sets bDone to 1. This is used for 
          272  +** two purposes:
          273  +**
          274  +**   1. When flushing the contents of memory to a level-0 PMA on disk, to
          275  +**      attempt to select a SortSubtask for which there is not already an
          276  +**      active background thread (since doing so causes the main thread
          277  +**      to block until it finishes).
          278  +**
          279  +**   2. If SQLITE_DEBUG_SORTER_THREADS is defined, to determine if a call
          280  +**      to sqlite3ThreadJoin() is likely to block. Cases that are likely to
          281  +**      block provoke debugging output.
          282  +**
          283  +** In both cases, the effects of the main thread seeing (bDone==0) even
          284  +** after the thread has finished are not dire. So we don't worry about
          285  +** memory barriers and such here.
          286  +*/
          287  +struct SortSubtask {
          288  +  SQLiteThread *pThread;          /* Background thread, if any */
          289  +  int bDone;                      /* Set if thread is finished but not joined */
          290  +  VdbeSorter *pSorter;            /* Sorter that owns this sub-task */
          291  +  UnpackedRecord *pUnpacked;      /* Space to unpack a record */
          292  +  SorterList list;                /* List for thread to write to a PMA */
          293  +  int nPMA;                       /* Number of PMAs currently in file */
          294  +  SorterFile file;                /* Temp file for level-0 PMAs */
          295  +  SorterFile file2;               /* Space for other PMAs */
          296  +};
          297  +
          298  +/*
          299  +** Main sorter structure. A single instance of this is allocated for each 
          300  +** sorter cursor created by the VDBE.
          301  +**
          302  +** mxKeysize:
          303  +**   As records are added to the sorter by calls to sqlite3VdbeSorterWrite(),
          304  +**   this variable is updated so as to be set to the size on disk of the
          305  +**   largest record in the sorter.
          306  +*/
    95    307   struct VdbeSorter {
    96         -  i64 iWriteOff;                  /* Current write offset within file pTemp1 */
    97         -  i64 iReadOff;                   /* Current read offset within file pTemp1 */
    98         -  int nInMemory;                  /* Current size of pRecord list as PMA */
    99         -  int nTree;                      /* Used size of aTree/aIter (power of 2) */
   100         -  int nPMA;                       /* Number of PMAs stored in pTemp1 */
   101    308     int mnPmaSize;                  /* Minimum PMA size, in bytes */
   102    309     int mxPmaSize;                  /* Maximum PMA size, in bytes.  0==no limit */
   103         -  VdbeSorterIter *aIter;          /* Array of iterators to merge */
   104         -  int *aTree;                     /* Current state of incremental merge */
   105         -  sqlite3_file *pTemp1;           /* PMA file 1 */
   106         -  SorterRecord *pRecord;          /* Head of in-memory record list */
   107         -  UnpackedRecord *pUnpacked;      /* Used to unpack keys */
          310  +  int mxKeysize;                  /* Largest serialized key seen so far */
          311  +  int pgsz;                       /* Main database page size */
          312  +  PmaReader *pReader;             /* Read data from here after Rewind() */
          313  +  MergeEngine *pMerger;           /* Or here, if bUseThreads==0 */
          314  +  sqlite3 *db;                    /* Database connection */
          315  +  KeyInfo *pKeyInfo;              /* How to compare records */
          316  +  UnpackedRecord *pUnpacked;      /* Used by VdbeSorterCompare() */
          317  +  SorterList list;                /* List of in-memory records */
          318  +  int iMemory;                    /* Offset of free space in list.aMemory */
          319  +  int nMemory;                    /* Size of list.aMemory allocation in bytes */
          320  +  u8 bUsePMA;                     /* True if one or more PMAs created */
          321  +  u8 bUseThreads;                 /* True to use background threads */
          322  +  u8 iPrev;                       /* Previous thread used to flush PMA */
          323  +  u8 nTask;                       /* Size of aTask[] array */
          324  +  SortSubtask aTask[1];           /* One or more subtasks */
          325  +};
          326  +
          327  +/*
          328  +** An instance of the following object is used to read records out of a
          329  +** PMA, in sorted order.  The next key to be read is cached in nKey/aKey.
          330  +** aKey might point into aMap or into aBuffer.  If neither of those locations
          331  +** contains a contiguous representation of the key, then aAlloc is allocated
          332  +** and the key is copied into aAlloc and aKey is made to point to aAlloc.
          333  +**
          334  +** pFd==0 at EOF.
          335  +*/
          336  +struct PmaReader {
          337  +  i64 iReadOff;               /* Current read offset */
          338  +  i64 iEof;                   /* 1 byte past EOF for this PmaReader */
          339  +  int nAlloc;                 /* Bytes of space at aAlloc */
          340  +  int nKey;                   /* Number of bytes in key */
          341  +  sqlite3_file *pFd;          /* File handle we are reading from */
          342  +  u8 *aAlloc;                 /* Space for aKey if aBuffer and aMap won't work */
          343  +  u8 *aKey;                   /* Pointer to current key */
          344  +  u8 *aBuffer;                /* Current read buffer */
          345  +  int nBuffer;                /* Size of read buffer in bytes */
          346  +  u8 *aMap;                   /* Pointer to mapping of entire file */
          347  +  IncrMerger *pIncr;          /* Incremental merger */
   108    348   };
   109    349   
   110    350   /*
   111         -** The following type is an iterator for a PMA. It caches the current key in 
   112         -** variables nKey/aKey. If the iterator is at EOF, pFile==0.
   113         -*/
   114         -struct VdbeSorterIter {
   115         -  i64 iReadOff;                   /* Current read offset */
   116         -  i64 iEof;                       /* 1 byte past EOF for this iterator */
   117         -  int nAlloc;                     /* Bytes of space at aAlloc */
   118         -  int nKey;                       /* Number of bytes in key */
   119         -  sqlite3_file *pFile;            /* File iterator is reading from */
   120         -  u8 *aAlloc;                     /* Allocated space */
   121         -  u8 *aKey;                       /* Pointer to current key */
   122         -  u8 *aBuffer;                    /* Current read buffer */
   123         -  int nBuffer;                    /* Size of read buffer in bytes */
          351  +** Normally, a PmaReader object iterates through an existing PMA stored 
          352  +** within a temp file. However, if the PmaReader.pIncr variable points to
          353  +** an object of the following type, it may be used to iterate/merge through
          354  +** multiple PMAs simultaneously.
          355  +**
          356  +** There are two types of IncrMerger object - single (bUseThread==0) and 
          357  +** multi-threaded (bUseThread==1). 
          358  +**
          359  +** A multi-threaded IncrMerger object uses two temporary files - aFile[0] 
          360  +** and aFile[1]. Neither file is allowed to grow to more than mxSz bytes in 
          361  +** size. When the IncrMerger is initialized, it reads enough data from 
          362  +** pMerger to populate aFile[0]. It then sets variables within the 
          363  +** corresponding PmaReader object to read from that file and kicks off 
          364  +** a background thread to populate aFile[1] with the next mxSz bytes of 
          365  +** sorted record data from pMerger. 
          366  +**
          367  +** When the PmaReader reaches the end of aFile[0], it blocks until the
          368  +** background thread has finished populating aFile[1]. It then exchanges
          369  +** the contents of the aFile[0] and aFile[1] variables within this structure,
          370  +** sets the PmaReader fields to read from the new aFile[0] and kicks off
          371  +** another background thread to populate the new aFile[1]. And so on, until
          372  +** the contents of pMerger are exhausted.
          373  +**
          374  +** A single-threaded IncrMerger does not open any temporary files of its
          375  +** own. Instead, it has exclusive access to mxSz bytes of space beginning
          376  +** at offset iStartOff of file pTask->file2. And instead of using a 
          377  +** background thread to prepare data for the PmaReader, with a single
          378  +** threaded IncrMerger the allocated part of pTask->file2 is "refilled" with
          379  +** keys from pMerger by the calling thread whenever the PmaReader runs out
          380  +** of data.
          381  +*/
          382  +struct IncrMerger {
          383  +  SortSubtask *pTask;             /* Task that owns this merger */
          384  +  MergeEngine *pMerger;           /* Merge engine thread reads data from */
          385  +  i64 iStartOff;                  /* Offset to start writing file at */
          386  +  int mxSz;                       /* Maximum bytes of data to store */
          387  +  int bEof;                       /* Set to true when merge is finished */
          388  +  int bUseThread;                 /* True to use a bg thread for this object */
          389  +  SorterFile aFile[2];            /* aFile[0] for reading, [1] for writing */
   124    390   };
   125    391   
   126    392   /*
   127         -** An instance of this structure is used to organize the stream of records
   128         -** being written to files by the merge-sort code into aligned, page-sized
   129         -** blocks.  Doing all I/O in aligned page-sized blocks helps I/O to go
   130         -** faster on many operating systems.
          393  +** An instance of this object is used for writing a PMA.
          394  +**
          395  +** The PMA is written one record at a time.  Each record is of an arbitrary
          396  +** size.  But I/O is more efficient if it occurs in page-sized blocks where
          397  +** each block is aligned on a page boundary.  This object caches writes to
          398  +** the PMA so that aligned, page-size blocks are written.
   131    399   */
   132         -struct FileWriter {
          400  +struct PmaWriter {
   133    401     int eFWErr;                     /* Non-zero if in an error state */
   134    402     u8 *aBuffer;                    /* Pointer to write buffer */
   135    403     int nBuffer;                    /* Size of write buffer in bytes */
   136    404     int iBufStart;                  /* First byte of buffer to write */
   137    405     int iBufEnd;                    /* Last byte of buffer to write */
   138    406     i64 iWriteOff;                  /* Offset of start of buffer in file */
   139         -  sqlite3_file *pFile;            /* File to write to */
          407  +  sqlite3_file *pFd;              /* File handle to write to */
   140    408   };
   141    409   
   142    410   /*
   143         -** A structure to store a single record. All in-memory records are connected
   144         -** together into a linked list headed at VdbeSorter.pRecord using the 
   145         -** SorterRecord.pNext pointer.
          411  +** This object is the header on a single record while that record is being
          412  +** held in memory and prior to being written out as part of a PMA.
          413  +**
          414  +** How the linked list is connected depends on how memory is being managed
          415  +** by this module. If using a separate allocation for each in-memory record
          416  +** (VdbeSorter.list.aMemory==0), then the list is always connected using the
          417  +** SorterRecord.u.pNext pointers.
          418  +**
          419  +** Or, if using the single large allocation method (VdbeSorter.list.aMemory!=0),
          420  +** then while records are being accumulated the list is linked using the
          421  +** SorterRecord.u.iNext offset. This is because the aMemory[] array may
          422  +** be sqlite3Realloc()ed while records are being accumulated. Once the VM
          423  +** has finished passing records to the sorter, or when the in-memory buffer
          424  +** is full, the list is sorted. As part of the sorting process, it is
          425  +** converted to use the SorterRecord.u.pNext pointers. See function
          426  +** vdbeSorterSort() for details.
   146    427   */
   147    428   struct SorterRecord {
   148         -  void *pVal;
   149         -  int nVal;
   150         -  SorterRecord *pNext;
          429  +  int nVal;                       /* Size of the record in bytes */
          430  +  union {
          431  +    SorterRecord *pNext;          /* Pointer to next record in list */
          432  +    int iNext;                    /* Offset within aMemory of next record */
          433  +  } u;
          434  +  /* The data for the record immediately follows this header */
   151    435   };
   152    436   
   153         -/* Minimum allowable value for the VdbeSorter.nWorking variable */
          437  +/* Return a pointer to the buffer containing the record data for SorterRecord
          438  +** object p. Should be used as if:
          439  +**
          440  +**   void *SRVAL(SorterRecord *p) { return (void*)&p[1]; }
          441  +*/
          442  +#define SRVAL(p) ((void*)((SorterRecord*)(p) + 1))
          443  +
          444  +/* The minimum PMA size is set to this value multiplied by the database
          445  +** page size in bytes.  */
   154    446   #define SORTER_MIN_WORKING 10
   155    447   
   156         -/* Maximum number of segments to merge in a single pass. */
          448  +/* Maximum number of PMAs that a single MergeEngine can merge */
   157    449   #define SORTER_MAX_MERGE_COUNT 16
   158    450   
          451  +static int vdbeIncrSwap(IncrMerger*);
          452  +static void vdbeIncrFree(IncrMerger *);
          453  +
   159    454   /*
   160         -** Free all memory belonging to the VdbeSorterIter object passed as the second
          455  +** Free all memory belonging to the PmaReader object passed as the
   161    456   ** argument. All structure fields are set to zero before returning.
   162    457   */
   163         -static void vdbeSorterIterZero(sqlite3 *db, VdbeSorterIter *pIter){
   164         -  sqlite3DbFree(db, pIter->aAlloc);
   165         -  sqlite3DbFree(db, pIter->aBuffer);
   166         -  memset(pIter, 0, sizeof(VdbeSorterIter));
          458  +static void vdbePmaReaderClear(PmaReader *pReadr){
          459  +  sqlite3_free(pReadr->aAlloc);
          460  +  sqlite3_free(pReadr->aBuffer);
          461  +  if( pReadr->aMap ) sqlite3OsUnfetch(pReadr->pFd, 0, pReadr->aMap);
          462  +  vdbeIncrFree(pReadr->pIncr);
          463  +  memset(pReadr, 0, sizeof(PmaReader));
   167    464   }
   168    465   
   169    466   /*
   170         -** Read nByte bytes of data from the stream of data iterated by object p.
          467  +** Read the next nByte bytes of data from the PMA p.
   171    468   ** If successful, set *ppOut to point to a buffer containing the data
   172    469   ** and return SQLITE_OK. Otherwise, if an error occurs, return an SQLite
   173    470   ** error code.
   174    471   **
   175         -** The buffer indicated by *ppOut may only be considered valid until the
          472  +** The buffer returned in *ppOut is only valid until the
   176    473   ** next call to this function.
   177    474   */
   178         -static int vdbeSorterIterRead(
   179         -  sqlite3 *db,                    /* Database handle (for malloc) */
   180         -  VdbeSorterIter *p,              /* Iterator */
          475  +static int vdbePmaReadBlob(
          476  +  PmaReader *p,                   /* PmaReader from which to take the blob */
   181    477     int nByte,                      /* Bytes of data to read */
   182    478     u8 **ppOut                      /* OUT: Pointer to buffer containing data */
   183    479   ){
   184    480     int iBuf;                       /* Offset within buffer to read from */
   185    481     int nAvail;                     /* Bytes of data available in buffer */
          482  +
          483  +  if( p->aMap ){
          484  +    *ppOut = &p->aMap[p->iReadOff];
          485  +    p->iReadOff += nByte;
          486  +    return SQLITE_OK;
          487  +  }
          488  +
   186    489     assert( p->aBuffer );
   187    490   
   188    491     /* If there is no more data to be read from the buffer, read the next 
   189    492     ** p->nBuffer bytes of data from the file into it. Or, if there are less
   190    493     ** than p->nBuffer bytes remaining in the PMA, read all remaining data.  */
   191    494     iBuf = p->iReadOff % p->nBuffer;
   192    495     if( iBuf==0 ){
................................................................................
   197    500       if( (p->iEof - p->iReadOff) > (i64)p->nBuffer ){
   198    501         nRead = p->nBuffer;
   199    502       }else{
   200    503         nRead = (int)(p->iEof - p->iReadOff);
   201    504       }
   202    505       assert( nRead>0 );
   203    506   
   204         -    /* Read data from the file. Return early if an error occurs. */
   205         -    rc = sqlite3OsRead(p->pFile, p->aBuffer, nRead, p->iReadOff);
          507  +    /* Read data from the file. Return early if an error occurs. */
          508  +    rc = sqlite3OsRead(p->pFd, p->aBuffer, nRead, p->iReadOff);
   206    509       assert( rc!=SQLITE_IOERR_SHORT_READ );
   207    510       if( rc!=SQLITE_OK ) return rc;
   208    511     }
   209    512     nAvail = p->nBuffer - iBuf; 
   210    513   
   211    514     if( nByte<=nAvail ){
   212    515       /* The requested data is available in the in-memory buffer. In this
................................................................................
   218    521       /* The requested data is not all available in the in-memory buffer.
   219    522       ** In this case, allocate space at p->aAlloc[] to copy the requested
   220    523       ** range into. Then return a copy of pointer p->aAlloc to the caller.  */
   221    524       int nRem;                     /* Bytes remaining to copy */
   222    525   
   223    526       /* Extend the p->aAlloc[] allocation if required. */
   224    527       if( p->nAlloc<nByte ){
   225         -      int nNew = p->nAlloc*2;
          528  +      u8 *aNew;
          529  +      int nNew = MAX(128, p->nAlloc*2);
   226    530         while( nByte>nNew ) nNew = nNew*2;
   227         -      p->aAlloc = sqlite3DbReallocOrFree(db, p->aAlloc, nNew);
   228         -      if( !p->aAlloc ) return SQLITE_NOMEM;
          531  +      aNew = sqlite3Realloc(p->aAlloc, nNew);
          532  +      if( !aNew ) return SQLITE_NOMEM;
   229    533         p->nAlloc = nNew;
          534  +      p->aAlloc = aNew;
   230    535       }
   231    536   
   232    537       /* Copy as much data as is available in the buffer into the start of
   233    538       ** p->aAlloc[].  */
   234    539       memcpy(p->aAlloc, &p->aBuffer[iBuf], nAvail);
   235    540       p->iReadOff += nAvail;
   236    541       nRem = nByte - nAvail;
   237    542   
   238    543       /* The following loop copies up to p->nBuffer bytes per iteration into
   239    544       ** the p->aAlloc[] buffer.  */
   240    545       while( nRem>0 ){
   241         -      int rc;                     /* vdbeSorterIterRead() return code */
          546  +      int rc;                     /* vdbePmaReadBlob() return code */
   242    547         int nCopy;                  /* Number of bytes to copy */
   243    548         u8 *aNext;                  /* Pointer to buffer to copy data from */
   244    549   
   245    550         nCopy = nRem;
   246    551         if( nRem>p->nBuffer ) nCopy = p->nBuffer;
   247         -      rc = vdbeSorterIterRead(db, p, nCopy, &aNext);
          552  +      rc = vdbePmaReadBlob(p, nCopy, &aNext);
   248    553         if( rc!=SQLITE_OK ) return rc;
   249    554         assert( aNext!=p->aAlloc );
   250    555         memcpy(&p->aAlloc[nByte - nRem], aNext, nCopy);
   251    556         nRem -= nCopy;
   252    557       }
   253    558   
   254    559       *ppOut = p->aAlloc;
................................................................................
   257    562     return SQLITE_OK;
   258    563   }
   259    564   
   260    565   /*
   261    566   ** Read a varint from the stream of data accessed by p. Set *pnOut to
   262    567   ** the value read.
   263    568   */
   264         -static int vdbeSorterIterVarint(sqlite3 *db, VdbeSorterIter *p, u64 *pnOut){
          569  +static int vdbePmaReadVarint(PmaReader *p, u64 *pnOut){
   265    570     int iBuf;
   266    571   
   267         -  iBuf = p->iReadOff % p->nBuffer;
   268         -  if( iBuf && (p->nBuffer-iBuf)>=9 ){
   269         -    p->iReadOff += sqlite3GetVarint(&p->aBuffer[iBuf], pnOut);
          572  +  if( p->aMap ){
          573  +    p->iReadOff += sqlite3GetVarint(&p->aMap[p->iReadOff], pnOut);
   270    574     }else{
   271         -    u8 aVarint[16], *a;
   272         -    int i = 0, rc;
   273         -    do{
   274         -      rc = vdbeSorterIterRead(db, p, 1, &a);
   275         -      if( rc ) return rc;
   276         -      aVarint[(i++)&0xf] = a[0];
   277         -    }while( (a[0]&0x80)!=0 );
   278         -    sqlite3GetVarint(aVarint, pnOut);
          575  +    iBuf = p->iReadOff % p->nBuffer;
          576  +    if( iBuf && (p->nBuffer-iBuf)>=9 ){
          577  +      p->iReadOff += sqlite3GetVarint(&p->aBuffer[iBuf], pnOut);
          578  +    }else{
          579  +      u8 aVarint[16], *a;
          580  +      int i = 0, rc;
          581  +      do{
          582  +        rc = vdbePmaReadBlob(p, 1, &a);
          583  +        if( rc ) return rc;
          584  +        aVarint[(i++)&0xf] = a[0];
          585  +      }while( (a[0]&0x80)!=0 );
          586  +      sqlite3GetVarint(aVarint, pnOut);
          587  +    }
   279    588     }
   280    589   
   281    590     return SQLITE_OK;
   282    591   }
   283    592   
   284         -
   285         -/*
   286         -** Advance iterator pIter to the next key in its PMA. Return SQLITE_OK if
   287         -** no error occurs, or an SQLite error code if one does.
   288         -*/
   289         -static int vdbeSorterIterNext(
   290         -  sqlite3 *db,                    /* Database handle (for sqlite3DbMalloc() ) */
   291         -  VdbeSorterIter *pIter           /* Iterator to advance */
   292         -){
   293         -  int rc;                         /* Return Code */
   294         -  u64 nRec = 0;                   /* Size of record in bytes */
   295         -
   296         -  if( pIter->iReadOff>=pIter->iEof ){
   297         -    /* This is an EOF condition */
   298         -    vdbeSorterIterZero(db, pIter);
   299         -    return SQLITE_OK;
   300         -  }
   301         -
   302         -  rc = vdbeSorterIterVarint(db, pIter, &nRec);
   303         -  if( rc==SQLITE_OK ){
   304         -    pIter->nKey = (int)nRec;
   305         -    rc = vdbeSorterIterRead(db, pIter, (int)nRec, &pIter->aKey);
   306         -  }
   307         -
   308         -  return rc;
   309         -}
   310         -
   311         -/*
   312         -** Initialize iterator pIter to scan through the PMA stored in file pFile
   313         -** starting at offset iStart and ending at offset iEof-1. This function 
   314         -** leaves the iterator pointing to the first key in the PMA (or EOF if the 
   315         -** PMA is empty).
   316         -*/
   317         -static int vdbeSorterIterInit(
   318         -  sqlite3 *db,                    /* Database handle */
   319         -  const VdbeSorter *pSorter,      /* Sorter object */
   320         -  i64 iStart,                     /* Start offset in pFile */
   321         -  VdbeSorterIter *pIter,          /* Iterator to populate */
   322         -  i64 *pnByte                     /* IN/OUT: Increment this value by PMA size */
          593  +/*
          594  +** Attempt to memory map file pFile. If successful, set *pp to point to the
          595  +** new mapping and return SQLITE_OK. If the mapping is not attempted 
          596  +** (because the file is too large or the VFS layer is configured not to use
          597  +** mmap), return SQLITE_OK and set *pp to NULL.
          598  +**
          599  +** Or, if an error occurs, return an SQLite error code. The final value of
          600  +** *pp is undefined in this case.
          601  +*/
          602  +static int vdbeSorterMapFile(SortSubtask *pTask, SorterFile *pFile, u8 **pp){
          603  +  int rc = SQLITE_OK;
          604  +  if( pFile->iEof<=(i64)(pTask->pSorter->db->nMaxSorterMmap) ){
          605  +    rc = sqlite3OsFetch(pFile->pFd, 0, (int)pFile->iEof, (void**)pp);
          606  +    testcase( rc!=SQLITE_OK );
          607  +  }
          608  +  return rc;
          609  +}
          610  +
          611  +/*
          612  +** Attach PmaReader pReadr to file pFile (if it is not already attached to
          613  +** that file) and seek it to offset iOff within the file.  Return SQLITE_OK 
          614  +** if successful, or an SQLite error code if an error occurs.
          615  +*/
          616  +static int vdbePmaReaderSeek(
          617  +  SortSubtask *pTask,             /* Task context */
          618  +  PmaReader *pReadr,              /* Reader whose cursor is to be moved */
          619  +  SorterFile *pFile,              /* Sorter file to read from */
          620  +  i64 iOff                        /* Offset in pFile */
   323    621   ){
   324    622     int rc = SQLITE_OK;
   325         -  int nBuf;
   326         -
   327         -  nBuf = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
   328         -
   329         -  assert( pSorter->iWriteOff>iStart );
   330         -  assert( pIter->aAlloc==0 );
   331         -  assert( pIter->aBuffer==0 );
   332         -  pIter->pFile = pSorter->pTemp1;
   333         -  pIter->iReadOff = iStart;
   334         -  pIter->nAlloc = 128;
   335         -  pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc);
   336         -  pIter->nBuffer = nBuf;
   337         -  pIter->aBuffer = (u8 *)sqlite3DbMallocRaw(db, nBuf);
   338         -
   339         -  if( !pIter->aBuffer ){
   340         -    rc = SQLITE_NOMEM;
   341         -  }else{
   342         -    int iBuf;
   343         -
   344         -    iBuf = iStart % nBuf;
   345         -    if( iBuf ){
   346         -      int nRead = nBuf - iBuf;
   347         -      if( (iStart + nRead) > pSorter->iWriteOff ){
   348         -        nRead = (int)(pSorter->iWriteOff - iStart);
          623  +
          624  +  assert( pReadr->pIncr==0 || pReadr->pIncr->bEof==0 );
          625  +
          626  +  if( sqlite3FaultSim(201) ) return SQLITE_IOERR_READ;
          627  +  if( pReadr->aMap ){
          628  +    sqlite3OsUnfetch(pReadr->pFd, 0, pReadr->aMap);
          629  +    pReadr->aMap = 0;
          630  +  }
          631  +  pReadr->iReadOff = iOff;
          632  +  pReadr->iEof = pFile->iEof;
          633  +  pReadr->pFd = pFile->pFd;
          634  +
          635  +  rc = vdbeSorterMapFile(pTask, pFile, &pReadr->aMap);
          636  +  if( rc==SQLITE_OK && pReadr->aMap==0 ){
          637  +    int pgsz = pTask->pSorter->pgsz;
          638  +    int iBuf = pReadr->iReadOff % pgsz;
          639  +    if( pReadr->aBuffer==0 ){
          640  +      pReadr->aBuffer = (u8*)sqlite3Malloc(pgsz);
          641  +      if( pReadr->aBuffer==0 ) rc = SQLITE_NOMEM;
          642  +      pReadr->nBuffer = pgsz;
          643  +    }
          644  +    if( rc==SQLITE_OK && iBuf ){
          645  +      int nRead = pgsz - iBuf;
          646  +      if( (pReadr->iReadOff + nRead) > pReadr->iEof ){
          647  +        nRead = (int)(pReadr->iEof - pReadr->iReadOff);
   349    648         }
   350    649         rc = sqlite3OsRead(
   351         -          pSorter->pTemp1, &pIter->aBuffer[iBuf], nRead, iStart
          650  +          pReadr->pFd, &pReadr->aBuffer[iBuf], nRead, pReadr->iReadOff
   352    651         );
   353         -    }
   354         -
   355         -    if( rc==SQLITE_OK ){
   356         -      u64 nByte;                       /* Size of PMA in bytes */
   357         -      pIter->iEof = pSorter->iWriteOff;
   358         -      rc = vdbeSorterIterVarint(db, pIter, &nByte);
   359         -      pIter->iEof = pIter->iReadOff + nByte;
   360         -      *pnByte += nByte;
   361         -    }
          652  +      testcase( rc!=SQLITE_OK );
          653  +    }
          654  +  }
          655  +
          656  +  return rc;
          657  +}
          658  +
          659  +/*
          660  +** Advance PmaReader pReadr to the next key in its PMA. Return SQLITE_OK if
          661  +** no error occurs, or an SQLite error code if one does.
          662  +*/
          663  +static int vdbePmaReaderNext(PmaReader *pReadr){
          664  +  int rc = SQLITE_OK;             /* Return Code */
          665  +  u64 nRec = 0;                   /* Size of record in bytes */
          666  +
          667  +
          668  +  if( pReadr->iReadOff>=pReadr->iEof ){
          669  +    IncrMerger *pIncr = pReadr->pIncr;
          670  +    int bEof = 1;
          671  +    if( pIncr ){
          672  +      rc = vdbeIncrSwap(pIncr);
          673  +      if( rc==SQLITE_OK && pIncr->bEof==0 ){
          674  +        rc = vdbePmaReaderSeek(
          675  +            pIncr->pTask, pReadr, &pIncr->aFile[0], pIncr->iStartOff
          676  +        );
          677  +        bEof = 0;
          678  +      }
          679  +    }
          680  +
          681  +    if( bEof ){
          682  +      /* This is an EOF condition */
          683  +      vdbePmaReaderClear(pReadr);
          684  +      testcase( rc!=SQLITE_OK );
          685  +      return rc;
          686  +    }
          687  +  }
          688  +
          689  +  if( rc==SQLITE_OK ){
          690  +    rc = vdbePmaReadVarint(pReadr, &nRec);
          691  +  }
          692  +  if( rc==SQLITE_OK ){
          693  +    pReadr->nKey = (int)nRec;
          694  +    rc = vdbePmaReadBlob(pReadr, (int)nRec, &pReadr->aKey);
          695  +    testcase( rc!=SQLITE_OK );
          696  +  }
          697  +
          698  +  return rc;
          699  +}
          700  +
          701  +/*
          702  +** Initialize PmaReader pReadr to scan through the PMA stored in file pFile
          703  +** starting at offset iStart and ending at offset iEof-1. This function 
          704  +** leaves the PmaReader pointing to the first key in the PMA (or EOF if the 
          705  +** PMA is empty).
          706  +**
          707  +** If the pnByte parameter is NULL, then it is assumed that the file 
          708  +** contains a single PMA, and that that PMA omits the initial length varint.
          709  +*/
          710  +static int vdbePmaReaderInit(
          711  +  SortSubtask *pTask,             /* Task context */
          712  +  SorterFile *pFile,              /* Sorter file to read from */
          713  +  i64 iStart,                     /* Start offset in pFile */
          714  +  PmaReader *pReadr,              /* PmaReader to populate */
          715  +  i64 *pnByte                     /* IN/OUT: Increment this value by PMA size */
          716  +){
          717  +  int rc;
          718  +
          719  +  assert( pFile->iEof>iStart );
          720  +  assert( pReadr->aAlloc==0 && pReadr->nAlloc==0 );
          721  +  assert( pReadr->aBuffer==0 );
          722  +  assert( pReadr->aMap==0 );
          723  +
          724  +  rc = vdbePmaReaderSeek(pTask, pReadr, pFile, iStart);
          725  +  if( rc==SQLITE_OK ){
          726  +    u64 nByte;                    /* Size of PMA in bytes */
          727  +    rc = vdbePmaReadVarint(pReadr, &nByte);
          728  +    pReadr->iEof = pReadr->iReadOff + nByte;
          729  +    *pnByte += nByte;
   362    730     }
   363    731   
   364    732     if( rc==SQLITE_OK ){
   365         -    rc = vdbeSorterIterNext(db, pIter);
          733  +    rc = vdbePmaReaderNext(pReadr);
   366    734     }
   367    735     return rc;
   368    736   }
   369    737   
   370    738   
   371    739   /*
   372    740   ** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2, 
   373         -** size nKey2 bytes).  Argument pKeyInfo supplies the collation functions
   374         -** used by the comparison. If an error occurs, return an SQLite error code.
   375         -** Otherwise, return SQLITE_OK and set *pRes to a negative, zero or positive
   376         -** value, depending on whether key1 is smaller, equal to or larger than key2.
          741  +** size nKey2 bytes). Use (pTask->pKeyInfo) for the collation sequences
          742  +** used by the comparison. Return the result of the comparison.
   377    743   **
   378         -** If the bOmitRowid argument is non-zero, assume both keys end in a rowid
   379         -** field. For the purposes of the comparison, ignore it. Also, if bOmitRowid
   380         -** is true and key1 contains even a single NULL value, it is considered to
   381         -** be less than key2. Even if key2 also contains NULL values.
          744  +** Before returning, object (pTask->pUnpacked) is populated with the
          745  +** unpacked version of key2. Or, if pKey2 is passed a NULL pointer, then it 
          746  +** is assumed that the (pTask->pUnpacked) structure already contains the 
          747  +** unpacked key to use as key2.
   382    748   **
   383         -** If pKey2 is passed a NULL pointer, then it is assumed that the pCsr->aSpace
   384         -** has been allocated and contains an unpacked record that is used as key2.
          749  +** If an OOM error is encountered, (pTask->pUnpacked->errCode) is set
          750  +** to SQLITE_NOMEM.
   385    751   */
   386         -static void vdbeSorterCompare(
   387         -  const VdbeCursor *pCsr,         /* Cursor object (for pKeyInfo) */
   388         -  int nKeyCol,                    /* Num of columns. 0 means "all" */
          752  +static int vdbeSorterCompare(
          753  +  SortSubtask *pTask,             /* Subtask context (for pKeyInfo) */
   389    754     const void *pKey1, int nKey1,   /* Left side of comparison */
   390         -  const void *pKey2, int nKey2,   /* Right side of comparison */
   391         -  int *pRes                       /* OUT: Result of comparison */
          755  +  const void *pKey2, int nKey2    /* Right side of comparison */
   392    756   ){
   393         -  KeyInfo *pKeyInfo = pCsr->pKeyInfo;
   394         -  VdbeSorter *pSorter = pCsr->pSorter;
   395         -  UnpackedRecord *r2 = pSorter->pUnpacked;
   396         -  int i;
   397         -
          757  +  UnpackedRecord *r2 = pTask->pUnpacked;
   398    758     if( pKey2 ){
   399         -    sqlite3VdbeRecordUnpack(pKeyInfo, nKey2, pKey2, r2);
          759  +    sqlite3VdbeRecordUnpack(pTask->pSorter->pKeyInfo, nKey2, pKey2, r2);
   400    760     }
   401         -
   402         -  if( nKeyCol ){
   403         -    r2->nField = nKeyCol;
   404         -    for(i=0; i<nKeyCol; i++){
   405         -      if( r2->aMem[i].flags & MEM_Null ){
   406         -        *pRes = -1;
   407         -        return;
   408         -      }
   409         -    }
   410         -    assert( r2->default_rc==0 );
   411         -  }
   412         -
   413         -  *pRes = sqlite3VdbeRecordCompare(nKey1, pKey1, r2, 0);
   414         -}
   415         -
   416         -/*
   417         -** This function is called to compare two iterator keys when merging 
   418         -** multiple b-tree segments. Parameter iOut is the index of the aTree[] 
   419         -** value to recalculate.
   420         -*/
   421         -static int vdbeSorterDoCompare(const VdbeCursor *pCsr, int iOut){
   422         -  VdbeSorter *pSorter = pCsr->pSorter;
   423         -  int i1;
   424         -  int i2;
   425         -  int iRes;
   426         -  VdbeSorterIter *p1;
   427         -  VdbeSorterIter *p2;
   428         -
   429         -  assert( iOut<pSorter->nTree && iOut>0 );
   430         -
   431         -  if( iOut>=(pSorter->nTree/2) ){
   432         -    i1 = (iOut - pSorter->nTree/2) * 2;
   433         -    i2 = i1 + 1;
   434         -  }else{
   435         -    i1 = pSorter->aTree[iOut*2];
   436         -    i2 = pSorter->aTree[iOut*2+1];
   437         -  }
   438         -
   439         -  p1 = &pSorter->aIter[i1];
   440         -  p2 = &pSorter->aIter[i2];
   441         -
   442         -  if( p1->pFile==0 ){
   443         -    iRes = i2;
   444         -  }else if( p2->pFile==0 ){
   445         -    iRes = i1;
   446         -  }else{
   447         -    int res;
   448         -    assert( pCsr->pSorter->pUnpacked!=0 );  /* allocated in vdbeSorterMerge() */
   449         -    vdbeSorterCompare(
   450         -        pCsr, 0, p1->aKey, p1->nKey, p2->aKey, p2->nKey, &res
   451         -    );
   452         -    if( res<=0 ){
   453         -      iRes = i1;
   454         -    }else{
   455         -      iRes = i2;
   456         -    }
   457         -  }
   458         -
   459         -  pSorter->aTree[iOut] = iRes;
   460         -  return SQLITE_OK;
          761  +  return sqlite3VdbeRecordCompare(nKey1, pKey1, r2, 0);
   461    762   }
   462    763   
   463    764   /*
   464    765   ** Initialize the temporary index cursor just opened as a sorter cursor.
          766  +**
          767  +** Usually, the sorter module uses the value of (pCsr->pKeyInfo->nField)
          768  +** to determine the number of fields that should be compared from the
          769  +** records being sorted. However, if the value passed as argument nField
          770  +** is non-zero and the sorter is able to guarantee a stable sort, nField
          771  +** is used instead. This is used when sorting records for a CREATE INDEX
          772  +** statement. In this case, keys are always delivered to the sorter in
          773  +** order of the primary key, which happens to make up the final part 
          774  +** of the records being sorted. So if the sort is stable, there is never
          775  +** any reason to compare PK fields and they can be ignored for a small
          776  +** performance boost.
          777  +**
          778  +** The sorter can guarantee a stable sort when running in single-threaded
          779  +** mode, but not in multi-threaded mode.
          780  +**
          781  +** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
   465    782   */
   466         -int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){
          783  +int sqlite3VdbeSorterInit(
          784  +  sqlite3 *db,                    /* Database connection (for malloc()) */
          785  +  int nField,                     /* Number of key fields in each record */
          786  +  VdbeCursor *pCsr                /* Cursor that holds the new sorter */
          787  +){
   467    788     int pgsz;                       /* Page size of main database */
          789  +  int i;                          /* Used to iterate through aTask[] */
   468    790     int mxCache;                    /* Cache size */
   469    791     VdbeSorter *pSorter;            /* The new sorter */
   470         -  char *d;                        /* Dummy */
          792  +  KeyInfo *pKeyInfo;              /* Copy of pCsr->pKeyInfo with db==0 */
          793  +  int szKeyInfo;                  /* Size of pCsr->pKeyInfo in bytes */
          794  +  int sz;                         /* Size of pSorter in bytes */
          795  +  int rc = SQLITE_OK;
          796  +#if SQLITE_MAX_WORKER_THREADS==0
          797  +# define nWorker 0
          798  +#else
          799  +  int nWorker;
          800  +#endif
          801  +
          802  +  /* Initialize the upper limit on the number of worker threads */
          803  +#if SQLITE_MAX_WORKER_THREADS>0
          804  +  if( sqlite3TempInMemory(db) || sqlite3GlobalConfig.bCoreMutex==0 ){
          805  +    nWorker = 0;
          806  +  }else{
          807  +    nWorker = db->aLimit[SQLITE_LIMIT_WORKER_THREADS];
          808  +  }
          809  +#endif
          810  +
          811  +  /* Do not allow the total number of threads (main thread + all workers)
          812  +  ** to exceed the maximum merge count */
          813  +#if SQLITE_MAX_WORKER_THREADS>=SORTER_MAX_MERGE_COUNT
          814  +  if( nWorker>=SORTER_MAX_MERGE_COUNT ){
          815  +    nWorker = SORTER_MAX_MERGE_COUNT-1;
          816  +  }
          817  +#endif
   471    818   
   472    819     assert( pCsr->pKeyInfo && pCsr->pBt==0 );
   473         -  pCsr->pSorter = pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter));
          820  +  szKeyInfo = sizeof(KeyInfo) + (pCsr->pKeyInfo->nField-1)*sizeof(CollSeq*);
          821  +  sz = sizeof(VdbeSorter) + nWorker * sizeof(SortSubtask);
          822  +
          823  +  pSorter = (VdbeSorter*)sqlite3DbMallocZero(db, sz + szKeyInfo);
          824  +  pCsr->pSorter = pSorter;
   474    825     if( pSorter==0 ){
   475         -    return SQLITE_NOMEM;
   476         -  }
   477         -  
   478         -  pSorter->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo, 0, 0, &d);
   479         -  if( pSorter->pUnpacked==0 ) return SQLITE_NOMEM;
   480         -  assert( pSorter->pUnpacked==(UnpackedRecord *)d );
   481         -
   482         -  if( !sqlite3TempInMemory(db) ){
   483         -    pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
   484         -    pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz;
   485         -    mxCache = db->aDb[0].pSchema->cache_size;
   486         -    if( mxCache<SORTER_MIN_WORKING ) mxCache = SORTER_MIN_WORKING;
   487         -    pSorter->mxPmaSize = mxCache * pgsz;
   488         -  }
   489         -
   490         -  return SQLITE_OK;
   491         -}
          826  +    rc = SQLITE_NOMEM;
          827  +  }else{
          828  +    pSorter->pKeyInfo = pKeyInfo = (KeyInfo*)((u8*)pSorter + sz);
          829  +    memcpy(pKeyInfo, pCsr->pKeyInfo, szKeyInfo);
          830  +    pKeyInfo->db = 0;
          831  +    if( nField && nWorker==0 ) pKeyInfo->nField = nField;
          832  +    pSorter->pgsz = pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
          833  +    pSorter->nTask = nWorker + 1;
          834  +    pSorter->bUseThreads = (pSorter->nTask>1);
          835  +    pSorter->db = db;
          836  +    for(i=0; i<pSorter->nTask; i++){
          837  +      SortSubtask *pTask = &pSorter->aTask[i];
          838  +      pTask->pSorter = pSorter;
          839  +    }
          840  +
          841  +    if( !sqlite3TempInMemory(db) ){
          842  +      pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz;
          843  +      mxCache = db->aDb[0].pSchema->cache_size;
          844  +      if( mxCache<SORTER_MIN_WORKING ) mxCache = SORTER_MIN_WORKING;
          845  +      pSorter->mxPmaSize = mxCache * pgsz;
          846  +
          847  +      /* If the application has not configured scratch memory using
          848  +      ** SQLITE_CONFIG_SCRATCH then we assume it is OK to do large memory
          849  +      ** allocations.  If scratch memory has been configured, then assume
          850  +      ** large memory allocations should be avoided to prevent heap
          851  +      ** fragmentation.
          852  +      */
          853  +      if( sqlite3GlobalConfig.pScratch==0 ){
          854  +        assert( pSorter->iMemory==0 );
          855  +        pSorter->nMemory = pgsz;
          856  +        pSorter->list.aMemory = (u8*)sqlite3Malloc(pgsz);
          857  +        if( !pSorter->list.aMemory ) rc = SQLITE_NOMEM;
          858  +      }
          859  +    }
          860  +  }
          861  +
          862  +  return rc;
          863  +}
          864  +#undef nWorker   /* Defined at the top of this function */
   492    865   
   493    866   /*
   494    867   ** Free the list of sorted records starting at pRecord.
   495    868   */
   496    869   static void vdbeSorterRecordFree(sqlite3 *db, SorterRecord *pRecord){
   497    870     SorterRecord *p;
   498    871     SorterRecord *pNext;
   499    872     for(p=pRecord; p; p=pNext){
   500         -    pNext = p->pNext;
          873  +    pNext = p->u.pNext;
   501    874       sqlite3DbFree(db, p);
   502    875     }
   503    876   }
          877  +
          878  +/*
          879  +** Free all resources owned by the object indicated by argument pTask. All 
          880  +** fields of *pTask are zeroed before returning.
          881  +*/
          882  +static void vdbeSortSubtaskCleanup(sqlite3 *db, SortSubtask *pTask){
          883  +  sqlite3DbFree(db, pTask->pUnpacked);
          884  +  pTask->pUnpacked = 0;
          885  +#if SQLITE_MAX_WORKER_THREADS>0
          886  +  /* pTask->list.aMemory can only be non-zero if it was handed memory
          887  +  ** from the main thread.  That only occurs when SQLITE_MAX_WORKER_THREADS>0 */
          888  +  if( pTask->list.aMemory ){
          889  +    sqlite3_free(pTask->list.aMemory);
          890  +    pTask->list.aMemory = 0;
          891  +  }else
          892  +#endif
          893  +  {
          894  +    assert( pTask->list.aMemory==0 );
          895  +    vdbeSorterRecordFree(0, pTask->list.pList);
          896  +  }
          897  +  pTask->list.pList = 0;
          898  +  if( pTask->file.pFd ){
          899  +    sqlite3OsCloseFree(pTask->file.pFd);
          900  +    pTask->file.pFd = 0;
          901  +    pTask->file.iEof = 0;
          902  +  }
          903  +  if( pTask->file2.pFd ){
          904  +    sqlite3OsCloseFree(pTask->file2.pFd);
          905  +    pTask->file2.pFd = 0;
          906  +    pTask->file2.iEof = 0;
          907  +  }
          908  +}
          909  +
          910  +#ifdef SQLITE_DEBUG_SORTER_THREADS
          911  +static void vdbeSorterWorkDebug(SortSubtask *pTask, const char *zEvent){
          912  +  i64 t;
          913  +  int iTask = (pTask - pTask->pSorter->aTask);
          914  +  sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t);
          915  +  fprintf(stderr, "%lld:%d %s\n", t, iTask, zEvent);
          916  +}
          917  +static void vdbeSorterRewindDebug(const char *zEvent){
          918  +  i64 t;
          919  +  sqlite3OsCurrentTimeInt64(sqlite3_vfs_find(0), &t);
          920  +  fprintf(stderr, "%lld:X %s\n", t, zEvent);
          921  +}
          922  +static void vdbeSorterPopulateDebug(
          923  +  SortSubtask *pTask,
          924  +  const char *zEvent
          925  +){
          926  +  i64 t;
          927  +  int iTask = (pTask - pTask->pSorter->aTask);
          928  +  sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t);
          929  +  fprintf(stderr, "%lld:bg%d %s\n", t, iTask, zEvent);
          930  +}
          931  +static void vdbeSorterBlockDebug(
          932  +  SortSubtask *pTask,
          933  +  int bBlocked,
          934  +  const char *zEvent
          935  +){
          936  +  if( bBlocked ){
          937  +    i64 t;
          938  +    sqlite3OsCurrentTimeInt64(pTask->pSorter->db->pVfs, &t);
          939  +    fprintf(stderr, "%lld:main %s\n", t, zEvent);
          940  +  }
          941  +}
          942  +#else
          943  +# define vdbeSorterWorkDebug(x,y)
          944  +# define vdbeSorterRewindDebug(y)
          945  +# define vdbeSorterPopulateDebug(x,y)
          946  +# define vdbeSorterBlockDebug(x,y,z)
          947  +#endif
          948  +
          949  +#if SQLITE_MAX_WORKER_THREADS>0
          950  +/*
          951  +** Join thread pTask->pThread.
          952  +*/
          953  +static int vdbeSorterJoinThread(SortSubtask *pTask){
          954  +  int rc = SQLITE_OK;
          955  +  if( pTask->pThread ){
          956  +#ifdef SQLITE_DEBUG_SORTER_THREADS
          957  +    int bDone = pTask->bDone;
          958  +#endif
          959  +    void *pRet = SQLITE_INT_TO_PTR(SQLITE_ERROR);
          960  +    vdbeSorterBlockDebug(pTask, !bDone, "enter");
          961  +    (void)sqlite3ThreadJoin(pTask->pThread, &pRet);
          962  +    vdbeSorterBlockDebug(pTask, !bDone, "exit");
          963  +    rc = SQLITE_PTR_TO_INT(pRet);
          964  +    assert( pTask->bDone==1 );
          965  +    pTask->bDone = 0;
          966  +    pTask->pThread = 0;
          967  +  }
          968  +  return rc;
          969  +}
          970  +
          971  +/*
          972  +** Launch a background thread to run xTask(pIn).
          973  +*/
          974  +static int vdbeSorterCreateThread(
          975  +  SortSubtask *pTask,             /* Thread will use this task object */
          976  +  void *(*xTask)(void*),          /* Routine to run in a separate thread */
          977  +  void *pIn                       /* Argument passed into xTask() */
          978  +){
          979  +  assert( pTask->pThread==0 && pTask->bDone==0 );
          980  +  return sqlite3ThreadCreate(&pTask->pThread, xTask, pIn);
          981  +}
          982  +
          983  +/*
          984  +** Join all outstanding threads launched by SorterWrite() to create 
          985  +** level-0 PMAs.
          986  +*/
          987  +static int vdbeSorterJoinAll(VdbeSorter *pSorter, int rcin){
          988  +  int rc = rcin;
          989  +  int i;
          990  +
          991  +  /* This function is always called by the main user thread.
          992  +  **
          993  +  ** If this function is being called after SorterRewind() has been called, 
          994  +  ** it is possible that thread pSorter->aTask[pSorter->nTask-1].pThread
          995  +  ** is currently attempting to join one of the other threads. To avoid a race
          996  +  ** condition where this thread also attempts to join the same object, join 
          997  +  ** thread pSorter->aTask[pSorter->nTask-1].pThread first. */
          998  +  for(i=pSorter->nTask-1; i>=0; i--){
          999  +    SortSubtask *pTask = &pSorter->aTask[i];
         1000  +    int rc2 = vdbeSorterJoinThread(pTask);
         1001  +    if( rc==SQLITE_OK ) rc = rc2;
         1002  +  }
         1003  +  return rc;
         1004  +}
         1005  +#else
         1006  +# define vdbeSorterJoinAll(x,rcin) (rcin)
         1007  +# define vdbeSorterJoinThread(pTask) SQLITE_OK
         1008  +#endif
         1009  +
         1010  +/*
         1011  +** Allocate a new MergeEngine object capable of handling up to
         1012  +** nReader PmaReader inputs.
         1013  +**
         1014  +** nReader is automatically rounded up to the next power of two.
         1015  +** nReader may not exceed SORTER_MAX_MERGE_COUNT even after rounding up.
         1016  +*/
         1017  +static MergeEngine *vdbeMergeEngineNew(int nReader){
         1018  +  int N = 2;                      /* Smallest power of two >= nReader */
         1019  +  int nByte;                      /* Total bytes of space to allocate */
         1020  +  MergeEngine *pNew;              /* Pointer to allocated object to return */
         1021  +
         1022  +  assert( nReader<=SORTER_MAX_MERGE_COUNT );
         1023  +
         1024  +  while( N<nReader ) N += N;
         1025  +  nByte = sizeof(MergeEngine) + N * (sizeof(int) + sizeof(PmaReader));
         1026  +
         1027  +  pNew = sqlite3FaultSim(100) ? 0 : (MergeEngine*)sqlite3MallocZero(nByte);
         1028  +  if( pNew ){
         1029  +    pNew->nTree = N;
         1030  +    pNew->pTask = 0;
         1031  +    pNew->aReadr = (PmaReader*)&pNew[1];
         1032  +    pNew->aTree = (int*)&pNew->aReadr[N];
         1033  +  }
         1034  +  return pNew;
         1035  +}
         1036  +
         1037  +/*
         1038  +** Free the MergeEngine object passed as the only argument.
         1039  +*/
         1040  +static void vdbeMergeEngineFree(MergeEngine *pMerger){
         1041  +  int i;
         1042  +  if( pMerger ){
         1043  +    for(i=0; i<pMerger->nTree; i++){
         1044  +      vdbePmaReaderClear(&pMerger->aReadr[i]);
         1045  +    }
         1046  +  }
         1047  +  sqlite3_free(pMerger);
         1048  +}
         1049  +
         1050  +/*
         1051  +** Free all resources associated with the IncrMerger object indicated by
         1052  +** the first argument.
         1053  +*/
         1054  +static void vdbeIncrFree(IncrMerger *pIncr){
         1055  +  if( pIncr ){
         1056  +#if SQLITE_MAX_WORKER_THREADS>0
         1057  +    if( pIncr->bUseThread ){
         1058  +      vdbeSorterJoinThread(pIncr->pTask);
         1059  +      if( pIncr->aFile[0].pFd ) sqlite3OsCloseFree(pIncr->aFile[0].pFd);
         1060  +      if( pIncr->aFile[1].pFd ) sqlite3OsCloseFree(pIncr->aFile[1].pFd);
         1061  +    }
         1062  +#endif
         1063  +    vdbeMergeEngineFree(pIncr->pMerger);
         1064  +    sqlite3_free(pIncr);
         1065  +  }
         1066  +}
   504   1067   
   505   1068   /*
   506   1069   ** Reset a sorting cursor back to its original empty state.
   507   1070   */
   508   1071   void sqlite3VdbeSorterReset(sqlite3 *db, VdbeSorter *pSorter){
   509         -  if( pSorter->aIter ){
   510         -    int i;
   511         -    for(i=0; i<pSorter->nTree; i++){
   512         -      vdbeSorterIterZero(db, &pSorter->aIter[i]);
   513         -    }
   514         -    sqlite3DbFree(db, pSorter->aIter);
   515         -    pSorter->aIter = 0;
   516         -  }
   517         -  if( pSorter->pTemp1 ){
   518         -    sqlite3OsCloseFree(pSorter->pTemp1);
   519         -    pSorter->pTemp1 = 0;
   520         -  }
   521         -  vdbeSorterRecordFree(db, pSorter->pRecord);
   522         -  pSorter->pRecord = 0;
   523         -  pSorter->iWriteOff = 0;
   524         -  pSorter->iReadOff = 0;
   525         -  pSorter->nInMemory = 0;
   526         -  pSorter->nTree = 0;
   527         -  pSorter->nPMA = 0;
   528         -  pSorter->aTree = 0;
   529         -}
   530         -
         1072  +  int i;
         1073  +  (void)vdbeSorterJoinAll(pSorter, SQLITE_OK);
         1074  +  assert( pSorter->bUseThreads || pSorter->pReader==0 );
         1075  +#if SQLITE_MAX_WORKER_THREADS>0
         1076  +  if( pSorter->pReader ){
         1077  +    vdbePmaReaderClear(pSorter->pReader);
         1078  +    sqlite3DbFree(db, pSorter->pReader);
         1079  +    pSorter->pReader = 0;
         1080  +  }
         1081  +#endif
         1082  +  vdbeMergeEngineFree(pSorter->pMerger);
         1083  +  pSorter->pMerger = 0;
         1084  +  for(i=0; i<pSorter->nTask; i++){
         1085  +    SortSubtask *pTask = &pSorter->aTask[i];
         1086  +    vdbeSortSubtaskCleanup(db, pTask);
         1087  +  }
         1088  +  if( pSorter->list.aMemory==0 ){
         1089  +    vdbeSorterRecordFree(0, pSorter->list.pList);
         1090  +  }
         1091  +  pSorter->list.pList = 0;
         1092  +  pSorter->list.szPMA = 0;
         1093  +  pSorter->bUsePMA = 0;
         1094  +  pSorter->iMemory = 0;
         1095  +  pSorter->mxKeysize = 0;
         1096  +  sqlite3DbFree(db, pSorter->pUnpacked);
         1097  +  pSorter->pUnpacked = 0;
         1098  +}
   531   1099   
   532   1100   /*
   533   1101   ** Free any cursor components allocated by sqlite3VdbeSorterXXX routines.
   534   1102   */
   535   1103   void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){
   536   1104     VdbeSorter *pSorter = pCsr->pSorter;
   537   1105     if( pSorter ){
   538   1106       sqlite3VdbeSorterReset(db, pSorter);
   539         -    sqlite3DbFree(db, pSorter->pUnpacked);
         1107  +    sqlite3_free(pSorter->list.aMemory);
   540   1108       sqlite3DbFree(db, pSorter);
   541   1109       pCsr->pSorter = 0;
   542   1110     }
   543   1111   }
   544   1112   
         1113  +#if SQLITE_MAX_MMAP_SIZE>0
         1114  +/*
         1115  +** The first argument is a file-handle open on a temporary file. The file
         1116  +** is guaranteed to be nByte bytes or smaller in size. This function
         1117  +** attempts to extend the file to nByte bytes in size and to ensure that
         1118  +** the VFS has memory mapped it.
         1119  +**
         1120  +** Whether or not the file does end up memory mapped of course depends on
         1121  +** the specific VFS implementation.
         1122  +*/
         1123  +static void vdbeSorterExtendFile(sqlite3 *db, sqlite3_file *pFd, i64 nByte){
         1124  +  if( nByte<=(i64)(db->nMaxSorterMmap) ){
         1125  +    int rc = sqlite3OsTruncate(pFd, nByte);
         1126  +    if( rc==SQLITE_OK ){
         1127  +      void *p = 0;
         1128  +      sqlite3OsFetch(pFd, 0, (int)nByte, &p);
         1129  +      sqlite3OsUnfetch(pFd, 0, p);
         1130  +    }
         1131  +  }
         1132  +}
         1133  +#else
         1134  +# define vdbeSorterExtendFile(x,y,z)
         1135  +#endif
         1136  +
   545   1137   /*
   546   1138   ** Allocate space for a file-handle and open a temporary file. If successful,
   547         -** set *ppFile to point to the malloc'd file-handle and return SQLITE_OK.
   548         -** Otherwise, set *ppFile to 0 and return an SQLite error code.
         1139  +** set *ppFd to point to the malloc'd file-handle and return SQLITE_OK.
         1140  +** Otherwise, set *ppFd to 0 and return an SQLite error code.
   549   1141   */
   550         -static int vdbeSorterOpenTempFile(sqlite3 *db, sqlite3_file **ppFile){
   551         -  int dummy;
   552         -  return sqlite3OsOpenMalloc(db->pVfs, 0, ppFile,
         1142  +static int vdbeSorterOpenTempFile(
         1143  +  sqlite3 *db,                    /* Database handle doing sort */
         1144  +  i64 nExtend,                    /* Attempt to extend file to this size */
         1145  +  sqlite3_file **ppFd
         1146  +){
         1147  +  int rc;
         1148  +  rc = sqlite3OsOpenMalloc(db->pVfs, 0, ppFd,
   553   1149         SQLITE_OPEN_TEMP_JOURNAL |
   554   1150         SQLITE_OPEN_READWRITE    | SQLITE_OPEN_CREATE |
   555         -      SQLITE_OPEN_EXCLUSIVE    | SQLITE_OPEN_DELETEONCLOSE, &dummy
         1151  +      SQLITE_OPEN_EXCLUSIVE    | SQLITE_OPEN_DELETEONCLOSE, &rc
   556   1152     );
         1153  +  if( rc==SQLITE_OK ){
         1154  +    i64 max = SQLITE_MAX_MMAP_SIZE;
         1155  +    sqlite3OsFileControlHint(*ppFd, SQLITE_FCNTL_MMAP_SIZE, (void*)&max);
         1156  +    if( nExtend>0 ){
         1157  +      vdbeSorterExtendFile(db, *ppFd, nExtend);
         1158  +    }
         1159  +  }
         1160  +  return rc;
   557   1161   }
         1162  +
         1163  +/*
         1164  +** If it has not already been allocated, allocate the UnpackedRecord 
         1165  +** structure at pTask->pUnpacked. Return SQLITE_OK if successful (or 
         1166  +** if no allocation was required), or SQLITE_NOMEM otherwise.
         1167  +*/
         1168  +static int vdbeSortAllocUnpacked(SortSubtask *pTask){
         1169  +  if( pTask->pUnpacked==0 ){
         1170  +    char *pFree;
         1171  +    pTask->pUnpacked = sqlite3VdbeAllocUnpackedRecord(
         1172  +        pTask->pSorter->pKeyInfo, 0, 0, &pFree
         1173  +    );
         1174  +    assert( pTask->pUnpacked==(UnpackedRecord*)pFree );
         1175  +    if( pFree==0 ) return SQLITE_NOMEM;
         1176  +    pTask->pUnpacked->nField = pTask->pSorter->pKeyInfo->nField;
         1177  +    pTask->pUnpacked->errCode = 0;
         1178  +  }
         1179  +  return SQLITE_OK;
         1180  +}
         1181  +
   558   1182   
   559   1183   /*
   560   1184   ** Merge the two sorted lists p1 and p2 into a single list.
   561   1185   ** Set *ppOut to the head of the new list.
   562   1186   */
   563   1187   static void vdbeSorterMerge(
   564         -  const VdbeCursor *pCsr,         /* For pKeyInfo */
         1188  +  SortSubtask *pTask,             /* Calling thread context */
   565   1189     SorterRecord *p1,               /* First list to merge */
   566   1190     SorterRecord *p2,               /* Second list to merge */
   567   1191     SorterRecord **ppOut            /* OUT: Head of merged list */
   568   1192   ){
   569   1193     SorterRecord *pFinal = 0;
   570   1194     SorterRecord **pp = &pFinal;
   571         -  void *pVal2 = p2 ? p2->pVal : 0;
         1195  +  void *pVal2 = p2 ? SRVAL(p2) : 0;
   572   1196   
   573   1197     while( p1 && p2 ){
   574   1198       int res;
   575         -    vdbeSorterCompare(pCsr, 0, p1->pVal, p1->nVal, pVal2, p2->nVal, &res);
         1199  +    res = vdbeSorterCompare(pTask, SRVAL(p1), p1->nVal, pVal2, p2->nVal);
   576   1200       if( res<=0 ){
   577   1201         *pp = p1;
   578         -      pp = &p1->pNext;
   579         -      p1 = p1->pNext;
         1202  +      pp = &p1->u.pNext;
         1203  +      p1 = p1->u.pNext;
   580   1204         pVal2 = 0;
   581   1205       }else{
   582   1206         *pp = p2;
   583         -       pp = &p2->pNext;
   584         -      p2 = p2->pNext;
         1207  +       pp = &p2->u.pNext;
         1208  +      p2 = p2->u.pNext;
   585   1209         if( p2==0 ) break;
   586         -      pVal2 = p2->pVal;
         1210  +      pVal2 = SRVAL(p2);
   587   1211       }
   588   1212     }
   589   1213     *pp = p1 ? p1 : p2;
   590   1214     *ppOut = pFinal;
   591   1215   }
   592   1216   
   593   1217   /*
   594         -** Sort the linked list of records headed at pCsr->pRecord. Return SQLITE_OK
   595         -** if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if an error
   596         -** occurs.
         1218  +** Sort the linked list of records headed at pTask->pList. Return 
         1219  +** SQLITE_OK if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if 
         1220  +** an error occurs.
   597   1221   */
   598         -static int vdbeSorterSort(const VdbeCursor *pCsr){
         1222  +static int vdbeSorterSort(SortSubtask *pTask, SorterList *pList){
   599   1223     int i;
   600   1224     SorterRecord **aSlot;
   601   1225     SorterRecord *p;
   602         -  VdbeSorter *pSorter = pCsr->pSorter;
         1226  +  int rc;
         1227  +
         1228  +  rc = vdbeSortAllocUnpacked(pTask);
         1229  +  if( rc!=SQLITE_OK ) return rc;
   603   1230   
   604   1231     aSlot = (SorterRecord **)sqlite3MallocZero(64 * sizeof(SorterRecord *));
   605   1232     if( !aSlot ){
   606   1233       return SQLITE_NOMEM;
   607   1234     }
   608   1235   
   609         -  p = pSorter->pRecord;
         1236  +  p = pList->pList;
   610   1237     while( p ){
   611         -    SorterRecord *pNext = p->pNext;
   612         -    p->pNext = 0;
         1238  +    SorterRecord *pNext;
         1239  +    if( pList->aMemory ){
         1240  +      if( (u8*)p==pList->aMemory ){
         1241  +        pNext = 0;
         1242  +      }else{
         1243  +        assert( p->u.iNext<sqlite3MallocSize(pList->aMemory) );
         1244  +        pNext = (SorterRecord*)&pList->aMemory[p->u.iNext];
         1245  +      }
         1246  +    }else{
         1247  +      pNext = p->u.pNext;
         1248  +    }
         1249  +
         1250  +    p->u.pNext = 0;
   613   1251       for(i=0; aSlot[i]; i++){
   614         -      vdbeSorterMerge(pCsr, p, aSlot[i], &p);
         1252  +      vdbeSorterMerge(pTask, p, aSlot[i], &p);
   615   1253         aSlot[i] = 0;
   616   1254       }
   617   1255       aSlot[i] = p;
   618   1256       p = pNext;
   619   1257     }
   620   1258   
   621   1259     p = 0;
   622   1260     for(i=0; i<64; i++){
   623         -    vdbeSorterMerge(pCsr, p, aSlot[i], &p);
         1261  +    vdbeSorterMerge(pTask, p, aSlot[i], &p);
   624   1262     }
   625         -  pSorter->pRecord = p;
         1263  +  pList->pList = p;
   626   1264   
   627   1265     sqlite3_free(aSlot);
   628         -  return SQLITE_OK;
         1266  +  assert( pTask->pUnpacked->errCode==SQLITE_OK 
         1267  +       || pTask->pUnpacked->errCode==SQLITE_NOMEM 
         1268  +  );
         1269  +  return pTask->pUnpacked->errCode;
   629   1270   }
   630   1271   
   631   1272   /*
   632         -** Initialize a file-writer object.
         1273  +** Initialize a PMA-writer object.
   633   1274   */
   634         -static void fileWriterInit(
   635         -  sqlite3 *db,                    /* Database (for malloc) */
   636         -  sqlite3_file *pFile,            /* File to write to */
   637         -  FileWriter *p,                  /* Object to populate */
   638         -  i64 iStart                      /* Offset of pFile to begin writing at */
         1275  +static void vdbePmaWriterInit(
         1276  +  sqlite3_file *pFd,              /* File handle to write to */
         1277  +  PmaWriter *p,                   /* Object to populate */
         1278  +  int nBuf,                       /* Buffer size */
         1279  +  i64 iStart                      /* Offset of pFd to begin writing at */
   639   1280   ){
   640         -  int nBuf = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
   641         -
   642         -  memset(p, 0, sizeof(FileWriter));
   643         -  p->aBuffer = (u8 *)sqlite3DbMallocRaw(db, nBuf);
         1281  +  memset(p, 0, sizeof(PmaWriter));
         1282  +  p->aBuffer = (u8*)sqlite3Malloc(nBuf);
   644   1283     if( !p->aBuffer ){
   645   1284       p->eFWErr = SQLITE_NOMEM;
   646   1285     }else{
   647   1286       p->iBufEnd = p->iBufStart = (iStart % nBuf);
   648   1287       p->iWriteOff = iStart - p->iBufStart;
   649   1288       p->nBuffer = nBuf;
   650         -    p->pFile = pFile;
         1289  +    p->pFd = pFd;
   651   1290     }
   652   1291   }
   653   1292   
   654   1293   /*
   655         -** Write nData bytes of data to the file-write object. Return SQLITE_OK
         1294  +** Write nData bytes of data to the PMA. Return SQLITE_OK
   656   1295   ** if successful, or an SQLite error code if an error occurs.
   657   1296   */
   658         -static void fileWriterWrite(FileWriter *p, u8 *pData, int nData){
         1297  +static void vdbePmaWriteBlob(PmaWriter *p, u8 *pData, int nData){
   659   1298     int nRem = nData;
   660   1299     while( nRem>0 && p->eFWErr==0 ){
   661   1300       int nCopy = nRem;
   662   1301       if( nCopy>(p->nBuffer - p->iBufEnd) ){
   663   1302         nCopy = p->nBuffer - p->iBufEnd;
   664   1303       }
   665   1304   
   666   1305       memcpy(&p->aBuffer[p->iBufEnd], &pData[nData-nRem], nCopy);
   667   1306       p->iBufEnd += nCopy;
   668   1307       if( p->iBufEnd==p->nBuffer ){
   669         -      p->eFWErr = sqlite3OsWrite(p->pFile, 
         1308  +      p->eFWErr = sqlite3OsWrite(p->pFd, 
   670   1309             &p->aBuffer[p->iBufStart], p->iBufEnd - p->iBufStart, 
   671   1310             p->iWriteOff + p->iBufStart
   672   1311         );
   673   1312         p->iBufStart = p->iBufEnd = 0;
   674   1313         p->iWriteOff += p->nBuffer;
   675   1314       }
   676   1315       assert( p->iBufEnd<p->nBuffer );
   677   1316   
   678   1317       nRem -= nCopy;
   679   1318     }
   680   1319   }
   681   1320   
   682   1321   /*
   683         -** Flush any buffered data to disk and clean up the file-writer object.
   684         -** The results of using the file-writer after this call are undefined.
         1322  +** Flush any buffered data to disk and clean up the PMA-writer object.
         1323  +** The results of using the PMA-writer after this call are undefined.
   685   1324   ** Return SQLITE_OK if flushing the buffered data succeeds or is not 
   686   1325   ** required. Otherwise, return an SQLite error code.
   687   1326   **
   688   1327   ** Before returning, set *piEof to the offset immediately following the
   689   1328   ** last byte written to the file.
   690   1329   */
   691         -static int fileWriterFinish(sqlite3 *db, FileWriter *p, i64 *piEof){
         1330  +static int vdbePmaWriterFinish(PmaWriter *p, i64 *piEof){
   692   1331     int rc;
   693   1332     if( p->eFWErr==0 && ALWAYS(p->aBuffer) && p->iBufEnd>p->iBufStart ){
   694         -    p->eFWErr = sqlite3OsWrite(p->pFile, 
         1333  +    p->eFWErr = sqlite3OsWrite(p->pFd, 
   695   1334           &p->aBuffer[p->iBufStart], p->iBufEnd - p->iBufStart, 
   696   1335           p->iWriteOff + p->iBufStart
   697   1336       );
   698   1337     }
   699   1338     *piEof = (p->iWriteOff + p->iBufEnd);
   700         -  sqlite3DbFree(db, p->aBuffer);
         1339  +  sqlite3_free(p->aBuffer);
   701   1340     rc = p->eFWErr;
   702         -  memset(p, 0, sizeof(FileWriter));
         1341  +  memset(p, 0, sizeof(PmaWriter));
   703   1342     return rc;
   704   1343   }
   705   1344   
   706   1345   /*
   707         -** Write value iVal encoded as a varint to the file-write object. Return 
         1346  +** Write value iVal encoded as a varint to the PMA. Return 
   708   1347   ** SQLITE_OK if successful, or an SQLite error code if an error occurs.
   709   1348   */
   710         -static void fileWriterWriteVarint(FileWriter *p, u64 iVal){
         1349  +static void vdbePmaWriteVarint(PmaWriter *p, u64 iVal){
   711   1350     int nByte; 
   712   1351     u8 aByte[10];
   713   1352     nByte = sqlite3PutVarint(aByte, iVal);
   714         -  fileWriterWrite(p, aByte, nByte);
         1353  +  vdbePmaWriteBlob(p, aByte, nByte);
   715   1354   }
   716   1355   
   717   1356   /*
   718         -** Write the current contents of the in-memory linked-list to a PMA. Return
   719         -** SQLITE_OK if successful, or an SQLite error code otherwise.
         1357  +** Write the current contents of in-memory linked-list pList to a level-0
         1358  +** PMA in the temp file belonging to sub-task pTask. Return SQLITE_OK if 
         1359  +** successful, or an SQLite error code otherwise.
   720   1360   **
   721   1361   ** The format of a PMA is:
   722   1362   **
   723   1363   **     * A varint. This varint contains the total number of bytes of content
   724   1364   **       in the PMA (not including the varint itself).
   725   1365   **
   726   1366   **     * One or more records packed end-to-end in order of ascending keys. 
   727   1367   **       Each record consists of a varint followed by a blob of data (the 
   728   1368   **       key). The varint is the number of bytes in the blob of data.
   729   1369   */
   730         -static int vdbeSorterListToPMA(sqlite3 *db, const VdbeCursor *pCsr){
         1370  +static int vdbeSorterListToPMA(SortSubtask *pTask, SorterList *pList){
         1371  +  sqlite3 *db = pTask->pSorter->db;
   731   1372     int rc = SQLITE_OK;             /* Return code */
   732         -  VdbeSorter *pSorter = pCsr->pSorter;
   733         -  FileWriter writer;
         1373  +  PmaWriter writer;               /* Object used to write to the file */
   734   1374   
   735         -  memset(&writer, 0, sizeof(FileWriter));
         1375  +#ifdef SQLITE_DEBUG
         1376  +  /* Set iSz to the expected size of file pTask->file after writing the PMA. 
         1377  +  ** This is used by an assert() statement at the end of this function.  */
         1378  +  i64 iSz = pList->szPMA + sqlite3VarintLen(pList->szPMA) + pTask->file.iEof;
         1379  +#endif
   736   1380   
   737         -  if( pSorter->nInMemory==0 ){
   738         -    assert( pSorter->pRecord==0 );
   739         -    return rc;
   740         -  }
   741         -
   742         -  rc = vdbeSorterSort(pCsr);
         1381  +  vdbeSorterWorkDebug(pTask, "enter");
         1382  +  memset(&writer, 0, sizeof(PmaWriter));
         1383  +  assert( pList->szPMA>0 );
   743   1384   
   744   1385     /* If the first temporary PMA file has not been opened, open it now. */
   745         -  if( rc==SQLITE_OK && pSorter->pTemp1==0 ){
   746         -    rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
   747         -    assert( rc!=SQLITE_OK || pSorter->pTemp1 );
   748         -    assert( pSorter->iWriteOff==0 );
   749         -    assert( pSorter->nPMA==0 );
         1386  +  if( pTask->file.pFd==0 ){
         1387  +    rc = vdbeSorterOpenTempFile(db, 0, &pTask->file.pFd);
         1388  +    assert( rc!=SQLITE_OK || pTask->file.pFd );
         1389  +    assert( pTask->file.iEof==0 );
         1390  +    assert( pTask->nPMA==0 );
         1391  +  }
         1392  +
         1393  +  /* Try to get the file to memory map */
         1394  +  if( rc==SQLITE_OK ){
         1395  +    vdbeSorterExtendFile(db, pTask->file.pFd, pTask->file.iEof+pList->szPMA+9);
         1396  +  }
         1397  +
         1398  +  /* Sort the list */
         1399  +  if( rc==SQLITE_OK ){
         1400  +    rc = vdbeSorterSort(pTask, pList);
   750   1401     }
   751   1402   
   752   1403     if( rc==SQLITE_OK ){
   753   1404       SorterRecord *p;
   754   1405       SorterRecord *pNext = 0;
   755   1406   
   756         -    fileWriterInit(db, pSorter->pTemp1, &writer, pSorter->iWriteOff);
   757         -    pSorter->nPMA++;
   758         -    fileWriterWriteVarint(&writer, pSorter->nInMemory);
   759         -    for(p=pSorter->pRecord; p; p=pNext){
   760         -      pNext = p->pNext;
   761         -      fileWriterWriteVarint(&writer, p->nVal);
   762         -      fileWriterWrite(&writer, p->pVal, p->nVal);
   763         -      sqlite3DbFree(db, p);
   764         -    }
   765         -    pSorter->pRecord = p;
   766         -    rc = fileWriterFinish(db, &writer, &pSorter->iWriteOff);
         1407  +    vdbePmaWriterInit(pTask->file.pFd, &writer, pTask->pSorter->pgsz,
         1408  +                      pTask->file.iEof);
         1409  +    pTask->nPMA++;
         1410  +    vdbePmaWriteVarint(&writer, pList->szPMA);
         1411  +    for(p=pList->pList; p; p=pNext){
         1412  +      pNext = p->u.pNext;
         1413  +      vdbePmaWriteVarint(&writer, p->nVal);
         1414  +      vdbePmaWriteBlob(&writer, SRVAL(p), p->nVal);
         1415  +      if( pList->aMemory==0 ) sqlite3_free(p);
         1416  +    }
         1417  +    pList->pList = p;
         1418  +    rc = vdbePmaWriterFinish(&writer, &pTask->file.iEof);
         1419  +  }
         1420  +
         1421  +  vdbeSorterWorkDebug(pTask, "exit");
         1422  +  assert( rc!=SQLITE_OK || pList->pList==0 );
         1423  +  assert( rc!=SQLITE_OK || pTask->file.iEof==iSz );
         1424  +  return rc;
         1425  +}
         1426  +
         1427  +/*
         1428  +** Advance the MergeEngine to its next entry.
         1429  +** Set *pbEof to true if there is no next entry because
         1430  +** the MergeEngine has reached the end of all its inputs.
         1431  +**
         1432  +** Return SQLITE_OK if successful or an error code if an error occurs.
         1433  +*/
         1434  +static int vdbeMergeEngineStep(
         1435  +  MergeEngine *pMerger,      /* The merge engine to advance to the next row */
         1436  +  int *pbEof                 /* Set TRUE at EOF.  Set false for more content */
         1437  +){
         1438  +  int rc;
         1439  +  int iPrev = pMerger->aTree[1];/* Index of PmaReader to advance */
         1440  +  SortSubtask *pTask = pMerger->pTask;
         1441  +
         1442  +  /* Advance the current PmaReader */
         1443  +  rc = vdbePmaReaderNext(&pMerger->aReadr[iPrev]);
         1444  +
         1445  +  /* Update contents of aTree[] */
         1446  +  if( rc==SQLITE_OK ){
         1447  +    int i;                      /* Index of aTree[] to recalculate */
         1448  +    PmaReader *pReadr1;         /* First PmaReader to compare */
         1449  +    PmaReader *pReadr2;         /* Second PmaReader to compare */
         1450  +    u8 *pKey2;                  /* To pReadr2->aKey, or 0 if record cached */
         1451  +
         1452  +    /* Find the first two PmaReaders to compare. The one that was just
         1453  +    ** advanced (iPrev) and the one next to it in the array.  */
         1454  +    pReadr1 = &pMerger->aReadr[(iPrev & 0xFFFE)];
         1455  +    pReadr2 = &pMerger->aReadr[(iPrev | 0x0001)];
         1456  +    pKey2 = pReadr2->aKey;
         1457  +
         1458  +    for(i=(pMerger->nTree+iPrev)/2; i>0; i=i/2){
         1459  +      /* Compare pReadr1 and pReadr2. Store the result in variable iRes. */
         1460  +      int iRes;
         1461  +      if( pReadr1->pFd==0 ){
         1462  +        iRes = +1;
         1463  +      }else if( pReadr2->pFd==0 ){
         1464  +        iRes = -1;
         1465  +      }else{
         1466  +        iRes = vdbeSorterCompare(pTask, 
         1467  +            pReadr1->aKey, pReadr1->nKey, pKey2, pReadr2->nKey
         1468  +        );
         1469  +      }
         1470  +
         1471  +      /* If pReadr1 contained the smaller value, set aTree[i] to its index.
         1472  +      ** Then set pReadr2 to the next PmaReader to compare to pReadr1. In this
         1473  +      ** case there is no cache of pReadr2 in pTask->pUnpacked, so set
         1474  +      ** pKey2 to point to the record belonging to pReadr2.
         1475  +      **
         1476  +      ** Alternatively, if pReadr2 contains the smaller of the two values,
         1477  +      ** set aTree[i] to its index and update pReadr1. If vdbeSorterCompare()
         1478  +      ** was actually called above, then pTask->pUnpacked now contains
         1479  +      ** a value equivalent to pReadr2. So set pKey2 to NULL to prevent
         1480  +      ** vdbeSorterCompare() from decoding pReadr2 again.
         1481  +      **
         1482  +      ** If the two values were equal, then the value from the oldest
         1483  +      ** PMA should be considered smaller. The VdbeSorter.aReadr[] array
         1484  +      ** is sorted from oldest to newest, so pReadr1 contains older values
         1485  +      ** than pReadr2 iff (pReadr1<pReadr2).  */
         1486  +      if( iRes<0 || (iRes==0 && pReadr1<pReadr2) ){
         1487  +        pMerger->aTree[i] = (int)(pReadr1 - pMerger->aReadr);
         1488  +        pReadr2 = &pMerger->aReadr[ pMerger->aTree[i ^ 0x0001] ];
         1489  +        pKey2 = pReadr2->aKey;
         1490  +      }else{
         1491  +        if( pReadr1->pFd ) pKey2 = 0;
         1492  +        pMerger->aTree[i] = (int)(pReadr2 - pMerger->aReadr);
         1493  +        pReadr1 = &pMerger->aReadr[ pMerger->aTree[i ^ 0x0001] ];
         1494  +      }
         1495  +    }
         1496  +    *pbEof = (pMerger->aReadr[pMerger->aTree[1]].pFd==0);
         1497  +  }
         1498  +
         1499  +  return (rc==SQLITE_OK ? pTask->pUnpacked->errCode : rc);
         1500  +}
         1501  +
         1502  +#if SQLITE_MAX_WORKER_THREADS>0
         1503  +/*
         1504  +** The main routine for background threads that write level-0 PMAs.
         1505  +*/
         1506  +static void *vdbeSorterFlushThread(void *pCtx){
         1507  +  SortSubtask *pTask = (SortSubtask*)pCtx;
         1508  +  int rc;                         /* Return code */
         1509  +  assert( pTask->bDone==0 );
         1510  +  rc = vdbeSorterListToPMA(pTask, &pTask->list);
         1511  +  pTask->bDone = 1;
         1512  +  return SQLITE_INT_TO_PTR(rc);
         1513  +}
         1514  +#endif /* SQLITE_MAX_WORKER_THREADS>0 */
         1515  +
         1516  +/*
         1517  +** Flush the current contents of VdbeSorter.list to a new PMA, possibly
         1518  +** using a background thread.
         1519  +*/
         1520  +static int vdbeSorterFlushPMA(VdbeSorter *pSorter){
         1521  +#if SQLITE_MAX_WORKER_THREADS==0
         1522  +  pSorter->bUsePMA = 1;
         1523  +  return vdbeSorterListToPMA(&pSorter->aTask[0], &pSorter->list);
         1524  +#else
         1525  +  int rc = SQLITE_OK;
         1526  +  int i;
         1527  +  SortSubtask *pTask = 0;    /* Thread context used to create new PMA */
         1528  +  int nWorker = (pSorter->nTask-1);
         1529  +
         1530  +  /* Set the flag to indicate that at least one PMA has been written. 
         1531  +  ** Or will be, anyhow.  */
         1532  +  pSorter->bUsePMA = 1;
         1533  +
         1534  +  /* Select a sub-task to sort and flush the current list of in-memory
         1535  +  ** records to disk. If the sorter is running in multi-threaded mode,
         1536  +  ** round-robin between the first (pSorter->nTask-1) tasks. Except, if
         1537  +  ** the background thread from a sub-task's previous turn is still running,
         1538  +  ** skip it. If the first (pSorter->nTask-1) sub-tasks are all still busy,
         1539  +  ** fall back to using the final sub-task. The first (pSorter->nTask-1)
         1540  +  ** sub-tasks are preferred as they use background threads - the final 
         1541  +  ** sub-task uses the main thread. */
         1542  +  for(i=0; i<nWorker; i++){
         1543  +    int iTest = (pSorter->iPrev + i + 1) % nWorker;
         1544  +    pTask = &pSorter->aTask[iTest];
         1545  +    if( pTask->bDone ){
         1546  +      rc = vdbeSorterJoinThread(pTask);
         1547  +    }
         1548  +    if( rc!=SQLITE_OK || pTask->pThread==0 ) break;
         1549  +  }
         1550  +
         1551  +  if( rc==SQLITE_OK ){
         1552  +    if( i==nWorker ){
         1553  +      /* Use the foreground thread for this operation */
         1554  +      rc = vdbeSorterListToPMA(&pSorter->aTask[nWorker], &pSorter->list);
         1555  +    }else{
         1556  +      /* Launch a background thread for this operation */
         1557  +      u8 *aMem = pTask->list.aMemory;
         1558  +      void *pCtx = (void*)pTask;
         1559  +
         1560  +      assert( pTask->pThread==0 && pTask->bDone==0 );
         1561  +      assert( pTask->list.pList==0 );
         1562  +      assert( pTask->list.aMemory==0 || pSorter->list.aMemory!=0 );
         1563  +
         1564  +      pSorter->iPrev = (u8)(pTask - pSorter->aTask);
         1565  +      pTask->list = pSorter->list;
         1566  +      pSorter->list.pList = 0;
         1567  +      pSorter->list.szPMA = 0;
         1568  +      if( aMem ){
         1569  +        pSorter->list.aMemory = aMem;
         1570  +        pSorter->nMemory = sqlite3MallocSize(aMem);
         1571  +      }else if( pSorter->list.aMemory ){
         1572  +        pSorter->list.aMemory = sqlite3Malloc(pSorter->nMemory);
         1573  +        if( !pSorter->list.aMemory ) return SQLITE_NOMEM;
         1574  +      }
         1575  +
         1576  +      rc = vdbeSorterCreateThread(pTask, vdbeSorterFlushThread, pCtx);
         1577  +    }
   767   1578     }
   768   1579   
   769   1580     return rc;
         1581  +#endif /* SQLITE_MAX_WORKER_THREADS!=0 */
   770   1582   }
   771   1583   
   772   1584   /*
   773   1585   ** Add a record to the sorter.
   774   1586   */
   775   1587   int sqlite3VdbeSorterWrite(
   776         -  sqlite3 *db,                    /* Database handle */
   777         -  const VdbeCursor *pCsr,               /* Sorter cursor */
         1588  +  const VdbeCursor *pCsr,         /* Sorter cursor */
   778   1589     Mem *pVal                       /* Memory cell containing record */
   779   1590   ){
   780   1591     VdbeSorter *pSorter = pCsr->pSorter;
   781   1592     int rc = SQLITE_OK;             /* Return Code */
   782   1593     SorterRecord *pNew;             /* New list element */
   783   1594   
         1595  +  int bFlush;                     /* True to flush contents of memory to PMA */
         1596  +  int nReq;                       /* Bytes of memory required */
         1597  +  int nPMA;                       /* Bytes of PMA space required */
         1598  +
   784   1599     assert( pSorter );
   785         -  pSorter->nInMemory += sqlite3VarintLen(pVal->n) + pVal->n;
   786   1600   
   787         -  pNew = (SorterRecord *)sqlite3DbMallocRaw(db, pVal->n + sizeof(SorterRecord));
   788         -  if( pNew==0 ){
   789         -    rc = SQLITE_NOMEM;
   790         -  }else{
   791         -    pNew->pVal = (void *)&pNew[1];
   792         -    memcpy(pNew->pVal, pVal->z, pVal->n);
   793         -    pNew->nVal = pVal->n;
   794         -    pNew->pNext = pSorter->pRecord;
   795         -    pSorter->pRecord = pNew;
   796         -  }
   797         -
   798         -  /* See if the contents of the sorter should now be written out. They
   799         -  ** are written out when either of the following are true:
         1601  +  /* Figure out whether or not the current contents of memory should be
         1602  +  ** flushed to a PMA before continuing. If so, do so.
         1603  +  **
         1604  +  ** If using the single large allocation mode (pSorter->aMemory!=0), then
         1605  +  ** flush the contents of memory to a new PMA if (a) at least one value is
         1606  +  ** already in memory and (b) the new value will not fit in memory.
         1607  +  ** 
         1608  +  ** Or, if using separate allocations for each record, flush the contents
         1609  +  ** of memory to a PMA if either of the following are true:
   800   1610     **
   801   1611     **   * The total memory allocated for the in-memory list is greater 
   802   1612     **     than (page-size * cache-size), or
   803   1613     **
   804   1614     **   * The total memory allocated for the in-memory list is greater 
   805   1615     **     than (page-size * 10) and sqlite3HeapNearlyFull() returns true.
   806   1616     */
   807         -  if( rc==SQLITE_OK && pSorter->mxPmaSize>0 && (
   808         -        (pSorter->nInMemory>pSorter->mxPmaSize)
   809         -     || (pSorter->nInMemory>pSorter->mnPmaSize && sqlite3HeapNearlyFull())
   810         -  )){
   811         -#ifdef SQLITE_DEBUG
   812         -    i64 nExpect = pSorter->iWriteOff
   813         -                + sqlite3VarintLen(pSorter->nInMemory)
   814         -                + pSorter->nInMemory;
         1617  +  nReq = pVal->n + sizeof(SorterRecord);
         1618  +  nPMA = pVal->n + sqlite3VarintLen(pVal->n);
         1619  +  if( pSorter->mxPmaSize ){
         1620  +    if( pSorter->list.aMemory ){
         1621  +      bFlush = pSorter->iMemory && (pSorter->iMemory+nReq) > pSorter->mxPmaSize;
         1622  +    }else{
         1623  +      bFlush = (
         1624  +          (pSorter->list.szPMA > pSorter->mxPmaSize)
         1625  +       || (pSorter->list.szPMA > pSorter->mnPmaSize && sqlite3HeapNearlyFull())
         1626  +      );
         1627  +    }
         1628  +    if( bFlush ){
         1629  +      rc = vdbeSorterFlushPMA(pSorter);
         1630  +      pSorter->list.szPMA = 0;
         1631  +      pSorter->iMemory = 0;
         1632  +      assert( rc!=SQLITE_OK || pSorter->list.pList==0 );
         1633  +    }
         1634  +  }
         1635  +
         1636  +  pSorter->list.szPMA += nPMA;
         1637  +  if( nPMA>pSorter->mxKeysize ){
         1638  +    pSorter->mxKeysize = nPMA;
         1639  +  }
         1640  +
         1641  +  if( pSorter->list.aMemory ){
         1642  +    int nMin = pSorter->iMemory + nReq;
         1643  +
         1644  +    if( nMin>pSorter->nMemory ){
         1645  +      u8 *aNew;
         1646  +      int nNew = pSorter->nMemory * 2;
         1647  +      while( nNew < nMin ) nNew = nNew*2;
         1648  +      if( nNew > pSorter->mxPmaSize ) nNew = pSorter->mxPmaSize;
         1649  +      if( nNew < nMin ) nNew = nMin;
         1650  +
         1651  +      aNew = sqlite3Realloc(pSorter->list.aMemory, nNew);
         1652  +      if( !aNew ) return SQLITE_NOMEM;
         1653  +      pSorter->list.pList = (SorterRecord*)(
         1654  +          aNew + ((u8*)pSorter->list.pList - pSorter->list.aMemory)
         1655  +      );
         1656  +      pSorter->list.aMemory = aNew;
         1657  +      pSorter->nMemory = nNew;
         1658  +    }
         1659  +
         1660  +    pNew = (SorterRecord*)&pSorter->list.aMemory[pSorter->iMemory];
         1661  +    pSorter->iMemory += ROUND8(nReq);
         1662  +    pNew->u.iNext = (int)((u8*)(pSorter->list.pList) - pSorter->list.aMemory);
         1663  +  }else{
         1664  +    pNew = (SorterRecord *)sqlite3Malloc(nReq);
         1665  +    if( pNew==0 ){
         1666  +      return SQLITE_NOMEM;
         1667  +    }
         1668  +    pNew->u.pNext = pSorter->list.pList;
         1669  +  }
         1670  +
         1671  +  memcpy(SRVAL(pNew), pVal->z, pVal->n);
         1672  +  pNew->nVal = pVal->n;
         1673  +  pSorter->list.pList = pNew;
         1674  +
         1675  +  return rc;
         1676  +}
         1677  +
         1678  +/*
         1679  +** Read keys from pIncr->pMerger and populate pIncr->aFile[1]. The format
         1680  +** of the data stored in aFile[1] is the same as that used by regular PMAs,
         1681  +** except that the number-of-bytes varint is omitted from the start.
         1682  +*/
         1683  +static int vdbeIncrPopulate(IncrMerger *pIncr){
         1684  +  int rc = SQLITE_OK;
         1685  +  int rc2;
         1686  +  i64 iStart = pIncr->iStartOff;
         1687  +  SorterFile *pOut = &pIncr->aFile[1];
         1688  +  SortSubtask *pTask = pIncr->pTask;
         1689  +  MergeEngine *pMerger = pIncr->pMerger;
         1690  +  PmaWriter writer;
         1691  +  assert( pIncr->bEof==0 );
         1692  +
         1693  +  vdbeSorterPopulateDebug(pTask, "enter");
         1694  +
         1695  +  vdbePmaWriterInit(pOut->pFd, &writer, pTask->pSorter->pgsz, iStart);
         1696  +  while( rc==SQLITE_OK ){
         1697  +    int dummy;
         1698  +    PmaReader *pReader = &pMerger->aReadr[ pMerger->aTree[1] ];
         1699  +    int nKey = pReader->nKey;
         1700  +    i64 iEof = writer.iWriteOff + writer.iBufEnd;
         1701  +
         1702  +    /* Check if the output file is full or if the input has been exhausted.
         1703  +    ** In either case exit the loop. */
         1704  +    if( pReader->pFd==0 ) break;
         1705  +    if( (iEof + nKey + sqlite3VarintLen(nKey))>(iStart + pIncr->mxSz) ) break;
         1706  +
         1707  +    /* Write the next key to the output. */
         1708  +    vdbePmaWriteVarint(&writer, nKey);
         1709  +    vdbePmaWriteBlob(&writer, pReader->aKey, nKey);
         1710  +    assert( pIncr->pMerger->pTask==pTask );
         1711  +    rc = vdbeMergeEngineStep(pIncr->pMerger, &dummy);
         1712  +  }
         1713  +
         1714  +  rc2 = vdbePmaWriterFinish(&writer, &pOut->iEof);
         1715  +  if( rc==SQLITE_OK ) rc = rc2;
         1716  +  vdbeSorterPopulateDebug(pTask, "exit");
         1717  +  return rc;
         1718  +}
         1719  +
         1720  +#if SQLITE_MAX_WORKER_THREADS>0
         1721  +/*
         1722  +** The main routine for background threads that populate aFile[1] of
         1723  +** multi-threaded IncrMerger objects.
         1724  +*/
         1725  +static void *vdbeIncrPopulateThread(void *pCtx){
         1726  +  IncrMerger *pIncr = (IncrMerger*)pCtx;
         1727  +  void *pRet = SQLITE_INT_TO_PTR( vdbeIncrPopulate(pIncr) );
         1728  +  pIncr->pTask->bDone = 1;
         1729  +  return pRet;
         1730  +}
         1731  +
         1732  +/*
         1733  +** Launch a background thread to populate aFile[1] of pIncr.
         1734  +*/
         1735  +static int vdbeIncrBgPopulate(IncrMerger *pIncr){
         1736  +  void *p = (void*)pIncr;
         1737  +  assert( pIncr->bUseThread );
         1738  +  return vdbeSorterCreateThread(pIncr->pTask, vdbeIncrPopulateThread, p);
         1739  +}
         1740  +#endif
         1741  +
         1742  +/*
         1743  +** This function is called when the PmaReader corresponding to pIncr has
         1744  +** finished reading the contents of aFile[0]. Its purpose is to "refill"
         1745  +** aFile[0] such that the PmaReader should start rereading it from the
         1746  +** beginning.
         1747  +**
         1748  +** For single-threaded objects, this is accomplished by literally reading 
         1749  +** keys from pIncr->pMerger and repopulating aFile[0]. 
         1750  +**
         1751  +** For multi-threaded objects, all that is required is to wait until the 
         1752  +** background thread is finished (if it is not already) and then swap 
         1753  +** aFile[0] and aFile[1] in place. If the contents of pMerger have not
         1754  +** been exhausted, this function also launches a new background thread
         1755  +** to populate the new aFile[1].
         1756  +**
         1757  +** SQLITE_OK is returned on success, or an SQLite error code otherwise.
         1758  +*/
         1759  +static int vdbeIncrSwap(IncrMerger *pIncr){
         1760  +  int rc = SQLITE_OK;
         1761  +
         1762  +#if SQLITE_MAX_WORKER_THREADS>0
         1763  +  if( pIncr->bUseThread ){
         1764  +    rc = vdbeSorterJoinThread(pIncr->pTask);
         1765  +
         1766  +    if( rc==SQLITE_OK ){
         1767  +      SorterFile f0 = pIncr->aFile[0];
         1768  +      pIncr->aFile[0] = pIncr->aFile[1];
         1769  +      pIncr->aFile[1] = f0;
         1770  +    }
         1771  +
         1772  +    if( rc==SQLITE_OK ){
         1773  +      if( pIncr->aFile[0].iEof==pIncr->iStartOff ){
         1774  +        pIncr->bEof = 1;
         1775  +      }else{
         1776  +        rc = vdbeIncrBgPopulate(pIncr);
         1777  +      }
         1778  +    }
         1779  +  }else
         1780  +#endif
         1781  +  {
         1782  +    rc = vdbeIncrPopulate(pIncr);
         1783  +    pIncr->aFile[0] = pIncr->aFile[1];
         1784  +    if( pIncr->aFile[0].iEof==pIncr->iStartOff ){
         1785  +      pIncr->bEof = 1;
         1786  +    }
         1787  +  }
         1788  +
         1789  +  return rc;
         1790  +}
         1791  +
         1792  +/*
         1793  +** Allocate and return a new IncrMerger object to read data from pMerger.
         1794  +**
         1795  +** If an OOM condition is encountered, set *ppOut to NULL and return
         1796  +** SQLITE_NOMEM. In this case free the pMerger argument before returning.
         1797  +*/
         1798  +static int vdbeIncrMergerNew(
         1799  +  SortSubtask *pTask,     /* The thread that will be using the new IncrMerger */
         1800  +  MergeEngine *pMerger,   /* The MergeEngine that the IncrMerger will control */
         1801  +  IncrMerger **ppOut      /* Write the new IncrMerger here */
         1802  +){
         1803  +  int rc = SQLITE_OK;
         1804  +  IncrMerger *pIncr = *ppOut = (IncrMerger*)
         1805  +       (sqlite3FaultSim(100) ? 0 : sqlite3MallocZero(sizeof(*pIncr)));
         1806  +  if( pIncr ){
         1807  +    pIncr->pMerger = pMerger;
         1808  +    pIncr->pTask = pTask;
         1809  +    pIncr->mxSz = MAX(pTask->pSorter->mxKeysize+9,pTask->pSorter->mxPmaSize/2);
         1810  +    pTask->file2.iEof += pIncr->mxSz;
         1811  +  }else{
         1812  +    vdbeMergeEngineFree(pMerger);
         1813  +    rc = SQLITE_NOMEM;
         1814  +  }
         1815  +  return rc;
         1816  +}
         1817  +
         1818  +#if SQLITE_MAX_WORKER_THREADS>0
         1819  +/*
         1820  +** Set the "use-threads" flag on object pIncr.
         1821  +*/
         1822  +static void vdbeIncrMergerSetThreads(IncrMerger *pIncr){
         1823  +  pIncr->bUseThread = 1;
         1824  +  pIncr->pTask->file2.iEof -= pIncr->mxSz;
         1825  +}
         1826  +#endif /* SQLITE_MAX_WORKER_THREADS>0 */
         1827  +
         1828  +
         1829  +
         1830  +/*
         1831  +** Recompute pMerger->aTree[iOut] by comparing the next keys on the
         1832  +** two PmaReaders that feed that entry.  Neither of the PmaReaders
         1833  +** are advanced.  This routine merely does the comparison.
         1834  +*/
         1835  +static void vdbeMergeEngineCompare(
         1836  +  MergeEngine *pMerger,  /* Merge engine containing PmaReaders to compare */
         1837  +  int iOut               /* Store the result in pMerger->aTree[iOut] */
         1838  +){
         1839  +  int i1;
         1840  +  int i2;
         1841  +  int iRes;
         1842  +  PmaReader *p1;
         1843  +  PmaReader *p2;
         1844  +
         1845  +  assert( iOut<pMerger->nTree && iOut>0 );
         1846  +
         1847  +  if( iOut>=(pMerger->nTree/2) ){
         1848  +    i1 = (iOut - pMerger->nTree/2) * 2;
         1849  +    i2 = i1 + 1;
         1850  +  }else{
         1851  +    i1 = pMerger->aTree[iOut*2];
         1852  +    i2 = pMerger->aTree[iOut*2+1];
         1853  +  }
         1854  +
         1855  +  p1 = &pMerger->aReadr[i1];
         1856  +  p2 = &pMerger->aReadr[i2];
         1857  +
         1858  +  if( p1->pFd==0 ){
         1859  +    iRes = i2;
         1860  +  }else if( p2->pFd==0 ){
         1861  +    iRes = i1;
         1862  +  }else{
         1863  +    int res;
         1864  +    assert( pMerger->pTask->pUnpacked!=0 );  /* from vdbeSortSubtaskMain() */
         1865  +    res = vdbeSorterCompare(
         1866  +        pMerger->pTask, p1->aKey, p1->nKey, p2->aKey, p2->nKey
         1867  +    );
         1868  +    if( res<=0 ){
         1869  +      iRes = i1;
         1870  +    }else{
         1871  +      iRes = i2;
         1872  +    }
         1873  +  }
         1874  +
         1875  +  pMerger->aTree[iOut] = iRes;
         1876  +}
         1877  +
         1878  +/*
         1879  +** Allowed values for the eMode parameter to vdbeMergeEngineInit()
         1880  +** and vdbePmaReaderIncrMergeInit().
         1881  +**
         1882  +** Only INCRINIT_NORMAL is valid in single-threaded builds (when
         1883  +** SQLITE_MAX_WORKER_THREADS==0).  The other values are only used
         1884  +** when there exists one or more separate worker threads.
         1885  +*/
         1886  +#define INCRINIT_NORMAL 0
         1887  +#define INCRINIT_TASK   1
         1888  +#define INCRINIT_ROOT   2
         1889  +
         1890  +/* Forward reference.
         1891  +** The vdbeMergeEngineInit() and vdbePmaReaderIncrMergeInit() routines call each
         1892  +** other (when building a merge tree).
         1893  +*/
         1894  +static int vdbePmaReaderIncrMergeInit(PmaReader *pReadr, int eMode);
         1895  +
         1896  +/*
         1897  +** Initialize the MergeEngine object passed as the second argument. Once this
         1898  +** function returns, the first key of merged data may be read from the 
         1899  +** MergeEngine object in the usual fashion.
         1900  +**
         1901  +** If argument eMode is INCRINIT_ROOT, then it is assumed that any IncrMerge
         1902  +** objects attached to the PmaReader objects that the merger reads from have
         1903  +** already been populated, but that they have not yet populated aFile[0] and
         1904  +** set the PmaReader objects up to read from it. In this case all that is
         1905  +** required is to call vdbePmaReaderNext() on each PmaReader to point it at
         1906  +** its first key.
         1907  +**
         1908  +** Otherwise, if eMode is any value other than INCRINIT_ROOT, then use 
         1909  +** vdbePmaReaderIncrMergeInit() to initialize each PmaReader that feeds data 
         1910  +** to pMerger.
         1911  +**
         1912  +** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
         1913  +*/
         1914  +static int vdbeMergeEngineInit(
         1915  +  SortSubtask *pTask,             /* Thread that will run pMerger */
         1916  +  MergeEngine *pMerger,           /* MergeEngine to initialize */
         1917  +  int eMode                       /* One of the INCRINIT_XXX constants */
         1918  +){
         1919  +  int rc = SQLITE_OK;             /* Return code */
         1920  +  int i;                          /* For looping over PmaReader objects */
         1921  +  int nTree = pMerger->nTree;
         1922  +
         1923  +  /* eMode is always INCRINIT_NORMAL in single-threaded mode */
         1924  +  assert( SQLITE_MAX_WORKER_THREADS>0 || eMode==INCRINIT_NORMAL );
         1925  +
         1926  +  /* Verify that the MergeEngine is assigned to a single thread */
         1927  +  assert( pMerger->pTask==0 );
         1928  +  pMerger->pTask = pTask;
         1929  +
         1930  +  for(i=0; i<nTree; i++){
         1931  +    if( SQLITE_MAX_WORKER_THREADS>0 && eMode==INCRINIT_ROOT ){
         1932  +      /* PmaReaders should be normally initialized in order, as if they are
         1933  +      ** reading from the same temp file this makes for more linear file IO.
         1934  +      ** However, in the INCRINIT_ROOT case, if PmaReader aReadr[nTask-1] is
         1935  +      ** in use it will block the vdbePmaReaderNext() call while it uses
         1936  +      ** the main thread to fill its buffer. So calling PmaReaderNext()
         1937  +      ** on this PmaReader before any of the multi-threaded PmaReaders takes
         1938  +      ** better advantage of multi-processor hardware. */
         1939  +      rc = vdbePmaReaderNext(&pMerger->aReadr[nTree-i-1]);
         1940  +    }else{
         1941  +      rc = vdbePmaReaderIncrMergeInit(&pMerger->aReadr[i], INCRINIT_NORMAL);
         1942  +    }
         1943  +    if( rc!=SQLITE_OK ) return rc;
         1944  +  }
         1945  +
         1946  +  for(i=pMerger->nTree-1; i>0; i--){
         1947  +    vdbeMergeEngineCompare(pMerger, i);
         1948  +  }
         1949  +  return pTask->pUnpacked->errCode;
         1950  +}
         1951  +
         1952  +/*
         1953  +** Initialize the IncrMerge field of a PmaReader.
         1954  +**
         1955  +** If the PmaReader passed as the first argument is not an incremental-reader
         1956  +** (if pReadr->pIncr==0), then this function is a no-op. Otherwise, it serves
         1957  +** to open and/or initialize the temp file related fields of the IncrMerge
         1958  +** object at (pReadr->pIncr).
         1959  +**
         1960  +** If argument eMode is set to INCRINIT_NORMAL, then all PmaReaders
         1961  +** in the sub-tree headed by pReadr are also initialized. Data is then loaded
         1962  +** into the buffers belonging to pReadr and it is set to
         1963  +** point to the first key in its range.
         1964  +**
         1965  +** If argument eMode is set to INCRINIT_TASK, then pReadr is guaranteed
         1966  +** to be a multi-threaded PmaReader and this function is being called in a
         1967  +** background thread. In this case all PmaReaders in the sub-tree are 
         1968  +** initialized as for INCRINIT_NORMAL and the aFile[1] buffer belonging to
         1969  +** pReadr is populated. However, pReadr itself is not set up to point
         1970  +** to its first key. A call to vdbePmaReaderNext() is still required to do
         1971  +** that. 
         1972  +**
         1973  +** The reason this function does not call vdbePmaReaderNext() immediately 
         1974  +** in the INCRINIT_TASK case is that vdbePmaReaderNext() assumes that it has
         1975  +** to block on thread (pTask->thread) before accessing aFile[1]. But, since
         1976  +** this entire function is being run by thread (pTask->thread), that will
         1977  +** lead to the current background thread attempting to join itself.
         1978  +**
         1979  +** Finally, if argument eMode is set to INCRINIT_ROOT, it may be assumed
         1980  +** that pReadr->pIncr is a multi-threaded IncrMerge object, and that all
         1981  +** child-trees have already been initialized using IncrInit(INCRINIT_TASK).
         1982  +** In this case vdbePmaReaderNext() is called on all child PmaReaders and
         1983  +** the current PmaReader set to point to the first key in its range.
         1984  +**
         1985  +** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
         1986  +*/
         1987  +static int vdbePmaReaderIncrMergeInit(PmaReader *pReadr, int eMode){
         1988  +  int rc = SQLITE_OK;
         1989  +  IncrMerger *pIncr = pReadr->pIncr;
         1990  +
         1991  +  /* eMode is always INCRINIT_NORMAL in single-threaded mode */
         1992  +  assert( SQLITE_MAX_WORKER_THREADS>0 || eMode==INCRINIT_NORMAL );
         1993  +
         1994  +  if( pIncr ){
         1995  +    SortSubtask *pTask = pIncr->pTask;
         1996  +    sqlite3 *db = pTask->pSorter->db;
         1997  +
         1998  +    rc = vdbeMergeEngineInit(pTask, pIncr->pMerger, eMode);
         1999  +
         2000  +    /* Set up the required files for pIncr. A multi-threaded IncrMerge object
         2001  +    ** requires two temp files to itself, whereas a single-threaded object
         2002  +    ** only requires a region of pTask->file2. */
         2003  +    if( rc==SQLITE_OK ){
         2004  +      int mxSz = pIncr->mxSz;
         2005  +#if SQLITE_MAX_WORKER_THREADS>0
         2006  +      if( pIncr->bUseThread ){
         2007  +        rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[0].pFd);
         2008  +        if( rc==SQLITE_OK ){
         2009  +          rc = vdbeSorterOpenTempFile(db, mxSz, &pIncr->aFile[1].pFd);
         2010  +        }
         2011  +      }else
         2012  +#endif
         2013  +      /*if( !pIncr->bUseThread )*/{
         2014  +        if( pTask->file2.pFd==0 ){
         2015  +          assert( pTask->file2.iEof>0 );
         2016  +          rc = vdbeSorterOpenTempFile(db, pTask->file2.iEof, &pTask->file2.pFd);
         2017  +          pTask->file2.iEof = 0;
         2018  +        }
         2019  +        if( rc==SQLITE_OK ){
         2020  +          pIncr->aFile[1].pFd = pTask->file2.pFd;
         2021  +          pIncr->iStartOff = pTask->file2.iEof;
         2022  +          pTask->file2.iEof += mxSz;
         2023  +        }
         2024  +      }
         2025  +    }
         2026  +
         2027  +#if SQLITE_MAX_WORKER_THREADS>0
         2028  +    if( rc==SQLITE_OK && pIncr->bUseThread ){
         2029  +      /* Use the current thread to populate aFile[1], even though this
         2030  +      ** PmaReader is multi-threaded. The reason being that this function
         2031  +      ** is already running in background thread pIncr->pTask->thread. */
         2032  +      assert( eMode==INCRINIT_ROOT || eMode==INCRINIT_TASK );
         2033  +      rc = vdbeIncrPopulate(pIncr);
         2034  +    }
         2035  +#endif
         2036  +
         2037  +    if( rc==SQLITE_OK
         2038  +     && (SQLITE_MAX_WORKER_THREADS==0 || eMode!=INCRINIT_TASK)
         2039  +    ){
         2040  +      rc = vdbePmaReaderNext(pReadr);
         2041  +    }
         2042  +  }
         2043  +  return rc;
         2044  +}
         2045  +
         2046  +#if SQLITE_MAX_WORKER_THREADS>0
         2047  +/*
         2048  +** The main routine for vdbePmaReaderIncrMergeInit() operations run in 
         2049  +** background threads.
         2050  +*/
         2051  +static void *vdbePmaReaderBgInit(void *pCtx){
         2052  +  PmaReader *pReader = (PmaReader*)pCtx;
         2053  +  void *pRet = SQLITE_INT_TO_PTR(
         2054  +                  vdbePmaReaderIncrMergeInit(pReader,INCRINIT_TASK)
         2055  +               );
         2056  +  pReader->pIncr->pTask->bDone = 1;
         2057  +  return pRet;
         2058  +}
         2059  +
         2060  +/*
         2061  +** Use a background thread to invoke vdbePmaReaderIncrMergeInit(INCRINIT_TASK) 
         2062  +** on the PmaReader object passed as the first argument.
         2063  +**
         2064  +** This call will initialize the various fields of the pReadr->pIncr 
         2065  +** structure and, if it is a multi-threaded IncrMerger, launch a 
         2066  +** background thread to populate aFile[1].
         2067  +*/
         2068  +static int vdbePmaReaderBgIncrInit(PmaReader *pReadr){
         2069  +  void *pCtx = (void*)pReadr;
         2070  +  return vdbeSorterCreateThread(pReadr->pIncr->pTask, vdbePmaReaderBgInit, pCtx);
         2071  +}
         2072  +#endif
         2073  +
         2074  +/*
         2075  +** Allocate a new MergeEngine object to merge the contents of nPMA level-0
         2076  +** PMAs from pTask->file. If no error occurs, set *ppOut to point to
         2077  +** the new object and return SQLITE_OK. Or, if an error does occur, set *ppOut
         2078  +** to NULL and return an SQLite error code.
         2079  +**
         2080  +** When this function is called, *piOffset is set to the offset of the
         2081  +** first PMA to read from pTask->file. Assuming no error occurs, it is 
         2082  +** set to the offset immediately following the last byte of the last
         2083  +** PMA before returning. If an error does occur, then the final value of
         2084  +** *piOffset is undefined.
         2085  +*/
         2086  +static int vdbeMergeEngineLevel0(
         2087  +  SortSubtask *pTask,             /* Sorter task to read from */
         2088  +  int nPMA,                       /* Number of PMAs to read */
         2089  +  i64 *piOffset,                  /* IN/OUT: Readr offset in pTask->file */
         2090  +  MergeEngine **ppOut             /* OUT: New merge-engine */
         2091  +){
         2092  +  MergeEngine *pNew;              /* Merge engine to return */
         2093  +  i64 iOff = *piOffset;
         2094  +  int i;
         2095  +  int rc = SQLITE_OK;
         2096  +
         2097  +  *ppOut = pNew = vdbeMergeEngineNew(nPMA);
         2098  +  if( pNew==0 ) rc = SQLITE_NOMEM;
         2099  +
         2100  +  for(i=0; i<nPMA && rc==SQLITE_OK; i++){
         2101  +    i64 nDummy;
         2102  +    PmaReader *pReadr = &pNew->aReadr[i];
         2103  +    rc = vdbePmaReaderInit(pTask, &pTask->file, iOff, pReadr, &nDummy);
         2104  +    iOff = pReadr->iEof;
         2105  +  }
         2106  +
         2107  +  if( rc!=SQLITE_OK ){
         2108  +    vdbeMergeEngineFree(pNew);
         2109  +    *ppOut = 0;
         2110  +  }
         2111  +  *piOffset = iOff;
         2112  +  return rc;
         2113  +}
         2114  +
         2115  +/*
         2116  +** Return the depth of a tree comprising nPMA PMAs, assuming a fanout of
         2117  +** SORTER_MAX_MERGE_COUNT. The returned value does not include leaf nodes.
         2118  +**
         2119  +** i.e.
         2120  +**
         2121  +**   nPMA<=16    -> TreeDepth() == 0
         2122  +**   nPMA<=256   -> TreeDepth() == 1
         2123  +**   nPMA<=65536 -> TreeDepth() == 2
         2124  +*/
         2125  +static int vdbeSorterTreeDepth(int nPMA){
         2126  +  int nDepth = 0;
         2127  +  i64 nDiv = SORTER_MAX_MERGE_COUNT;
         2128  +  while( nDiv < (i64)nPMA ){
         2129  +    nDiv = nDiv * SORTER_MAX_MERGE_COUNT;
         2130  +    nDepth++;
         2131  +  }
         2132  +  return nDepth;
         2133  +}
         2134  +
         2135  +/*
         2136  +** pRoot is the root of an incremental merge-tree with depth nDepth (according
         2137  +** to vdbeSorterTreeDepth()). pLeaf is the iSeq'th leaf to be added to the
         2138  +** tree, counting from zero. This function adds pLeaf to the tree.
         2139  +**
         2140  +** If successful, SQLITE_OK is returned. If an error occurs, an SQLite error
         2141  +** code is returned and pLeaf is freed.
         2142  +*/
         2143  +static int vdbeSorterAddToTree(
         2144  +  SortSubtask *pTask,             /* Task context */
         2145  +  int nDepth,                     /* Depth of tree according to TreeDepth() */
         2146  +  int iSeq,                       /* Sequence number of leaf within tree */
         2147  +  MergeEngine *pRoot,             /* Root of tree */
         2148  +  MergeEngine *pLeaf              /* Leaf to add to tree */
         2149  +){
         2150  +  int rc = SQLITE_OK;
         2151  +  int nDiv = 1;
         2152  +  int i;
         2153  +  MergeEngine *p = pRoot;
         2154  +  IncrMerger *pIncr;
         2155  +
         2156  +  rc = vdbeIncrMergerNew(pTask, pLeaf, &pIncr);
         2157  +
         2158  +  for(i=1; i<nDepth; i++){
         2159  +    nDiv = nDiv * SORTER_MAX_MERGE_COUNT;
         2160  +  }
         2161  +
         2162  +  for(i=1; i<nDepth && rc==SQLITE_OK; i++){
         2163  +    int iIter = (iSeq / nDiv) % SORTER_MAX_MERGE_COUNT;
         2164  +    PmaReader *pReadr = &p->aReadr[iIter];
         2165  +
         2166  +    if( pReadr->pIncr==0 ){
         2167  +      MergeEngine *pNew = vdbeMergeEngineNew(SORTER_MAX_MERGE_COUNT);
         2168  +      if( pNew==0 ){
         2169  +        rc = SQLITE_NOMEM;
         2170  +      }else{
         2171  +        rc = vdbeIncrMergerNew(pTask, pNew, &pReadr->pIncr);
         2172  +      }
         2173  +    }
         2174  +    if( rc==SQLITE_OK ){
         2175  +      p = pReadr->pIncr->pMerger;
         2176  +      nDiv = nDiv / SORTER_MAX_MERGE_COUNT;
         2177  +    }
         2178  +  }
         2179  +
         2180  +  if( rc==SQLITE_OK ){
         2181  +    p->aReadr[iSeq % SORTER_MAX_MERGE_COUNT].pIncr = pIncr;
         2182  +  }else{
         2183  +    vdbeIncrFree(pIncr);
         2184  +  }
         2185  +  return rc;
         2186  +}
         2187  +
         2188  +/*
         2189  +** This function is called as part of a SorterRewind() operation on a sorter
         2190  +** that has already written two or more level-0 PMAs to one or more temp
         2191  +** files. It builds a tree of MergeEngine/IncrMerger/PmaReader objects that 
         2192  +** can be used to incrementally merge all PMAs on disk.
         2193  +**
         2194  +** If successful, SQLITE_OK is returned and *ppOut set to point to the
         2195  +** MergeEngine object at the root of the tree before returning. Or, if an
         2196  +** error occurs, an SQLite error code is returned and the final value 
         2197  +** of *ppOut is undefined.
         2198  +*/
         2199  +static int vdbeSorterMergeTreeBuild(
         2200  +  VdbeSorter *pSorter,       /* The VDBE cursor that implements the sort */
         2201  +  MergeEngine **ppOut        /* Write the MergeEngine here */
         2202  +){
         2203  +  MergeEngine *pMain = 0;
         2204  +  int rc = SQLITE_OK;
         2205  +  int iTask;
         2206  +
         2207  +#if SQLITE_MAX_WORKER_THREADS>0
         2208  +  /* If the sorter uses more than one task, then create the top-level 
         2209  +  ** MergeEngine here. This MergeEngine will read data from exactly 
         2210  +  ** one PmaReader per sub-task.  */
         2211  +  assert( pSorter->bUseThreads || pSorter->nTask==1 );
         2212  +  if( pSorter->nTask>1 ){
         2213  +    pMain = vdbeMergeEngineNew(pSorter->nTask);
         2214  +    if( pMain==0 ) rc = SQLITE_NOMEM;
         2215  +  }
         2216  +#endif
         2217  +
         2218  +  for(iTask=0; rc==SQLITE_OK && iTask<pSorter->nTask; iTask++){
         2219  +    SortSubtask *pTask = &pSorter->aTask[iTask];
         2220  +    assert( pTask->nPMA>0 || SQLITE_MAX_WORKER_THREADS>0 );
         2221  +    if( SQLITE_MAX_WORKER_THREADS==0 || pTask->nPMA ){
         2222  +      MergeEngine *pRoot = 0;     /* Root node of tree for this task */
         2223  +      int nDepth = vdbeSorterTreeDepth(pTask->nPMA);
         2224  +      i64 iReadOff = 0;
         2225  +
         2226  +      if( pTask->nPMA<=SORTER_MAX_MERGE_COUNT ){
         2227  +        rc = vdbeMergeEngineLevel0(pTask, pTask->nPMA, &iReadOff, &pRoot);
         2228  +      }else{
         2229  +        int i;
         2230  +        int iSeq = 0;
         2231  +        pRoot = vdbeMergeEngineNew(SORTER_MAX_MERGE_COUNT);
         2232  +        if( pRoot==0 ) rc = SQLITE_NOMEM;
         2233  +        for(i=0; i<pTask->nPMA && rc==SQLITE_OK; i += SORTER_MAX_MERGE_COUNT){
         2234  +          MergeEngine *pMerger = 0; /* New level-0 PMA merger */
         2235  +          int nReader;              /* Number of level-0 PMAs to merge */
         2236  +
         2237  +          nReader = MIN(pTask->nPMA - i, SORTER_MAX_MERGE_COUNT);
         2238  +          rc = vdbeMergeEngineLevel0(pTask, nReader, &iReadOff, &pMerger);
         2239  +          if( rc==SQLITE_OK ){
         2240  +            rc = vdbeSorterAddToTree(pTask, nDepth, iSeq++, pRoot, pMerger);
         2241  +          }
         2242  +        }
         2243  +      }
         2244  +
         2245  +      if( rc==SQLITE_OK ){
         2246  +#if SQLITE_MAX_WORKER_THREADS>0
         2247  +        if( pMain!=0 ){
         2248  +          rc = vdbeIncrMergerNew(pTask, pRoot, &pMain->aReadr[iTask].pIncr);
         2249  +        }else
   815   2250   #endif
   816         -    rc = vdbeSorterListToPMA(db, pCsr);
   817         -    pSorter->nInMemory = 0;
   818         -    assert( rc!=SQLITE_OK || (nExpect==pSorter->iWriteOff) );
         2251  +        {
         2252  +          assert( pMain==0 );
         2253  +          pMain = pRoot;
         2254  +        }
         2255  +      }else{
         2256  +        vdbeMergeEngineFree(pRoot);
         2257  +      }
         2258  +    }
         2259  +  }
         2260  +
         2261  +  if( rc!=SQLITE_OK ){
         2262  +    vdbeMergeEngineFree(pMain);
         2263  +    pMain = 0;
   819   2264     }
   820         -
         2265  +  *ppOut = pMain;
   821   2266     return rc;
   822   2267   }
   823   2268   
   824   2269   /*
   825         -** Helper function for sqlite3VdbeSorterRewind(). 
         2270  +** This function is called as part of an sqlite3VdbeSorterRewind() operation
         2271  +** on a sorter that has written two or more PMAs to temporary files. It sets
         2272  +** up either VdbeSorter.pMerger (for single threaded sorters) or pReader
         2273  +** (for multi-threaded sorters) so that it can be used to iterate through
         2274  +** all records stored in the sorter.
         2275  +**
         2276  +** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
   826   2277   */
   827         -static int vdbeSorterInitMerge(
   828         -  sqlite3 *db,                    /* Database handle */
   829         -  const VdbeCursor *pCsr,         /* Cursor handle for this sorter */
   830         -  i64 *pnByte                     /* Sum of bytes in all opened PMAs */
   831         -){
   832         -  VdbeSorter *pSorter = pCsr->pSorter;
   833         -  int rc = SQLITE_OK;             /* Return code */
   834         -  int i;                          /* Used to iterator through aIter[] */
   835         -  i64 nByte = 0;                  /* Total bytes in all opened PMAs */
   836         -
   837         -  /* Initialize the iterators. */
   838         -  for(i=0; i<SORTER_MAX_MERGE_COUNT; i++){
   839         -    VdbeSorterIter *pIter = &pSorter->aIter[i];
   840         -    rc = vdbeSorterIterInit(db, pSorter, pSorter->iReadOff, pIter, &nByte);
   841         -    pSorter->iReadOff = pIter->iEof;
   842         -    assert( rc!=SQLITE_OK || pSorter->iReadOff<=pSorter->iWriteOff );
   843         -    if( rc!=SQLITE_OK || pSorter->iReadOff>=pSorter->iWriteOff ) break;
         2278  +static int vdbeSorterSetupMerge(VdbeSorter *pSorter){
         2279  +  int rc;                         /* Return code */
         2280  +  SortSubtask *pTask0 = &pSorter->aTask[0];
         2281  +  MergeEngine *pMain = 0;
         2282  +#if SQLITE_MAX_WORKER_THREADS
         2283  +  sqlite3 *db = pTask0->pSorter->db;
         2284  +#endif
         2285  +
         2286  +  rc = vdbeSorterMergeTreeBuild(pSorter, &pMain);
         2287  +  if( rc==SQLITE_OK ){
         2288  +#if SQLITE_MAX_WORKER_THREADS
         2289  +    assert( pSorter->bUseThreads==0 || pSorter->nTask>1 );
         2290  +    if( pSorter->bUseThreads ){
         2291  +      int iTask;
         2292  +      PmaReader *pReadr;
         2293  +      SortSubtask *pLast = &pSorter->aTask[pSorter->nTask-1];
         2294  +      rc = vdbeSortAllocUnpacked(pLast);
         2295  +      if( rc==SQLITE_OK ){
         2296  +        pReadr = (PmaReader*)sqlite3DbMallocZero(db, sizeof(PmaReader));
         2297  +        pSorter->pReader = pReadr;
         2298  +        if( pReadr==0 ) rc = SQLITE_NOMEM;
         2299  +      }
         2300  +      if( rc==SQLITE_OK ){
         2301  +        rc = vdbeIncrMergerNew(pLast, pMain, &pReadr->pIncr);
         2302  +        if( rc==SQLITE_OK ){
         2303  +          vdbeIncrMergerSetThreads(pReadr->pIncr);
         2304  +          for(iTask=0; iTask<(pSorter->nTask-1); iTask++){
         2305  +            IncrMerger *pIncr;
         2306  +            if( (pIncr = pMain->aReadr[iTask].pIncr) ){
         2307  +              vdbeIncrMergerSetThreads(pIncr);
         2308  +              assert( pIncr->pTask!=pLast );
         2309  +            }
         2310  +          }
         2311  +          for(iTask=0; rc==SQLITE_OK && iTask<pSorter->nTask; iTask++){
         2312  +            PmaReader *p = &pMain->aReadr[iTask];
         2313  +            assert( p->pIncr==0 || p->pIncr->pTask==&pSorter->aTask[iTask] );
         2314  +            if( p->pIncr ){ 
         2315  +              if( iTask==pSorter->nTask-1 ){
         2316  +                rc = vdbePmaReaderIncrMergeInit(p, INCRINIT_TASK);
         2317  +              }else{
         2318  +                rc = vdbePmaReaderBgIncrInit(p);
         2319  +              }
         2320  +            }
         2321  +          }
         2322  +        }
         2323  +        pMain = 0;
         2324  +      }
         2325  +      if( rc==SQLITE_OK ){
         2326  +        rc = vdbePmaReaderIncrMergeInit(pReadr, INCRINIT_ROOT);
         2327  +      }
         2328  +    }else
         2329  +#endif
         2330  +    {
         2331  +      rc = vdbeMergeEngineInit(pTask0, pMain, INCRINIT_NORMAL);
         2332  +      pSorter->pMerger = pMain;
         2333  +      pMain = 0;
         2334  +    }
   844   2335     }
   845   2336   
   846         -  /* Initialize the aTree[] array. */
   847         -  for(i=pSorter->nTree-1; rc==SQLITE_OK && i>0; i--){
   848         -    rc = vdbeSorterDoCompare(pCsr, i);
         2337  +  if( rc!=SQLITE_OK ){
         2338  +    vdbeMergeEngineFree(pMain);
   849   2339     }
   850         -
   851         -  *pnByte = nByte;
   852   2340     return rc;
   853   2341   }
         2342  +
   854   2343   
   855   2344   /*
   856         -** Once the sorter has been populated, this function is called to prepare
   857         -** for iterating through its contents in sorted order.
         2345  +** Once the sorter has been populated by calls to sqlite3VdbeSorterWrite,
         2346  +** this function is called to prepare for iterating through the records
         2347  +** in sorted order.
   858   2348   */
   859         -int sqlite3VdbeSorterRewind(sqlite3 *db, const VdbeCursor *pCsr, int *pbEof){
         2349  +int sqlite3VdbeSorterRewind(const VdbeCursor *pCsr, int *pbEof){
   860   2350     VdbeSorter *pSorter = pCsr->pSorter;
   861         -  int rc;                         /* Return code */
   862         -  sqlite3_file *pTemp2 = 0;       /* Second temp file to use */
   863         -  i64 iWrite2 = 0;                /* Write offset for pTemp2 */
   864         -  int nIter;                      /* Number of iterators used */
   865         -  int nByte;                      /* Bytes of space required for aIter/aTree */
   866         -  int N = 2;                      /* Power of 2 >= nIter */
         2351  +  int rc = SQLITE_OK;             /* Return code */
   867   2352   
   868   2353     assert( pSorter );
   869   2354   
   870   2355     /* If no data has been written to disk, then do not do so now. Instead,
   871   2356     ** sort the VdbeSorter.pRecord list. The vdbe layer will read data directly
   872   2357     ** from the in-memory list.  */
   873         -  if( pSorter->nPMA==0 ){
   874         -    *pbEof = !pSorter->pRecord;
   875         -    assert( pSorter->aTree==0 );
   876         -    return vdbeSorterSort(pCsr);
         2358  +  if( pSorter->bUsePMA==0 ){
         2359  +    if( pSorter->list.pList ){
         2360  +      *pbEof = 0;
         2361  +      rc = vdbeSorterSort(&pSorter->aTask[0], &pSorter->list);
         2362  +    }else{
         2363  +      *pbEof = 1;
         2364  +    }
         2365  +    return rc;
         2366  +  }
         2367  +
         2368  +  /* Write the current in-memory list to a PMA. When the VdbeSorterWrite() 
         2369  +  ** function flushes the contents of memory to disk, it always
         2370  +  ** creates a new list consisting of a single key immediately afterwards.
         2371  +  ** So the list is never empty at this point.  */
         2372  +  assert( pSorter->list.pList );
         2373  +  rc = vdbeSorterFlushPMA(pSorter);
         2374  +
         2375  +  /* Join all threads */
         2376  +  rc = vdbeSorterJoinAll(pSorter, rc);
         2377  +
         2378  +  vdbeSorterRewindDebug("rewind");
         2379  +
         2380  +  /* Assuming no errors have occurred, set up a merger structure to 
         2381  +  ** incrementally read and merge all remaining PMAs.  */
         2382  +  assert( pSorter->pReader==0 );
         2383  +  if( rc==SQLITE_OK ){
         2384  +    rc = vdbeSorterSetupMerge(pSorter);
         2385  +    *pbEof = 0;
   877   2386     }
   878   2387   
   879         -  /* Write the current in-memory list to a PMA. */
   880         -  rc = vdbeSorterListToPMA(db, pCsr);
   881         -  if( rc!=SQLITE_OK ) return rc;
   882         -
   883         -  /* Allocate space for aIter[] and aTree[]. */
   884         -  nIter = pSorter->nPMA;
   885         -  if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT;
   886         -  assert( nIter>0 );
   887         -  while( N<nIter ) N += N;
   888         -  nByte = N * (sizeof(int) + sizeof(VdbeSorterIter));
   889         -  pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte);
   890         -  if( !pSorter->aIter ) return SQLITE_NOMEM;
   891         -  pSorter->aTree = (int *)&pSorter->aIter[N];
   892         -  pSorter->nTree = N;
   893         -
   894         -  do {
   895         -    int iNew;                     /* Index of new, merged, PMA */
   896         -
   897         -    for(iNew=0; 
   898         -        rc==SQLITE_OK && iNew*SORTER_MAX_MERGE_COUNT<pSorter->nPMA; 
   899         -        iNew++
   900         -    ){
   901         -      int rc2;                    /* Return code from fileWriterFinish() */
   902         -      FileWriter writer;          /* Object used to write to disk */
   903         -      i64 nWrite;                 /* Number of bytes in new PMA */
   904         -
   905         -      memset(&writer, 0, sizeof(FileWriter));
   906         -
   907         -      /* If there are SORTER_MAX_MERGE_COUNT or less PMAs in file pTemp1,
   908         -      ** initialize an iterator for each of them and break out of the loop.
   909         -      ** These iterators will be incrementally merged as the VDBE layer calls
   910         -      ** sqlite3VdbeSorterNext().
   911         -      **
   912         -      ** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs,
   913         -      ** initialize interators for SORTER_MAX_MERGE_COUNT of them. These PMAs
   914         -      ** are merged into a single PMA that is written to file pTemp2.
   915         -      */
   916         -      rc = vdbeSorterInitMerge(db, pCsr, &nWrite);
   917         -      assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile );
   918         -      if( rc!=SQLITE_OK || pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
   919         -        break;
   920         -      }
   921         -
   922         -      /* Open the second temp file, if it is not already open. */
   923         -      if( pTemp2==0 ){
   924         -        assert( iWrite2==0 );
   925         -        rc = vdbeSorterOpenTempFile(db, &pTemp2);
   926         -      }
   927         -
   928         -      if( rc==SQLITE_OK ){
   929         -        int bEof = 0;
   930         -        fileWriterInit(db, pTemp2, &writer, iWrite2);
   931         -        fileWriterWriteVarint(&writer, nWrite);
   932         -        while( rc==SQLITE_OK && bEof==0 ){
   933         -          VdbeSorterIter *pIter = &pSorter->aIter[ pSorter->aTree[1] ];
   934         -          assert( pIter->pFile );
   935         -
   936         -          fileWriterWriteVarint(&writer, pIter->nKey);
   937         -          fileWriterWrite(&writer, pIter->aKey, pIter->nKey);
   938         -          rc = sqlite3VdbeSorterNext(db, pCsr, &bEof);
   939         -        }
   940         -        rc2 = fileWriterFinish(db, &writer, &iWrite2);
   941         -        if( rc==SQLITE_OK ) rc = rc2;
   942         -      }
   943         -    }
   944         -
   945         -    if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
   946         -      break;
   947         -    }else{
   948         -      sqlite3_file *pTmp = pSorter->pTemp1;
   949         -      pSorter->nPMA = iNew;
   950         -      pSorter->pTemp1 = pTemp2;
   951         -      pTemp2 = pTmp;
   952         -      pSorter->iWriteOff = iWrite2;
   953         -      pSorter->iReadOff = 0;
   954         -      iWrite2 = 0;
   955         -    }
   956         -  }while( rc==SQLITE_OK );
   957         -
   958         -  if( pTemp2 ){
   959         -    sqlite3OsCloseFree(pTemp2);
   960         -  }
   961         -  *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
         2388  +  vdbeSorterRewindDebug("rewinddone");
   962   2389     return rc;
   963   2390   }
   964   2391   
   965   2392   /*
   966   2393   ** Advance to the next element in the sorter.
   967   2394   */
   968   2395   int sqlite3VdbeSorterNext(sqlite3 *db, const VdbeCursor *pCsr, int *pbEof){
   969   2396     VdbeSorter *pSorter = pCsr->pSorter;
   970   2397     int rc;                         /* Return code */
   971   2398   
   972         -  if( pSorter->aTree ){
   973         -    int iPrev = pSorter->aTree[1];/* Index of iterator to advance */
   974         -    rc = vdbeSorterIterNext(db, &pSorter->aIter[iPrev]);
   975         -    if( rc==SQLITE_OK ){
   976         -      int i;                      /* Index of aTree[] to recalculate */
   977         -      VdbeSorterIter *pIter1;     /* First iterator to compare */
   978         -      VdbeSorterIter *pIter2;     /* Second iterator to compare */
   979         -      u8 *pKey2;                  /* To pIter2->aKey, or 0 if record cached */
   980         -
   981         -      /* Find the first two iterators to compare. The one that was just
   982         -      ** advanced (iPrev) and the one next to it in the array.  */
   983         -      pIter1 = &pSorter->aIter[(iPrev & 0xFFFE)];
   984         -      pIter2 = &pSorter->aIter[(iPrev | 0x0001)];
   985         -      pKey2 = pIter2->aKey;
   986         -
   987         -      for(i=(pSorter->nTree+iPrev)/2; i>0; i=i/2){
   988         -        /* Compare pIter1 and pIter2. Store the result in variable iRes. */
   989         -        int iRes;
   990         -        if( pIter1->pFile==0 ){
   991         -          iRes = +1;
   992         -        }else if( pIter2->pFile==0 ){
   993         -          iRes = -1;
   994         -        }else{
   995         -          vdbeSorterCompare(pCsr, 0, 
   996         -              pIter1->aKey, pIter1->nKey, pKey2, pIter2->nKey, &iRes
   997         -          );
   998         -        }
   999         -
  1000         -        /* If pIter1 contained the smaller value, set aTree[i] to its index.
  1001         -        ** Then set pIter2 to the next iterator to compare to pIter1. In this
  1002         -        ** case there is no cache of pIter2 in pSorter->pUnpacked, so set
  1003         -        ** pKey2 to point to the record belonging to pIter2.
  1004         -        **
  1005         -        ** Alternatively, if pIter2 contains the smaller of the two values,
  1006         -        ** set aTree[i] to its index and update pIter1. If vdbeSorterCompare()
  1007         -        ** was actually called above, then pSorter->pUnpacked now contains
  1008         -        ** a value equivalent to pIter2. So set pKey2 to NULL to prevent
  1009         -        ** vdbeSorterCompare() from decoding pIter2 again.  */
  1010         -        if( iRes<=0 ){
  1011         -          pSorter->aTree[i] = (int)(pIter1 - pSorter->aIter);
  1012         -          pIter2 = &pSorter->aIter[ pSorter->aTree[i ^ 0x0001] ];
  1013         -          pKey2 = pIter2->aKey;
  1014         -        }else{
  1015         -          if( pIter1->pFile ) pKey2 = 0;
  1016         -          pSorter->aTree[i] = (int)(pIter2 - pSorter->aIter);
  1017         -          pIter1 = &pSorter->aIter[ pSorter->aTree[i ^ 0x0001] ];
  1018         -        }
  1019         -
  1020         -      }
  1021         -      *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
         2399  +  assert( pSorter->bUsePMA || (pSorter->pReader==0 && pSorter->pMerger==0) );
         2400  +  if( pSorter->bUsePMA ){
         2401  +    assert( pSorter->pReader==0 || pSorter->pMerger==0 );
         2402  +    assert( pSorter->bUseThreads==0 || pSorter->pReader );
         2403  +    assert( pSorter->bUseThreads==1 || pSorter->pMerger );
         2404  +#if SQLITE_MAX_WORKER_THREADS>0
         2405  +    if( pSorter->bUseThreads ){
         2406  +      rc = vdbePmaReaderNext(pSorter->pReader);
         2407  +      *pbEof = (pSorter->pReader->pFd==0);
         2408  +    }else
         2409  +#endif
         2410  +    /*if( !pSorter->bUseThreads )*/ {
         2411  +      assert( pSorter->pMerger->pTask==(&pSorter->aTask[0]) );
         2412  +      rc = vdbeMergeEngineStep(pSorter->pMerger, pbEof);
  1022   2413       }
  1023   2414     }else{
  1024         -    SorterRecord *pFree = pSorter->pRecord;
  1025         -    pSorter->pRecord = pFree->pNext;
  1026         -    pFree->pNext = 0;
  1027         -    vdbeSorterRecordFree(db, pFree);
  1028         -    *pbEof = !pSorter->pRecord;
         2415  +    SorterRecord *pFree = pSorter->list.pList;
         2416  +    pSorter->list.pList = pFree->u.pNext;
         2417  +    pFree->u.pNext = 0;
         2418  +    if( pSorter->list.aMemory==0 ) vdbeSorterRecordFree(db, pFree);
         2419  +    *pbEof = !pSorter->list.pList;
  1029   2420       rc = SQLITE_OK;
  1030   2421     }
  1031   2422     return rc;
  1032   2423   }
  1033   2424   
  1034   2425   /*
  1035   2426   ** Return a pointer to a buffer owned by the sorter that contains the 
................................................................................
  1036   2427   ** current key.
  1037   2428   */
  1038   2429   static void *vdbeSorterRowkey(
  1039   2430     const VdbeSorter *pSorter,      /* Sorter object */
  1040   2431     int *pnKey                      /* OUT: Size of current key in bytes */
  1041   2432   ){
  1042   2433     void *pKey;
  1043         -  if( pSorter->aTree ){
  1044         -    VdbeSorterIter *pIter;
  1045         -    pIter = &pSorter->aIter[ pSorter->aTree[1] ];
  1046         -    *pnKey = pIter->nKey;
  1047         -    pKey = pIter->aKey;
         2434  +  if( pSorter->bUsePMA ){
         2435  +    PmaReader *pReader;
         2436  +#if SQLITE_MAX_WORKER_THREADS>0
         2437  +    if( pSorter->bUseThreads ){
         2438  +      pReader = pSorter->pReader;
         2439  +    }else
         2440  +#endif
         2441  +    /*if( !pSorter->bUseThreads )*/{
         2442  +      pReader = &pSorter->pMerger->aReadr[pSorter->pMerger->aTree[1]];
         2443  +    }
         2444  +    *pnKey = pReader->nKey;
         2445  +    pKey = pReader->aKey;
  1048   2446     }else{
  1049         -    *pnKey = pSorter->pRecord->nVal;
  1050         -    pKey = pSorter->pRecord->pVal;
         2447  +    *pnKey = pSorter->list.pList->nVal;
         2448  +    pKey = SRVAL(pSorter->list.pList);
  1051   2449     }
  1052   2450     return pKey;
  1053   2451   }
  1054   2452   
  1055   2453   /*
  1056   2454   ** Copy the current sorter key into the memory cell pOut.
  1057   2455   */
................................................................................
  1070   2468     return SQLITE_OK;
  1071   2469   }
  1072   2470   
  1073   2471   /*
  1074   2472   ** Compare the key in memory cell pVal with the key that the sorter cursor
  1075   2473   ** passed as the first argument currently points to. For the purposes of
  1076   2474   ** the comparison, ignore the rowid field at the end of each record.
         2475  +**
         2476  +** If the sorter cursor key contains any NULL values, consider it to be
         2477  +** less than pVal, even if pVal also contains NULL values.
  1077   2478   **
  1078   2479   ** If an error occurs, return an SQLite error code (i.e. SQLITE_NOMEM).
  1079   2480   ** Otherwise, set *pRes to a negative, zero or positive value if the
  1080   2481   ** key in pVal is smaller than, equal to or larger than the current sorter
  1081   2482   ** key.
         2483  +**
         2484  +** This routine forms the core of the OP_SorterCompare opcode, which in
         2485  +** turn is used to verify uniqueness when constructing a UNIQUE INDEX.
  1082   2486   */
  1083   2487   int sqlite3VdbeSorterCompare(
  1084   2488     const VdbeCursor *pCsr,         /* Sorter cursor */
  1085   2489     Mem *pVal,                      /* Value to compare to current sorter key */
  1086         -  int nKeyCol,                    /* Only compare this many fields */
         2490  +  int nKeyCol,                    /* Compare this many columns */
  1087   2491     int *pRes                       /* OUT: Result of comparison */
  1088   2492   ){
  1089   2493     VdbeSorter *pSorter = pCsr->pSorter;
         2494  +  UnpackedRecord *r2 = pSorter->pUnpacked;
         2495  +  KeyInfo *pKeyInfo = pCsr->pKeyInfo;
         2496  +  int i;
  1090   2497     void *pKey; int nKey;           /* Sorter key to compare pVal with */
  1091   2498   
         2499  +  if( r2==0 ){
         2500  +    char *p;
         2501  +    r2 = pSorter->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pKeyInfo,0,0,&p);
         2502  +    assert( pSorter->pUnpacked==(UnpackedRecord*)p );
         2503  +    if( r2==0 ) return SQLITE_NOMEM;
         2504  +    r2->nField = nKeyCol;
         2505  +  }
         2506  +  assert( r2->nField==nKeyCol );
         2507  +
  1092   2508     pKey = vdbeSorterRowkey(pSorter, &nKey);
  1093         -  vdbeSorterCompare(pCsr, nKeyCol, pVal->z, pVal->n, pKey, nKey, pRes);
         2509  +  sqlite3VdbeRecordUnpack(pKeyInfo, nKey, pKey, r2);
         2510  +  for(i=0; i<nKeyCol; i++){
         2511  +    if( r2->aMem[i].flags & MEM_Null ){
         2512  +      *pRes = -1;
         2513  +      return SQLITE_OK;
         2514  +    }
         2515  +  }
         2516  +
         2517  +  *pRes = sqlite3VdbeRecordCompare(pVal->n, pVal->z, r2, 0);
  1094   2518     return SQLITE_OK;
  1095   2519   }

Changes to src/where.c.

  2187   2187         ** less than the upper bound of the range query. Where the upper bound
  2188   2188         ** is either ($P) or ($P:$U). Again, even if $U is available, both values
  2189   2189         ** of iUpper are requested of whereKeyStats() and the smaller used.
  2190   2190         */
  2191   2191         tRowcnt iLower;
  2192   2192         tRowcnt iUpper;
  2193   2193   
         2194  +      if( pRec ){
         2195  +        testcase( pRec->nField!=pBuilder->nRecValid );
         2196  +        pRec->nField = pBuilder->nRecValid;
         2197  +      }
  2194   2198         if( nEq==p->nKeyCol ){
  2195   2199           aff = SQLITE_AFF_INTEGER;
  2196   2200         }else{
  2197   2201           aff = p->pTable->aCol[p->aiColumn[nEq]].affinity;
  2198   2202         }
  2199   2203         /* Determine iLower and iUpper using ($P) only. */
  2200   2204         if( nEq==0 ){
................................................................................
  2246   2250             nNew = sqlite3LogEst(iUpper - iLower);
  2247   2251           }else{
  2248   2252             nNew = 10;        assert( 10==sqlite3LogEst(2) );
  2249   2253           }
  2250   2254           if( nNew<nOut ){
  2251   2255             nOut = nNew;
  2252   2256           }
  2253         -        WHERETRACE(0x10, ("range scan regions: %u..%u  est=%d\n",
         2257  +        WHERETRACE(0x10, ("STAT4 range scan: %u..%u  est=%d\n",
  2254   2258                              (u32)iLower, (u32)iUpper, nOut));
  2255   2259         }
  2256   2260       }else{
  2257   2261         int bDone = 0;
  2258   2262         rc = whereRangeSkipScanEst(pParse, pLower, pUpper, pLoop, &bDone);
  2259   2263         if( bDone ) return rc;
  2260   2264       }
................................................................................
  2274   2278     ** index. While a closed range (e.g. col BETWEEN ? AND ?) is estimated to
  2275   2279     ** match 1/64 of the index. */ 
  2276   2280     if( pLower && pUpper ) nNew -= 20;
  2277   2281   
  2278   2282     nOut -= (pLower!=0) + (pUpper!=0);
  2279   2283     if( nNew<10 ) nNew = 10;
  2280   2284     if( nNew<nOut ) nOut = nNew;
         2285  +#if defined(WHERETRACE_ENABLED)
         2286  +  if( pLoop->nOut>nOut ){
         2287  +    WHERETRACE(0x10,("Range scan lowers nOut from %d to %d\n",
         2288  +                    pLoop->nOut, nOut));
         2289  +  }
         2290  +#endif
  2281   2291     pLoop->nOut = (LogEst)nOut;
  2282   2292     return rc;
  2283   2293   }
  2284   2294   
  2285   2295   #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
  2286   2296   /*
  2287   2297   ** Estimate the number of rows that will be returned based on
................................................................................
  2386   2396       nRowEst += nEst;
  2387   2397       pBuilder->nRecValid = nRecValid;
  2388   2398     }
  2389   2399   
  2390   2400     if( rc==SQLITE_OK ){
  2391   2401       if( nRowEst > nRow0 ) nRowEst = nRow0;
  2392   2402       *pnRow = nRowEst;
  2393         -    WHERETRACE(0x10,("IN row estimate: est=%g\n", nRowEst));
         2403  +    WHERETRACE(0x10,("IN row estimate: est=%d\n", nRowEst));
  2394   2404     }
  2395   2405     assert( pBuilder->nRecValid==nRecValid );
  2396   2406     return rc;
  2397   2407   }
  2398   2408   #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */
  2399   2409   
  2400   2410   /*
................................................................................
  4709   4719     }
  4710   4720   #endif /* SQLITE_OMIT_AUTOMATIC_INDEX */
  4711   4721   
  4712   4722     /* Loop over all indices
  4713   4723     */
  4714   4724     for(; rc==SQLITE_OK && pProbe; pProbe=pProbe->pNext, iSortIdx++){
  4715   4725       if( pProbe->pPartIdxWhere!=0
  4716         -     && !whereUsablePartialIndex(pNew->iTab, pWC, pProbe->pPartIdxWhere) ){
         4726  +     && !whereUsablePartialIndex(pSrc->iCursor, pWC, pProbe->pPartIdxWhere) ){
         4727  +      testcase( pNew->iTab!=pSrc->iCursor );  /* See ticket [98d973b8f5] */
  4717   4728         continue;  /* Partial index inappropriate for this query */
  4718   4729       }
  4719   4730       rSize = pProbe->aiRowLogEst[0];
  4720   4731       pNew->u.btree.nEq = 0;
  4721   4732       pNew->u.btree.nSkip = 0;
  4722   4733       pNew->nLTerm = 0;
  4723   4734       pNew->iSortIdx = 0;

Changes to test/index7.test.

   243    243   do_execsql_test index7-5.0 {
   244    244     CREATE INDEX t3b ON t3(b) WHERE xyzzy.t3.b BETWEEN 5 AND 10;
   245    245                                  /* ^^^^^-- ignored */
   246    246     ANALYZE;
   247    247     SELECT count(*) FROM t3 WHERE t3.b BETWEEN 5 AND 10;
   248    248     SELECT stat+0 FROM sqlite_stat1 WHERE idx='t3b';
   249    249   } {6 6}
          250  +
          251  +# Verify that the problem identified by ticket [98d973b8f5] has been fixed.
          252  +#
          253  +do_execsql_test index7-6.1 {
          254  +  CREATE TABLE t5(a, b);
          255  +  CREATE TABLE t4(c, d);
          256  +  INSERT INTO t5 VALUES(1, 'xyz');
          257  +  INSERT INTO t4 VALUES('abc', 'not xyz');
          258  +  SELECT * FROM (SELECT * FROM t5 WHERE a=1 AND b='xyz'), t4 WHERE c='abc';
          259  +} {
          260  +  1 xyz abc {not xyz}
          261  +}
          262  +do_execsql_test index7-6.2 {
          263  +  CREATE INDEX i4 ON t4(c) WHERE d='xyz';
          264  +  SELECT * FROM (SELECT * FROM t5 WHERE a=1 AND b='xyz'), t4 WHERE c='abc';
          265  +} {
          266  +  1 xyz abc {not xyz}
          267  +}
          268  +do_execsql_test index7-6.3 {
          269  +  CREATE VIEW v4 AS SELECT * FROM t4;
          270  +  INSERT INTO t4 VALUES('def', 'xyz');
          271  +  SELECT * FROM v4 WHERE d='xyz' AND c='def'
          272  +} {
          273  +  def xyz
          274  +}
          275  +do_eqp_test index7-6.4 {
          276  +  SELECT * FROM v4 WHERE d='xyz' AND c='def'
          277  +} {
          278  +  0 0 0 {SEARCH TABLE t4 USING INDEX i4 (c=?)}
          279  +}
   250    280   
   251    281   finish_test

Changes to test/malloc.test.

   876    876   do_malloc_test 39 -tclprep {
   877    877     sqlite3 db test.db
   878    878   } -sqlbody {
   879    879     SELECT test_auxdata('abc', 'def');
   880    880   } -cleanup {
   881    881     db close
   882    882   }
          883  +
          884  +reset_db
          885  +add_test_utf16bin_collate db
          886  +do_execsql_test 40.1 {
          887  +  CREATE TABLE t1(a);
          888  +  INSERT INTO t1 VALUES('fghij');
          889  +  INSERT INTO t1 VALUES('pqrst');
          890  +  INSERT INTO t1 VALUES('abcde');
          891  +  INSERT INTO t1 VALUES('uvwxy');
          892  +  INSERT INTO t1 VALUES('klmno');
          893  +}
          894  +do_execsql_test 40.2 {
          895  +  SELECT * FROM t1 ORDER BY 1 COLLATE utf16bin;
          896  +} {abcde fghij klmno pqrst uvwxy}
          897  +do_faultsim_test 40.3 -faults oom-trans* -body {
          898  +  execsql {
          899  +    SELECT * FROM t1 ORDER BY 1 COLLATE utf16bin;
          900  +  }
          901  +} -test {
          902  +  faultsim_test_result {0 {abcde fghij klmno pqrst uvwxy}} 
          903  +  faultsim_integrity_check
          904  +}
          905  +
          906  +reset_db
          907  +add_test_utf16bin_collate db
          908  +set big [string repeat x 200]
          909  +do_execsql_test 41.1 {
          910  +  DROP TABLE IF EXISTS t1;
          911  +  CREATE TABLE t1(a COLLATE utf16bin);
          912  +  INSERT INTO t1 VALUES('fghij' || $::big);
          913  +  INSERT INTO t1 VALUES('pqrst' || $::big);
          914  +  INSERT INTO t1 VALUES('abcde' || $::big);
          915  +  INSERT INTO t1 VALUES('uvwxy' || $::big);
          916  +  INSERT INTO t1 VALUES('klmno' || $::big);
          917  +  CREATE INDEX i1 ON t1(a);
          918  +}
          919  +do_faultsim_test 41.2 -faults oom* -body {
          920  +  execsql { SELECT * FROM t1 WHERE a = ('abcde' || $::big)}
          921  +} -test {
          922  +  faultsim_test_result [list 0 "abcde$::big"]
          923  +  faultsim_integrity_check
          924  +}
   883    925   
   884    926   # Ensure that no file descriptors were leaked.
   885    927   do_test malloc-99.X {
   886    928     catch {db close}
   887    929     set sqlite_open_file_count
   888    930   } {0}
   889    931   
   890    932   puts open-file-count=$sqlite_open_file_count
   891    933   finish_test

Changes to test/mallocA.test.

    21     21   #
    22     22   if {!$MEMDEBUG} {
    23     23      puts "Skipping mallocA tests: not compiled with -DSQLITE_MEMDEBUG..."
    24     24      finish_test
    25     25      return
    26     26   }
    27     27   
    28         -
    29     28   # Construct a test database
    30     29   #
    31     30   forcedelete test.db.bu
    32     31   db eval {
    33     32     CREATE TABLE t1(a COLLATE NOCASE,b,c);
    34     33     INSERT INTO t1 VALUES(1,2,3);
    35     34     INSERT INTO t1 VALUES(1,2,4);
................................................................................
   111    110         ANALYZE sqlite_master;
   112    111         SELECT rowid FROM t1 WHERE a='abc' AND b<'y';
   113    112       }
   114    113     } -test {
   115    114       faultsim_test_result [list 0 {1 2}]
   116    115     }
   117    116   }
          117  +
          118  +do_execsql_test 7.0 {
          119  +  PRAGMA cache_size = 5;
          120  +}
          121  +do_faultsim_test 7 -faults oom-trans* -prep {
          122  +  if {$iFail < 500} { set iFail 2000 }
          123  +  if {$iFail > 1215} { set iFail 2000 }
          124  +} -body {
          125  +  execsql {
          126  +    WITH r(x,y) AS (
          127  +      SELECT 1, randomblob(100)
          128  +      UNION ALL
          129  +      SELECT x+1, randomblob(100) FROM r
          130  +      LIMIT 1000
          131  +    )
          132  +    SELECT count(x), length(y) FROM r GROUP BY (x%5)
          133  +  }
          134  +} -test {
          135  +  set res [list 200 100 200 100 200 100 200 100 200 100]
          136  +  faultsim_test_result [list 0 $res]
          137  +}
          138  +
   118    139   
   119    140   # Ensure that no file descriptors were leaked.
   120    141   do_test malloc-99.X {
   121    142     catch {db close}
   122    143     set sqlite_open_file_count
   123    144   } {0}
   124    145   
   125    146   forcedelete test.db.bu
   126    147   finish_test

Changes to test/permutations.test.

   108    108     savepoint4.test savepoint6.test select9.test 
   109    109     speed1.test speed1p.test speed2.test speed3.test speed4.test 
   110    110     speed4p.test sqllimits1.test tkt2686.test thread001.test thread002.test
   111    111     thread003.test thread004.test thread005.test trans2.test vacuum3.test 
   112    112     incrvacuum_ioerr.test autovacuum_crash.test btree8.test shared_err.test
   113    113     vtab_err.test walslow.test walcrash.test walcrash3.test
   114    114     walthread.test rtree3.test indexfault.test securedel2.test
   115         -  fts4growth.test fts4growth2.test
          115  +  sort3.test sort4.test fts4growth.test fts4growth2.test
   116    116   }]
   117    117   if {[info exists ::env(QUICKTEST_INCLUDE)]} {
   118    118     set allquicktests [concat $allquicktests $::env(QUICKTEST_INCLUDE)]
   119    119   }
   120    120   
   121    121   #############################################################################
   122    122   # Start of tests
................................................................................
   351    351     Coverage tests for file analyze.c.
   352    352   } -files {
   353    353     analyze3.test analyze4.test analyze5.test analyze6.test
   354    354     analyze7.test analyze8.test analyze9.test analyzeA.test
   355    355     analyze.test analyzeB.test mallocA.test
   356    356   } 
   357    357   
          358  +test_suite "coverage-sorter" -description {
          359  +  Coverage tests for file vdbesort.c.
          360  +} -files {
          361  +  sort.test sortfault.test
          362  +} 
          363  +
   358    364   
   359    365   lappend ::testsuitelist xxx
   360    366   #-------------------------------------------------------------------------
   361    367   # Define the permutation test suites:
   362    368   #
   363    369   
   364    370   # Run some tests using pre-allocated page and scratch blocks.
................................................................................
   482    488     sqlite3_shutdown
   483    489     catch {sqlite3_config multithread}
   484    490     sqlite3_initialize
   485    491     autoinstall_test_functions
   486    492   } -files {
   487    493     delete.test   delete2.test  insert.test  rollback.test  select1.test
   488    494     select2.test  trans.test    update.test  vacuum.test    types.test
   489         -  types2.test   types3.test
          495  +  types2.test   types3.test   sort4.test
   490    496   } -shutdown {
   491    497     catch {db close}
   492    498     sqlite3_shutdown
   493    499     catch {sqlite3_config serialized}
   494    500     sqlite3_initialize
   495    501     autoinstall_test_functions
   496    502   }

Changes to test/sort.test.

     4      4   # a legal notice, here is a blessing:
     5      5   #
     6      6   #    May you do good and not evil.
     7      7   #    May you find forgiveness for yourself and forgive others.
     8      8   #    May you share freely, never taking more than you give.
     9      9   #
    10     10   #***********************************************************************
           11  +#
    11     12   # This file implements regression tests for SQLite library.  The
    12         -# focus of this file is testing the CREATE TABLE statement.
           13  +# focus of this file is testing the sorter (code in vdbesort.c).
    13     14   #
    14         -# $Id: sort.test,v 1.25 2005/11/14 22:29:06 drh Exp $
    15     15   
    16     16   set testdir [file dirname $argv0]
    17     17   source $testdir/tester.tcl
    18     18   
    19     19   # Create a bunch of data to sort against
    20     20   #
    21     21   do_test sort-1.0 {
................................................................................
   459    459       insert into b values (2, 1, 'xxx');
   460    460       insert into b values (1, 1, 'zzz');
   461    461       insert into b values (3, 1, 'yyy');
   462    462       select a.id, b.id, b.text from a join b on (a.id = b.aId)
   463    463         order by a.id, b.text;
   464    464     }
   465    465   } {1 2 xxx 1 3 yyy 1 1 zzz}
          466  +
          467  +#-------------------------------------------------------------------------
          468  +# Check that the sorter in vdbesort.c sorts in a stable fashion.
          469  +#
          470  +do_execsql_test sort-13.0 {
          471  +  CREATE TABLE t10(a, b);
          472  +}
          473  +do_test sort-13.1 {
          474  +  db transaction {
          475  +    for {set i 0} {$i < 100000} {incr i} {
          476  +      execsql { INSERT INTO t10 VALUES( $i/10, $i%10 ) }
          477  +    }
          478  +  }
          479  +} {}
          480  +do_execsql_test sort-13.2 {
          481  +  SELECT a, b FROM t10 ORDER BY a;
          482  +} [db eval {SELECT a, b FROM t10 ORDER BY a, b}]
          483  +do_execsql_test sort-13.3 {
          484  +  PRAGMA cache_size = 5;
          485  +  SELECT a, b FROM t10 ORDER BY a;
          486  +} [db eval {SELECT a, b FROM t10 ORDER BY a, b}]
          487  +
          488  +#-------------------------------------------------------------------------
          489  +# Sort some large ( > 4KiB) records.
          490  +#
          491  +proc cksum {x} {
          492  +  set i1 1
          493  +  set i2 2
          494  +  binary scan $x c* L
          495  +  foreach {a b} $L {
          496  +    set i1 [expr (($i2<<3) + $a) & 0x7FFFFFFF]
          497  +    set i2 [expr (($i1<<3) + $b) & 0x7FFFFFFF]
          498  +  }
          499  +  list $i1 $i2
          500  +}
          501  +db func cksum cksum
          502  +
          503  +do_execsql_test sort-14.0 {
          504  +  PRAGMA cache_size = 5;
          505  +  CREATE TABLE t11(a, b);
          506  +  INSERT INTO t11 VALUES(randomblob(5000), NULL);
          507  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --2
          508  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --3
          509  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --4
          510  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --5
          511  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --6
          512  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --7
          513  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --8
          514  +  INSERT INTO t11 SELECT randomblob(5000), NULL FROM t11; --9
          515  +  UPDATE t11 SET b = cksum(a);
          516  +}
          517  +
          518  +foreach {tn mmap_limit} {
          519  +  1 0
          520  +  2 1000000
          521  +} {
          522  +  do_test sort-14.$tn {
          523  +    sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $mmap_limit
          524  +    set prev ""
          525  +    db eval { SELECT * FROM t11 ORDER BY b } {
          526  +      if {$b != [cksum $a]} {error "checksum failed"}
          527  +      if {[string compare $b $prev] < 0} {error "sort failed"}
          528  +      set prev $b
          529  +    }
          530  +    set {} {}
          531  +  } {}
          532  +}
          533  +
          534  +#-------------------------------------------------------------------------
          535  +#
          536  +foreach {tn mmap_limit nWorker tmpstore coremutex fakeheap softheaplimit} {
          537  +          1          0       3     file      true    false             0
          538  +          2          0       3     file      true     true             0
          539  +          3          0       0     file      true    false             0
          540  +          4    1000000       3     file      true    false             0
          541  +          5          0       0   memory     false     true             0
          542  +          6          0       0     file     false     true       1000000     
          543  +          7          0       0     file     false     true         10000
          544  +} {
          545  +  db close
          546  +  sqlite3_shutdown
          547  +  if {$coremutex} {
          548  +    sqlite3_config multithread
          549  +  } else {
          550  +    sqlite3_config singlethread
          551  +  }
          552  +  sqlite3_initialize
          553  +  sorter_test_fakeheap $fakeheap
          554  +  sqlite3_soft_heap_limit $softheaplimit
          555  +
          556  +  reset_db
          557  +  sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $mmap_limit
          558  +  execsql "PRAGMA temp_store = $tmpstore; PRAGMA threads = $nWorker"
          559  +  
          560  +  
          561  +  set ten [string repeat X 10300]
          562  +  set one [string repeat y   200]
          563  +
          564  +  if {$softheaplimit} {
          565  +    execsql { PRAGMA cache_size = 20 };
          566  +  } else {
          567  +    execsql { PRAGMA cache_size = 5 };
          568  +  }
          569  +
          570  +  do_execsql_test 15.$tn.1 {
          571  +    WITH rr AS (
          572  +      SELECT 4, $ten UNION ALL
          573  +      SELECT 2, $one UNION ALL
          574  +      SELECT 1, $ten UNION ALL
          575  +      SELECT 3, $one
          576  +    )
          577  +    SELECT * FROM rr ORDER BY 1;
          578  +  } [list 1 $ten 2 $one 3 $one 4 $ten]
          579  +
          580  +  do_execsql_test 15.$tn.2 {
          581  +    CREATE TABLE t1(a);
          582  +    INSERT INTO t1 VALUES(4);
          583  +    INSERT INTO t1 VALUES(5);
          584  +    INSERT INTO t1 VALUES(3);
          585  +    INSERT INTO t1 VALUES(2);
          586  +    INSERT INTO t1 VALUES(6);
          587  +    INSERT INTO t1 VALUES(1);
          588  +    CREATE INDEX i1 ON t1(a);
          589  +    SELECT * FROM t1 ORDER BY a;
          590  +  } {1 2 3 4 5 6}
          591  +
          592  +  do_execsql_test 15.$tn.3 {
          593  +    WITH rr AS (
          594  +      SELECT 4, $ten UNION ALL
          595  +      SELECT 2, $one
          596  +    )
          597  +    SELECT * FROM rr ORDER BY 1;
          598  +  } [list 2 $one 4 $ten]
          599  +
          600  +  sorter_test_fakeheap 0
          601  +}
          602  +
          603  +db close
          604  +sqlite3_shutdown
          605  +set t(0) singlethread
          606  +set t(1) multithread
          607  +set t(2) serialized
          608  +sqlite3_config $t($sqlite_options(threadsafe))
          609  +sqlite3_initialize
          610  +sqlite3_soft_heap_limit 0
          611  +
          612  +reset_db
          613  +do_catchsql_test 16.1 {
          614  +  CREATE TABLE t1(a, b, c);
          615  +  INSERT INTO t1 VALUES(1, 2, 3);
          616  +  INSERT INTO t1 VALUES(1, NULL, 3);
          617  +  INSERT INTO t1 VALUES(NULL, 2, 3);
          618  +  INSERT INTO t1 VALUES(1, 2, NULL);
          619  +  INSERT INTO t1 VALUES(4, 5, 6);
          620  +  CREATE UNIQUE INDEX i1 ON t1(b, a, c);
          621  +} {0 {}}
          622  +reset_db
          623  +do_catchsql_test 16.2 {
          624  +  CREATE TABLE t1(a, b, c);
          625  +  INSERT INTO t1 VALUES(1, 2, 3);
          626  +  INSERT INTO t1 VALUES(1, NULL, 3);
          627  +  INSERT INTO t1 VALUES(1, 2, 3);
          628  +  INSERT INTO t1 VALUES(1, 2, NULL);
          629  +  INSERT INTO t1 VALUES(4, 5, 6);
          630  +  CREATE UNIQUE INDEX i1 ON t1(b, a, c);
          631  +} {1 {UNIQUE constraint failed: t1.b, t1.a, t1.c}}
          632  +
          633  +reset_db
          634  +do_execsql_test 17.1 {
          635  +  SELECT * FROM sqlite_master ORDER BY sql;
          636  +} {}
   466    637   
   467    638   finish_test

Added test/sort2.test.

            1  +# 2014 March 25.
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library. 
           12  +#
           13  +# Specifically, the tests in this file attempt to verify that 
           14  +# multi-threaded sorting works.
           15  +#
           16  +
           17  +set testdir [file dirname $argv0]
           18  +source $testdir/tester.tcl
           19  +set testprefix sort2
           20  +
           21  +foreach {tn script} {
           22  +  1 { }
           23  +  2 {
           24  +    catch { db close }
           25  +    reset_db
           26  +    catch { db eval {PRAGMA threads=7} }
           27  +  }
           28  +} {
           29  +
           30  +  eval $script
           31  +
           32  +  do_execsql_test $tn.1 {
           33  +    PRAGMA cache_size = 5;
           34  +    WITH r(x,y) AS (
           35  +      SELECT 1, randomblob(100)
           36  +      UNION ALL
           37  +      SELECT x+1, randomblob(100) FROM r
           38  +      LIMIT 100000
           39  +    )
           40  +    SELECT count(x), length(y) FROM r GROUP BY (x%5)
           41  +  } {
           42  +    20000 100 20000 100 20000 100 20000 100 20000 100
           43  +  }
           44  +
           45  +  do_execsql_test $tn.2.1 {
           46  +    CREATE TABLE t1(a, b);
           47  +    WITH r(x,y) AS (
           48  +      SELECT 1, randomblob(100)
           49  +      UNION ALL
           50  +      SELECT x+1, randomblob(100) FROM r
           51  +      LIMIT 10000
           52  +    ) INSERT INTO t1 SELECT * FROM r;
           53  +  }
           54  +  
           55  +  do_execsql_test $tn.2.2 {
           56  +    CREATE UNIQUE INDEX i1 ON t1(b, a);
           57  +  }
           58  +  
           59  +  do_execsql_test $tn.2.3 {
           60  +    CREATE UNIQUE INDEX i2 ON t1(a);
           61  +  }
           62  +  
           63  +  do_execsql_test $tn.2.4 { PRAGMA integrity_check } {ok}
           64  +  
           65  +  breakpoint
           66  +  do_execsql_test $tn.3 {
           67  +    PRAGMA cache_size = 5;
           68  +    WITH r(x,y) AS (
           69  +      SELECT 1, randomblob(100)
           70  +      UNION ALL
           71  +      SELECT x+1, randomblob(100) FROM r
           72  +      LIMIT 1000000
           73  +    )
           74  +    SELECT count(x), length(y) FROM r GROUP BY (x%5)
           75  +  } {
           76  +    200000 100 200000 100 200000 100 200000 100 200000 100
           77  +  }
           78  +}
           79  +
           80  +finish_test

Added test/sort3.test.

            1  +# 2014 March 25.
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library. 
           12  +#
           13  +# The tests in this file verify that sorting works when the library is
           14  +# configured to use mmap(), but the temporary files generated by the
           15  +# sorter are too large to be completely mapped.
           16  +#
           17  +
           18  +set testdir [file dirname $argv0]
           19  +source $testdir/tester.tcl
           20  +set testprefix sort3
           21  +
           22  +# Sort roughly 20MB of data. Once with a mmap limit of 5MB and once without.
           23  +#
           24  +foreach {itest limit} {
           25  +  1 5000000
           26  +  2 0x7FFFFFFF
           27  +} {
           28  +  sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $limit
           29  +  do_execsql_test 1.$itest {
           30  +    WITH r(x,y) AS (
           31  +        SELECT 1, randomblob(1000)
           32  +        UNION ALL
           33  +        SELECT x+1, randomblob(1000) FROM r
           34  +        LIMIT 20000
           35  +    )
           36  +    SELECT count(*), sum(length(y)) FROM r GROUP BY (x%5);
           37  +  } {
           38  +    4000 4000000 
           39  +    4000 4000000 
           40  +    4000 4000000 
           41  +    4000 4000000 
           42  +    4000 4000000
           43  +  }
           44  +}
           45  +
           46  +# Sort more than 2GB of data. At one point this was causing a problem.
           47  +# This test might take one minute or more to run.
           48  +#
           49  +do_execsql_test 2 {
           50  +  PRAGMA cache_size = 20000;
           51  +  WITH r(x,y) AS (
           52  +    SELECT 1, randomblob(1000)
           53  +    UNION ALL
           54  +    SELECT x+1, randomblob(1000) FROM r
           55  +    LIMIT 2200000
           56  +  )
           57  +  SELECT count(*), sum(length(y)) FROM r GROUP BY (x%5);
           58  +} {
           59  +  440000 440000000 
           60  +  440000 440000000 
           61  +  440000 440000000 
           62  +  440000 440000000 
           63  +  440000 440000000
           64  +}
           65  +
           66  +finish_test
           67  +

Added test/sort4.test.

            1  +# 2014 May 6.
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library. 
           12  +#
           13  +# The tests in this file are brute force tests of the multi-threaded
           14  +# sorter.
           15  +#
           16  +
           17  +set testdir [file dirname $argv0]
           18  +source $testdir/tester.tcl
           19  +set testprefix sort4
           20  +
           21  +# Configure the sorter to use 3 background threads.
           22  +db eval {PRAGMA threads=3}
           23  +
           24  +# Minimum number of seconds to run for. If the value is 0, each test
           25  +# is run exactly once. Otherwise, tests are repeated until the timeout
           26  +# expires.
           27  +set SORT4TIMEOUT 0
           28  +if {[permutation] == "multithread"} { set SORT4TIMEOUT 300 }
           29  +
           30  +#--------------------------------------------------------------------
           31  +# Set up a table "t1" containing $nRow rows. Each row also
           32  +# contains blob fields that collectively contain at least $nPayload 
           33  +# bytes of content. The table schema is as follows:
           34  +#
           35  +#   CREATE TABLE t1(a INTEGER, <extra-columns>, b INTEGER);
           36  +#
           37  +# For each row, the values of columns "a" and "b" are set to the same
           38  +# pseudo-randomly selected integer. The "extra-columns", of which there
           39  +# are at most eight, are named c0, c1, c2 etc. Column c0 contains a 4
           40  +# byte string. Column c1 an 8 byte string. Field c2 16 bytes, and so on.
           41  +#
           42  +# This table is intended to be used for testing queries of the form: 
           43  +#
           44  +#   SELECT a, <cols>, b FROM t1 ORDER BY a;
           45  +#
           46  +# The test code checks that rows are returned in order, and that the 
           47  +# values of "a" and "b" are the same for each row (the idea being that
           48  +# if field "b" at the end of the sorter record has not been corrupted, 
           49  +# the rest of the record is probably Ok as well).
           50  +#
           51  +proc populate_table {nRow nPayload} {
           52  +  set nCol 0
           53  +
           54  +  set n 0
           55  +  for {set nCol 0} {$n < $nPayload} {incr nCol} {
           56  +    incr n [expr (4 << $nCol)]
           57  +  }
           58  +
           59  +  set cols [lrange [list xxx c0 c1 c2 c3 c4 c5 c6 c7] 1 $nCol]
           60  +  set data [lrange [list xxx \
           61  +      randomblob(4) randomblob(8) randomblob(16) randomblob(32) \
           62  +      randomblob(64) randomblob(128) randomblob(256) randomblob(512) \
           63  +  ] 1 $nCol]
           64  +
           65  +  execsql { DROP TABLE IF EXISTS t1 }
           66  +
           67  +  db transaction {
           68  +    execsql "CREATE TABLE t1(a, [join $cols ,], b);"
           69  +    set insert "INSERT INTO t1 VALUES(:k, [join $data ,], :k)"
           70  +    for {set i 0} {$i < $nRow} {incr i} {
           71  +      set k [expr int(rand()*1000000000)]
           72  +      execsql $insert
           73  +    }
           74  +  }
           75  +}
           76  +
           77  +# Helper for [do_sorter_test]
           78  +#
           79  +proc sorter_test {nRow nRead nPayload} {
           80  +  set res [list]
           81  +
           82  +  set nLoad [expr ($nRow > $nRead) ? $nRead : $nRow]
           83  +
           84  +  set nPayload [expr (($nPayload+3)/4) * 4]
           85  +  set cols [list]
           86  +  foreach {mask col} { 
           87  +    0x04  c0 0x08  c1 0x10  c2 0x20  c3 
           88  +    0x40  c4 0x80  c5 0x100 c6 0x200 c7 
           89  +  } {
           90  +    if {$nPayload & $mask} { lappend cols $col }
           91  +  }
           92  +
           93  +  # Create two SELECT statements. Statement $sql1 uses the sorter to sort
           94  +  # $nRow records of a bit over $nPayload bytes each read from the "t1"
           95  +  # table created by [populate_table] proc above. Rows are sorted in order
           96  +  # of the integer field in each "t1" record.
           97  +  #
           98  +  # The second SQL statement sorts the same set of rows as the first, but
           99  +  # uses a LIMIT clause, causing SQLite to use a temp table instead of the
          100  +  # sorter for sorting.
          101  +  #
          102  +  set sql1 "SELECT a, [join $cols ,], b FROM t1 WHERE rowid<=$nRow ORDER BY a"
          103  +  set sql2 "SELECT a FROM t1 WHERE rowid<=$nRow ORDER BY a LIMIT $nRead"
          104  +
          105  +  # Pass the two SQL statements to a helper command written in C. This
          106  +  # command steps statement $sql1 $nRead times and compares the integer
          107  +  # values in the rows returned with the results of executing $sql2. If
          108  +  # the comparison fails (indicating some bug in the sorter), a Tcl
          109  +  # exception is thrown.
          110  +  #
          111  +  sorter_test_sort4_helper db $sql1 $nRead $sql2
          112  +  set {} {} 
          113  +}
          114  +
          115  +# Usage:
          116  +#
          117  +#   do_sorter_test <testname> <args>...
          118  +#
          119  +# where <args> are any of the following switches:
          120  +#
          121  +#   -rows N          (number of rows to have sorter sort)
          122  +#   -read N          (number of rows to read out of sorter)
          123  +#   -payload N       (bytes of payload to read with each row)
          124  +#   -cachesize N     (Value for "PRAGMA cache_size = ?")
          125  +#   -repeats N       (number of times to repeat test)
          126  +#   -fakeheap BOOL   (true to use separate allocations for in-memory records)
          127  +#
          128  +proc do_sorter_test {tn args} {
          129  +  set a(-rows)      1000
          130  +  set a(-repeats)   1
          131  +  set a(-read)      100
          132  +  set a(-payload)   100
          133  +  set a(-cachesize) 100
          134  +  set a(-fakeheap)  0
          135  +
          136  +  foreach {s val} $args {
          137  +    if {[info exists a($s)]==0} { 
          138  +      unset a(-cachesize)
          139  +      set optlist "[join [array names a] ,] or -cachesize"
          140  +      error "Unknown option $s, expected $optlist"
          141  +    }
          142  +    set a($s) $val
          143  +  }
          144  +  if {[permutation] == "memsys3" || [permutation] == "memsys5"} {
          145  +    set a(-fakeheap) 0
          146  +  }
          147  +  if {$a(-fakeheap)} { sorter_test_fakeheap 1 }
          148  +
          149  +
          150  +  db eval "PRAGMA cache_size = $a(-cachesize)"
          151  +  do_test $tn [subst -nocommands {
          152  +    for {set i 0} {[set i] < $a(-repeats)} {incr i} {
          153  +      sorter_test $a(-rows) $a(-read) $a(-payload)
          154  +    }
          155  +  }] {}
          156  +
          157  +  if {$a(-fakeheap)} { sorter_test_fakeheap 0 }
          158  +}
          159  +
          160  +proc clock_seconds {} {
          161  +  db one {SELECT strftime('%s')}
          162  +}
          163  +
          164  +#-------------------------------------------------------------------------
          165  +# Begin tests here.
          166  +
          167  +# Create a test database.
          168  +do_test 1 {
          169  +  execsql "PRAGMA page_size = 4096"
          170  +  populate_table 100000 500
          171  +} {}
          172  +
          173  +set iTimeLimit [expr [clock_seconds] + $SORT4TIMEOUT]
          174  +
          175  +for {set t 2} {1} {incr t} {
          176  +  do_sorter_test $t.2 -repeats 10 -rows 1000   -read 100
          177  +  do_sorter_test $t.3 -repeats 10 -rows 100000 -read 1000
          178  +  do_sorter_test $t.4 -repeats 10 -rows 100000 -read 1000 -payload 500
          179  +  do_sorter_test $t.5 -repeats 10 -rows 100000 -read 100000 -payload 8
          180  +  do_sorter_test $t.6 -repeats 10 -rows 100000 -read 10 -payload 8
          181  +  do_sorter_test $t.7 -repeats 10 -rows 10000 -read 10000 -payload 8 -fakeheap 1
          182  +  do_sorter_test $t.8 -repeats 10 -rows 100000 -read 10000 -cachesize 250
          183  +
          184  +  set iNow [clock_seconds]
          185  +  if {$iNow>=$iTimeLimit} break
          186  +  do_test "$testprefix-([expr $iTimeLimit-$iNow] seconds remain)" {} {}
          187  +}
          188  +
          189  +finish_test

Added test/sortfault.test.

            1  +# 2014 March 25.
            2  +#
            3  +# The author disclaims copyright to this source code.  In place of
            4  +# a legal notice, here is a blessing:
            5  +#
            6  +#    May you do good and not evil.
            7  +#    May you find forgiveness for yourself and forgive others.
            8  +#    May you share freely, never taking more than you give.
            9  +#
           10  +#***********************************************************************
           11  +# This file implements regression tests for SQLite library. 
           12  +#
           13  +# Specifically, it tests the effects of fault injection on the sorter
           14  +# module (code in vdbesort.c).
           15  +#
           16  +
           17  +set testdir [file dirname $argv0]
           18  +source $testdir/tester.tcl
           19  +set testprefix sortfault
           20  +
           21  +do_execsql_test 1.0 {
           22  +  PRAGMA cache_size = 5;
           23  +}
           24  +
           25  +foreach {tn mmap_limit nWorker tmpstore threadsmode fakeheap lookaside} {
           26  +          1          0       0     file multithread    false     false
           27  +          2     100000       0     file multithread    false     false
           28  +          3     100000       1     file multithread    false     false
           29  +          4    2000000       0     file singlethread   false      true
           30  +} {
           31  +  if {$sqlite_options(threadsafe)} { set threadsmode singlethread }
           32  +
           33  +  db eval "PRAGMA threads=$nWorker"
           34  +  sqlite3_config $threadsmode
           35  +  if { $lookaside } {
           36  +    sqlite3_config_lookaside 100 500
           37  +  } else {
           38  +    sqlite3_config_lookaside 0 0
           39  +  }
           40  +  sqlite3_initialize
           41  +  sorter_test_fakeheap $fakeheap
           42  +
           43  +  set str [string repeat a 1000]
           44  +  puts $threadsmode
           45  +
           46  +  do_faultsim_test 1.$tn -prep {
           47  +    sqlite3 db test.db
           48  +    sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $::mmap_limit
           49  +    execsql { PRAGMA cache_size = 5 }
           50  +  } -body {
           51  +    execsql { 
           52  +      WITH r(x,y) AS (
           53  +          SELECT 1, $::str
           54  +          UNION ALL
           55  +          SELECT x+1, $::str FROM r
           56  +          LIMIT 200
           57  +      )
           58  +      SELECT count(x), length(y) FROM r GROUP BY (x%5)
           59  +    }
           60  +  } -test {
           61  +    faultsim_test_result {0 {40 1000 40 1000 40 1000 40 1000 40 1000}}
           62  +  }
           63  +
           64  +  do_faultsim_test 2.$tn -faults oom* -prep {
           65  +    sqlite3 db test.db
           66  +    sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $::mmap_limit
           67  +    add_test_utf16bin_collate db
           68  +    execsql { PRAGMA cache_size = 5 }
           69  +  } -body {
           70  +    execsql { 
           71  +      WITH r(x,y) AS (
           72  +          SELECT 100, $::str
           73  +          UNION ALL
           74  +          SELECT x-1, $::str FROM r
           75  +          LIMIT 100
           76  +      )
           77  +      SELECT count(x), length(y) FROM r GROUP BY y COLLATE utf16bin, (x%5)
           78  +    }
           79  +  } -test {
           80  +    faultsim_test_result {0 {20 1000 20 1000 20 1000 20 1000 20 1000}}
           81  +  }
           82  +
           83  +  if {$mmap_limit > 1000000} {
           84  +    set str2 [string repeat $str 10]
           85  +
           86  +    sqlite3_memdebug_vfs_oom_test 0
           87  +    sqlite3 db test.db
           88  +    sqlite3_test_control SQLITE_TESTCTRL_SORTER_MMAP db $::mmap_limit
           89  +    execsql { PRAGMA cache_size = 5 }
           90  +
           91  +    do_faultsim_test 3.$tn -faults oom-trans* -body {
           92  +      execsql { 
           93  +        WITH r(x,y) AS (
           94  +            SELECT 300, $::str2
           95  +            UNION ALL
           96  +            SELECT x-1, $::str2 FROM r
           97  +            LIMIT 300
           98  +        )
           99  +        SELECT count(x), length(y) FROM r GROUP BY y, (x%5)
          100  +      }
          101  +    } -test {
          102  +      faultsim_test_result {0 {60 10000 60 10000 60 10000 60 10000 60 10000}}
          103  +    }
          104  +
          105  +    sqlite3_memdebug_vfs_oom_test 1
          106  +  }
          107  +}
          108  +
          109  +catch { db close }
          110  +sqlite3_shutdown
          111  +set t(0) singlethread
          112  +set t(1) multithread
          113  +set t(2) serialized
          114  +sqlite3_config $t($sqlite_options(threadsafe))
          115  +sqlite3_config_lookaside 100 500
          116  +sqlite3_initialize
          117  +
          118  +#-------------------------------------------------------------------------
          119  +#
          120  +reset_db
          121  +do_execsql_test 4.0 { 
          122  +  CREATE TABLE t1(a, b, c); 
          123  +  INSERT INTO t1 VALUES(1, 2, 3);
          124  +}
          125  +do_test 4.1 { 
          126  +  for {set i 0} {$i < 256} {incr i} {
          127  +    execsql { 
          128  +      INSERT INTO t1 SELECT
          129  +        ((a<<3) + b) & 2147483647,
          130  +        ((b<<3) + c) & 2147483647,
          131  +        ((c<<3) + a) & 2147483647
          132  +      FROM t1 ORDER BY rowid DESC LIMIT 1;
          133  +    }
          134  +  }
          135  +} {}
          136  +
          137  +faultsim_save_and_close
          138  +
          139  +do_faultsim_test 4.2 -faults oom* -prep {
          140  +  faultsim_restore_and_reopen
          141  +} -body {
          142  +  execsql { CREATE UNIQUE INDEX i1 ON t1(a,b,c) }
          143  +} -test {
          144  +  faultsim_test_result {0 {}}
          145  +}
          146  +
          147  +#-------------------------------------------------------------------------
          148  +#
          149  +reset_db
          150  +set a [string repeat a 500]
          151  +set b [string repeat b 500]
          152  +set c [string repeat c 500]
          153  +do_execsql_test 5.0 { 
          154  +  CREATE TABLE t1(a, b, c); 
          155  +  INSERT INTO t1 VALUES($a, $b, $c); 
          156  +  INSERT INTO t1 VALUES($c, $b, $a); 
          157  +}
          158  +
          159  +do_faultsim_test 5.1 -faults oom* -body {
          160  +  execsql { SELECT * FROM t1 ORDER BY a }
          161  +} -test {
          162  +  faultsim_test_result [list 0 [list $::a $::b $::c $::c $::b $::a]]
          163  +}
          164  +
          165  +finish_test

Changes to test/speedtest1.c.

    23     23     "  --reprepare         Reprepare each statement upon every invocation\n"
    24     24     "  --scratch N SZ      Configure scratch memory for N slots of SZ bytes each\n"
    25     25     "  --sqlonly           No-op.  Only show the SQL that would have been run.\n"
    26     26     "  --size N            Relative test size.  Default=100\n"
    27     27     "  --stats             Show statistics at the end\n"
    28     28     "  --testset T         Run test-set T\n"
    29     29     "  --trace             Turn on SQL tracing\n"
           30  +  "  --threads N         Use up to N threads for sorting\n"
    30     31     "  --utf16be           Set text encoding to UTF-16BE\n"
    31     32     "  --utf16le           Set text encoding to UTF-16LE\n"
    32     33     "  --verify            Run additional verification steps.\n"
    33     34     "  --without-rowid     Use WITHOUT ROWID where appropriate\n"
    34     35   ;
    35     36   
    36     37   
................................................................................
  1137   1138     const char *zKey = 0;         /* Encryption key */
  1138   1139     int nLook = 0, szLook = 0;    /* --lookaside configuration */
  1139   1140     int noSync = 0;               /* True for --nosync */
  1140   1141     int pageSize = 0;             /* Desired page size.  0 means default */
  1141   1142     int nPCache = 0, szPCache = 0;/* --pcache configuration */
  1142   1143     int nScratch = 0, szScratch=0;/* --scratch configuration */
  1143   1144     int showStats = 0;            /* True for --stats */
         1145  +  int nThread = 0;              /* --threads value */
  1144   1146     const char *zTSet = "main";   /* Which --testset to run */
  1145   1147     int doTrace = 0;              /* True for --trace */
  1146   1148     const char *zEncoding = 0;    /* --utf16be or --utf16le */
  1147   1149     const char *zDbName = 0;      /* Name of the test database */
  1148   1150   
  1149   1151     void *pHeap = 0;              /* Allocated heap space */
  1150   1152     void *pLook = 0;              /* Allocated lookaside space */
................................................................................
  1221   1223         }else if( strcmp(z,"stats")==0 ){
  1222   1224           showStats = 1;
  1223   1225         }else if( strcmp(z,"testset")==0 ){
  1224   1226           if( i>=argc-1 ) fatal_error("missing argument on %s\n", argv[i]);
  1225   1227           zTSet = argv[++i];
  1226   1228         }else if( strcmp(z,"trace")==0 ){
  1227   1229           doTrace = 1;
         1230  +      }else if( strcmp(z,"threads")==0 ){
         1231  +        if( i>=argc-1 ) fatal_error("missing argument on %s\n", argv[i]);
         1232  +        nThread = integerValue(argv[++i]);
  1228   1233         }else if( strcmp(z,"utf16le")==0 ){
  1229   1234           zEncoding = "utf16le";
  1230   1235         }else if( strcmp(z,"utf16be")==0 ){
  1231   1236           zEncoding = "utf16be";
  1232   1237         }else if( strcmp(z,"verify")==0 ){
  1233   1238           g.bVerify = 1;
  1234   1239         }else if( strcmp(z,"without-rowid")==0 ){
................................................................................
  1286   1291       rc = sqlite3_db_config(g.db, SQLITE_DBCONFIG_LOOKASIDE, pLook, szLook,nLook);
  1287   1292       if( rc ) fatal_error("lookaside configuration failed: %d\n", rc);
  1288   1293     }
  1289   1294   
  1290   1295     /* Set database connection options */
  1291   1296     sqlite3_create_function(g.db, "random", 0, SQLITE_UTF8, 0, randomFunc, 0, 0);
  1292   1297     if( doTrace ) sqlite3_trace(g.db, traceCallback, 0);
         1298  +  speedtest1_exec("PRAGMA threads=%d", nThread);
  1293   1299     if( zKey ){
  1294   1300       speedtest1_exec("PRAGMA key('%s')", zKey);
  1295   1301     }
  1296   1302     if( zEncoding ){
  1297   1303       speedtest1_exec("PRAGMA encoding=%s", zEncoding);
  1298   1304     }
  1299   1305     if( doAutovac ){

Changes to test/tester.tcl.

  1102   1102       set G ""
  1103   1103       set B ""
  1104   1104       set D ""
  1105   1105     }
  1106   1106     foreach opcode {
  1107   1107         Seek SeekGe SeekGt SeekLe SeekLt NotFound Last Rewind
  1108   1108         NoConflict Next Prev VNext VPrev VFilter
         1109  +      SorterSort SorterNext
  1109   1110     } {
  1110   1111       set color($opcode) $B
  1111   1112     }
  1112   1113     foreach opcode {ResultRow} {
  1113   1114       set color($opcode) $G
  1114   1115     }
  1115   1116     foreach opcode {IdxInsert Insert Delete IdxDelete} {
................................................................................
  1124   1125       if {$opcode == "Goto" && ($bSeenGoto==0 || ($p2 > $addr+10))} {
  1125   1126         set linebreak($p2) 1
  1126   1127         set bSeenGoto 1
  1127   1128       }
  1128   1129   
  1129   1130       if {$opcode=="Next"  || $opcode=="Prev" 
  1130   1131        || $opcode=="VNext" || $opcode=="VPrev"
         1132  +     || $opcode=="SorterNext"
  1131   1133       } {
  1132   1134         for {set i $p2} {$i<$addr} {incr i} {
  1133   1135           incr x($i) 2
  1134   1136         }
  1135   1137       }
  1136   1138   
  1137   1139       if {$opcode == "Goto" && $p2<$addr && $op($p2)=="Yield"} {

Changes to test/whereJ.test.

   366    366        AND t0b.id=2
   367    367        AND t1b.id BETWEEN t0b.minChild AND t0b.maxChild
   368    368        AND t2b.id BETWEEN t1b.minChild AND t1b.maxChild
   369    369        AND t3b.id BETWEEN t2b.minChild AND t2b.maxChild
   370    370        AND t4.id BETWEEN t3a.minChild AND t3b.maxChild
   371    371     ORDER BY t4.x;
   372    372   } {~/SCAN/}
          373  +
          374  +############################################################################
          375  +
          376  +ifcapable stat4 {
          377  +  # Create and populate table.
          378  +  do_execsql_test 3.1 { CREATE TABLE t1(a, b, c) }
          379  +  for {set i 0} {$i < 32} {incr i 2} {
          380  +    for {set x 0} {$x < 100} {incr x} {
          381  +      execsql { INSERT INTO t1 VALUES($i, $x, $c) }
          382  +      incr c
          383  +    }
          384  +    execsql { INSERT INTO t1 VALUES($i+1, 5, $c) }
          385  +    incr c
          386  +  }
          387  +  
          388  +  do_execsql_test 3.2 {
          389  +    SELECT a, count(*) FROM t1 GROUP BY a HAVING a < 8;
          390  +  } {
          391  +    0 100 1 1 2 100 3 1 4 100 5 1 6 100 7 1
          392  +  }
          393  +  
          394  +  do_execsql_test 3.3 {
          395  +    CREATE INDEX idx_ab ON t1(a, b);
          396  +    CREATE INDEX idx_c ON t1(c);
          397  +    ANALYZE;
          398  +  } {}
          399  +  
          400  +  # This one should use index "idx_c".
          401  +  do_eqp_test 3.4 {
          402  +    SELECT * FROM t1 WHERE 
          403  +      a = 4 AND b BETWEEN 20 AND 80           -- Matches 80 rows
          404  +        AND
          405  +      c BETWEEN 150 AND 160                   -- Matches 10 rows
          406  +  } {
          407  +    0 0 0 {SEARCH TABLE t1 USING INDEX idx_c (c>? AND c<?)}
          408  +  }
          409  +  
          410  +  # This one should use index "idx_ab".
          411  +  do_eqp_test 3.5 {
          412  +    SELECT * FROM t1 WHERE 
          413  +      a = 5 AND b BETWEEN 20 AND 80           -- Matches 1 row
          414  +        AND
          415  +      c BETWEEN 150 AND 160                   -- Matches 10 rows
          416  +  } {
          417  +    0 0 0 {SEARCH TABLE t1 USING INDEX idx_ab (a=? AND b>? AND b<?)}
          418  +  }
          419  +}
   373    420   
   374    421   
   375    422   finish_test

Changes to tool/mkpragmatab.tcl.

   290    290     TYPE: HEXKEY
   291    291     IF:   defined(SQLITE_HAS_CODEC)
   292    292   
   293    293     NAME: activate_extensions
   294    294     IF:   defined(SQLITE_HAS_CODEC) || defined(SQLITE_ENABLE_CEROD)
   295    295   
   296    296     NAME: soft_heap_limit
          297  +
          298  +  NAME: threads
   297    299   }
   298    300   fconfigure stdout -translation lf
   299    301   set name {}
   300    302   set type {}
   301    303   set if {}
   302    304   set flags {}
   303    305   set arg 0

Changes to tool/mksqlite3c-noext.tcl.

   235    235      mutex.c
   236    236      mutex_noop.c
   237    237      mutex_unix.c
   238    238      mutex_w32.c
   239    239      malloc.c
   240    240      printf.c
   241    241      random.c
          242  +   threads.c
   242    243      utf.c
   243    244      util.c
   244    245      hash.c
   245    246      opcodes.c
   246    247   
   247    248      os_unix.c
   248    249      os_win.c

Changes to tool/mksqlite3c.tcl.

   250    250      mutex.c
   251    251      mutex_noop.c
   252    252      mutex_unix.c
   253    253      mutex_w32.c
   254    254      malloc.c
   255    255      printf.c
   256    256      random.c
          257  +   threads.c
   257    258      utf.c
   258    259      util.c
   259    260      hash.c
   260    261      opcodes.c
   261    262   
   262    263      os_unix.c
   263    264      os_win.c