Index: src/btree.c ================================================================== --- src/btree.c +++ src/btree.c @@ -1732,26 +1732,15 @@ assert( (flags & BTREE_UNORDERED)==0 || (flags & BTREE_SINGLE)!=0 ); /* A BTREE_SINGLE database is always a temporary and/or ephemeral */ assert( (flags & BTREE_SINGLE)==0 || isTempDb ); - /* The BTREE_SORTER flag is only used if SQLITE_OMIT_MERGE_SORT is undef */ -#ifdef SQLITE_OMIT_MERGE_SORT - assert( (flags & BTREE_SORTER)==0 ); -#endif - - /* BTREE_SORTER is always on a BTREE_SINGLE, BTREE_OMIT_JOURNAL */ - assert( (flags & BTREE_SORTER)==0 || - (flags & (BTREE_SINGLE|BTREE_OMIT_JOURNAL)) - ==(BTREE_SINGLE|BTREE_OMIT_JOURNAL) ); - if( db->flags & SQLITE_NoReadlock ){ flags |= BTREE_NO_READLOCK; } if( isMemdb ){ flags |= BTREE_MEMORY; - flags &= ~BTREE_SORTER; } if( (vfsFlags & SQLITE_OPEN_MAIN_DB)!=0 && (isMemdb || isTempDb) ){ vfsFlags = (vfsFlags & ~SQLITE_OPEN_MAIN_DB) | SQLITE_OPEN_TEMP_DB; } p = sqlite3MallocZero(sizeof(Btree)); @@ -7294,20 +7283,13 @@ releasePage(pPage); } return rc; } int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){ - BtShared *pBt = p->pBt; int rc; sqlite3BtreeEnter(p); - if( (pBt->openFlags&BTREE_SINGLE) ){ - pBt->nPage = 0; - sqlite3PagerTruncateImage(pBt->pPager, 1); - rc = newDatabase(pBt); - }else{ - rc = btreeDropTable(p, iTable, piMoved); - } + rc = btreeDropTable(p, iTable, piMoved); sqlite3BtreeLeave(p); return rc; } Index: src/btree.h ================================================================== --- src/btree.h +++ src/btree.h @@ -59,11 +59,10 @@ #define BTREE_OMIT_JOURNAL 1 /* Do not create or use a rollback journal */ #define BTREE_NO_READLOCK 2 /* Omit readlocks on readonly files */ #define BTREE_MEMORY 4 /* This is an in-memory DB */ #define BTREE_SINGLE 8 /* The file contains at most 1 b-tree */ #define BTREE_UNORDERED 16 /* Use of a hash implementation is OK */ -#define BTREE_SORTER 32 /* Used as accumulator in external merge sort */ int 
sqlite3BtreeClose(Btree*); int sqlite3BtreeSetCacheSize(Btree*,int); int sqlite3BtreeSetSafetyLevel(Btree*,int,int,int); int sqlite3BtreeSyncDisabled(Btree*); Index: src/build.c ================================================================== --- src/build.c +++ src/build.c @@ -2324,27 +2324,19 @@ Table *pTab = pIndex->pTable; /* The table that is indexed */ int iTab = pParse->nTab++; /* Btree cursor used for pTab */ int iIdx = pParse->nTab++; /* Btree cursor used for pIndex */ int iSorter = iTab; /* Cursor opened by OpenSorter (if in use) */ int addr1; /* Address of top of loop */ + int addr2; /* Address to jump to for next iteration */ int tnum; /* Root page of index */ Vdbe *v; /* Generate code into this virtual machine */ KeyInfo *pKey; /* KeyInfo for index */ int regIdxKey; /* Registers containing the index key */ int regRecord; /* Register holding assemblied index record */ sqlite3 *db = pParse->db; /* The database connection */ int iDb = sqlite3SchemaToIndex(db, pIndex->pSchema); - /* Set bUseSorter to use OP_OpenSorter, or clear it to insert directly - ** into the index. The sorter is used unless either OMIT_MERGE_SORT is - ** defined or the system is configured to store temp files in-memory. */ -#ifdef SQLITE_OMIT_MERGE_SORT - static const int bUseSorter = 0; -#else - const int bUseSorter = !sqlite3TempInMemory(pParse->db); -#endif - #ifndef SQLITE_OMIT_AUTHORIZATION if( sqlite3AuthCheck(pParse, SQLITE_REINDEX, pIndex->zName, 0, db->aDb[iDb].zName ) ){ return; } @@ -2366,32 +2358,44 @@ (char *)pKey, P4_KEYINFO_HANDOFF); if( memRootPage>=0 ){ sqlite3VdbeChangeP5(v, 1); } +#ifndef SQLITE_OMIT_MERGE_SORT /* Open the sorter cursor if we are to use one. */ - if( bUseSorter ){ - iSorter = pParse->nTab++; - sqlite3VdbeAddOp4(v, OP_OpenSorter, iSorter, 0, 0, (char*)pKey, P4_KEYINFO); - sqlite3VdbeChangeP5(v, BTREE_SORTER); - } + iSorter = pParse->nTab++; + sqlite3VdbeAddOp4(v, OP_SorterOpen, iSorter, 0, 0, (char*)pKey, P4_KEYINFO); +#endif /* Open the table. 
Loop through all rows of the table, inserting index ** records into the sorter. */ sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead); addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0); + addr2 = addr1 + 1; regRecord = sqlite3GetTempReg(pParse); regIdxKey = sqlite3GenerateIndexKey(pParse, pIndex, iTab, regRecord, 1); - if( bUseSorter ){ - sqlite3VdbeAddOp2(v, OP_IdxInsert, iSorter, regRecord); - sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1); - sqlite3VdbeJumpHere(v, addr1); - addr1 = sqlite3VdbeAddOp2(v, OP_Sort, iSorter, 0); - sqlite3VdbeAddOp2(v, OP_RowKey, iSorter, regRecord); - } - +#ifndef SQLITE_OMIT_MERGE_SORT + sqlite3VdbeAddOp2(v, OP_SorterInsert, iSorter, regRecord); + sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1); + sqlite3VdbeJumpHere(v, addr1); + addr1 = sqlite3VdbeAddOp2(v, OP_SorterSort, iSorter, 0); + if( pIndex->onError!=OE_None ){ + int j2 = sqlite3VdbeCurrentAddr(v) + 3; + sqlite3VdbeAddOp2(v, OP_Goto, 0, j2); + addr2 = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeAddOp3(v, OP_SorterCompare, iSorter, j2, regRecord); + sqlite3HaltConstraint( + pParse, OE_Abort, "indexed columns are not unique", P4_STATIC + ); + }else{ + addr2 = sqlite3VdbeCurrentAddr(v); + } + sqlite3VdbeAddOp2(v, OP_SorterData, iSorter, regRecord); + sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, 1); + sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); +#else if( pIndex->onError!=OE_None ){ const int regRowid = regIdxKey + pIndex->nColumn; const int j2 = sqlite3VdbeCurrentAddr(v) + 2; void * const pRegKey = SQLITE_INT_TO_PTR(regIdxKey); @@ -2406,14 +2410,15 @@ */ sqlite3VdbeAddOp4(v, OP_IsUnique, iIdx, j2, regRowid, pRegKey, P4_INT32); sqlite3HaltConstraint( pParse, OE_Abort, "indexed columns are not unique", P4_STATIC); } - sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, bUseSorter); + sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, 0); sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); +#endif sqlite3ReleaseTempReg(pParse, regRecord); - sqlite3VdbeAddOp2(v, OP_Next, iSorter, 
addr1+1); + sqlite3VdbeAddOp2(v, OP_SorterNext, iSorter, addr2); sqlite3VdbeJumpHere(v, addr1); sqlite3VdbeAddOp1(v, OP_Close, iTab); sqlite3VdbeAddOp1(v, OP_Close, iIdx); sqlite3VdbeAddOp1(v, OP_Close, iSorter); Index: src/expr.c ================================================================== --- src/expr.c +++ src/expr.c @@ -2285,11 +2285,11 @@ if( !pAggInfo->directMode ){ assert( pCol->iMem>0 ); inReg = pCol->iMem; break; }else if( pAggInfo->useSortingIdx ){ - sqlite3VdbeAddOp3(v, OP_Column, pAggInfo->sortingIdx, + sqlite3VdbeAddOp3(v, OP_Column, pAggInfo->sortingIdxPTab, pCol->iSorterColumn, target); break; } /* Otherwise, fall thru into the TK_COLUMN case */ } Index: src/pager.c ================================================================== --- src/pager.c +++ src/pager.c @@ -619,11 +619,10 @@ u8 syncFlags; /* SYNC_NORMAL or SYNC_FULL otherwise */ u8 tempFile; /* zFilename is a temporary file */ u8 readOnly; /* True for a read-only database */ u8 memDb; /* True to inhibit all file I/O */ u8 hasSeenStress; /* pagerStress() called one or more times */ - u8 isSorter; /* True for a PAGER_SORTER */ /************************************************************************** ** The following block contains those class members that change during ** routine opertion. Class members not in this block are either fixed ** when the pager is first created or else only change when there is a @@ -843,19 +842,10 @@ ); assert( p->eState!=PAGER_ERROR && p->eState!=PAGER_OPEN ); assert( pagerUseWal(p)==0 ); } - /* A sorter is a temp file that never spills to disk and always has - ** the doNotSpill flag set - */ - if( p->isSorter ){ - assert( p->tempFile ); - assert( p->doNotSpill ); - assert( p->fd->pMethods==0 ); - } - /* If changeCountDone is set, a RESERVED lock or greater must be held ** on the file. 
*/ assert( pPager->changeCountDone==0 || pPager->eLock>=RESERVED_LOCK ); assert( p->eLock!=PENDING_LOCK ); @@ -4555,16 +4545,10 @@ } /* pPager->xBusyHandler = 0; */ /* pPager->pBusyHandlerArg = 0; */ pPager->xReiniter = xReinit; /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */ -#ifndef SQLITE_OMIT_MERGE_SORT - if( flags & PAGER_SORTER ){ - pPager->doNotSpill = 1; - pPager->isSorter = 1; - } -#endif *ppPager = pPager; return SQLITE_OK; } @@ -6105,21 +6089,10 @@ */ int sqlite3PagerIsMemdb(Pager *pPager){ return MEMDB; } -#ifndef SQLITE_OMIT_MERGE_SORT -/* -** Return true if the pager has seen a pagerStress callback. -*/ -int sqlite3PagerUnderStress(Pager *pPager){ - assert( pPager->isSorter ); - assert( pPager->doNotSpill ); - return pPager->hasSeenStress; -} -#endif - /* ** Check that there are at least nSavepoint savepoints open. If there are ** currently less than nSavepoints open, then open one or more savepoints ** to make up the difference. If the number of savepoints is already ** equal to nSavepoint, then this function is a no-op. Index: src/pager.h ================================================================== --- src/pager.h +++ src/pager.h @@ -154,13 +154,10 @@ sqlite3_file *sqlite3PagerFile(Pager*); const char *sqlite3PagerJournalname(Pager*); int sqlite3PagerNosync(Pager*); void *sqlite3PagerTempSpace(Pager*); int sqlite3PagerIsMemdb(Pager*); -#ifndef SQLITE_OMIT_MERGE_SORT -int sqlite3PagerUnderStress(Pager*); -#endif /* Functions used to truncate the database file. 
*/ void sqlite3PagerTruncateImage(Pager*,Pgno); #if defined(SQLITE_HAS_CODEC) && !defined(SQLITE_OMIT_WAL) Index: src/select.c ================================================================== --- src/select.c +++ src/select.c @@ -417,16 +417,22 @@ ){ Vdbe *v = pParse->pVdbe; int nExpr = pOrderBy->nExpr; int regBase = sqlite3GetTempRange(pParse, nExpr+2); int regRecord = sqlite3GetTempReg(pParse); + int op; sqlite3ExprCacheClear(pParse); sqlite3ExprCodeExprList(pParse, pOrderBy, regBase, 0); sqlite3VdbeAddOp2(v, OP_Sequence, pOrderBy->iECursor, regBase+nExpr); sqlite3ExprCodeMove(pParse, regData, regBase+nExpr+1, 1); sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase, nExpr + 2, regRecord); - sqlite3VdbeAddOp2(v, OP_IdxInsert, pOrderBy->iECursor, regRecord); + if( pSelect->selFlags & SF_UseSorter ){ + op = OP_SorterInsert; + }else{ + op = OP_IdxInsert; + } + sqlite3VdbeAddOp2(v, op, pOrderBy->iECursor, regRecord); sqlite3ReleaseTempReg(pParse, regRecord); sqlite3ReleaseTempRange(pParse, regBase, nExpr+2); if( pSelect->iLimit ){ int addr1, addr2; int iLimit; @@ -891,13 +897,24 @@ sqlite3VdbeAddOp3(v, OP_OpenPseudo, pseudoTab, regRow, nColumn); regRowid = 0; }else{ regRowid = sqlite3GetTempReg(pParse); } - addr = 1 + sqlite3VdbeAddOp2(v, OP_Sort, iTab, addrBreak); - codeOffset(v, p, addrContinue); - sqlite3VdbeAddOp3(v, OP_Column, iTab, pOrderBy->nExpr + 1, regRow); + if( p->selFlags & SF_UseSorter ){ + int regSortOut = sqlite3GetTempReg(pParse); + int ptab2 = pParse->nTab++; + sqlite3VdbeAddOp3(v, OP_OpenPseudo, ptab2, regSortOut, pOrderBy->nExpr+2); + addr = 1 + sqlite3VdbeAddOp2(v, OP_SorterSort, iTab, addrBreak); + codeOffset(v, p, addrContinue); + sqlite3VdbeAddOp2(v, OP_SorterData, iTab, regSortOut); + sqlite3VdbeAddOp3(v, OP_Column, ptab2, pOrderBy->nExpr+1, regRow); + sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE); + }else{ + addr = 1 + sqlite3VdbeAddOp2(v, OP_Sort, iTab, addrBreak); + codeOffset(v, p, addrContinue); + sqlite3VdbeAddOp3(v, OP_Column, iTab, 
pOrderBy->nExpr+1, regRow); + } switch( eDest ){ case SRT_Table: case SRT_EphemTab: { testcase( eDest==SRT_Table ); testcase( eDest==SRT_EphemTab ); @@ -946,11 +963,15 @@ sqlite3ReleaseTempReg(pParse, regRowid); /* The bottom of the loop */ sqlite3VdbeResolveLabel(v, addrContinue); - sqlite3VdbeAddOp2(v, OP_Next, iTab, addr); + if( p->selFlags & SF_UseSorter ){ + sqlite3VdbeAddOp2(v, OP_SorterNext, iTab, addr); + }else{ + sqlite3VdbeAddOp2(v, OP_Next, iTab, addr); + } sqlite3VdbeResolveLabel(v, addrBreak); if( eDest==SRT_Output || eDest==SRT_Coroutine ){ sqlite3VdbeAddOp2(v, OP_Close, pseudoTab, 0); } } @@ -3912,10 +3933,14 @@ /* Set the limiter. */ iEnd = sqlite3VdbeMakeLabel(v); p->nSelectRow = (double)LARGEST_INT64; computeLimitRegisters(pParse, p, iEnd); + if( p->iLimit==0 && addrSortIndex>=0 ){ + sqlite3VdbeGetOp(v, addrSortIndex)->opcode = OP_SorterOpen; + p->selFlags |= SF_UseSorter; + } /* Open a virtual index to use for the distinct set. */ if( p->selFlags & SF_Distinct ){ KeyInfo *pKeyInfo; @@ -4006,10 +4031,12 @@ ** one row of the input to the aggregator has been ** processed */ int iAbortFlag; /* Mem address which causes query abort if positive */ int groupBySort; /* Rows come from source in GROUP BY order */ int addrEnd; /* End of processing for this SELECT */ + int sortPTab = 0; /* Pseudotable used to decode sorting results */ + int sortOut = 0; /* Output register from the sorter */ /* Remove any and all aliases between the result set and the ** GROUP BY clause. */ if( pGroupBy ){ @@ -4067,16 +4094,16 @@ int addrReset; /* Subroutine for resetting the accumulator */ int regReset; /* Return address register for reset subroutine */ /* If there is a GROUP BY clause we might need a sorting index to ** implement it. Allocate that sorting index now. If it turns out - ** that we do not need it after all, the OpenEphemeral instruction + ** that we do not need it after all, the OP_SorterOpen instruction ** will be converted into a Noop. 
*/ sAggInfo.sortingIdx = pParse->nTab++; pKeyInfo = keyInfoFromExprList(pParse, pGroupBy); - addrSortingIdx = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, + addrSortingIdx = sqlite3VdbeAddOp4(v, OP_SorterOpen, sAggInfo.sortingIdx, sAggInfo.nSortingColumn, 0, (char*)pKeyInfo, P4_KEYINFO_HANDOFF); /* Initialize memory locations used by GROUP BY aggregate processing */ @@ -4153,15 +4180,18 @@ j++; } } regRecord = sqlite3GetTempReg(pParse); sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase, nCol, regRecord); - sqlite3VdbeAddOp2(v, OP_IdxInsert, sAggInfo.sortingIdx, regRecord); + sqlite3VdbeAddOp2(v, OP_SorterInsert, sAggInfo.sortingIdx, regRecord); sqlite3ReleaseTempReg(pParse, regRecord); sqlite3ReleaseTempRange(pParse, regBase, nCol); sqlite3WhereEnd(pWInfo); - sqlite3VdbeAddOp2(v, OP_Sort, sAggInfo.sortingIdx, addrEnd); + sAggInfo.sortingIdxPTab = sortPTab = pParse->nTab++; + sortOut = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_OpenPseudo, sortPTab, sortOut, nCol); + sqlite3VdbeAddOp2(v, OP_SorterSort, sAggInfo.sortingIdx, addrEnd); VdbeComment((v, "GROUP BY sort")); sAggInfo.useSortingIdx = 1; sqlite3ExprCacheClear(pParse); } @@ -4170,13 +4200,17 @@ ** Then compare the current GROUP BY terms against the GROUP BY terms ** from the previous row currently stored in a0, a1, a2... 
*/ addrTopOfLoop = sqlite3VdbeCurrentAddr(v); sqlite3ExprCacheClear(pParse); + if( groupBySort ){ + sqlite3VdbeAddOp2(v, OP_SorterData, sAggInfo.sortingIdx, sortOut); + } for(j=0; jnExpr; j++){ if( groupBySort ){ - sqlite3VdbeAddOp3(v, OP_Column, sAggInfo.sortingIdx, j, iBMem+j); + sqlite3VdbeAddOp3(v, OP_Column, sortPTab, j, iBMem+j); + if( j==0 ) sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE); }else{ sAggInfo.directMode = 1; sqlite3ExprCode(pParse, pGroupBy->a[j].pExpr, iBMem+j); } } @@ -4211,11 +4245,11 @@ VdbeComment((v, "indicate data in accumulator")); /* End of the loop */ if( groupBySort ){ - sqlite3VdbeAddOp2(v, OP_Next, sAggInfo.sortingIdx, addrTopOfLoop); + sqlite3VdbeAddOp2(v, OP_SorterNext, sAggInfo.sortingIdx, addrTopOfLoop); }else{ sqlite3WhereEnd(pWInfo); sqlite3VdbeChangeToNoop(v, addrSortingIdx, 1); } Index: src/sqliteInt.h ================================================================== --- src/sqliteInt.h +++ src/sqliteInt.h @@ -370,18 +370,10 @@ */ #ifndef SQLITE_TEMP_STORE # define SQLITE_TEMP_STORE 1 #endif -/* -** If all temporary storage is in-memory, then omit the external merge-sort -** logic since it is superfluous. -*/ -#if SQLITE_TEMP_STORE==3 && !defined(SQLITE_OMIT_MERGE_SORT) -# define SQLITE_OMIT_MERGE_SORT -#endif - /* ** GCC does not define the offsetof() macro so we'll have to do it ** ourselves. 
*/ #ifndef offsetof @@ -1548,10 +1540,11 @@ u8 directMode; /* Direct rendering mode means take data directly ** from source tables rather than from accumulators */ u8 useSortingIdx; /* In direct mode, reference the sorting index rather ** than the source table */ int sortingIdx; /* Cursor number of the sorting index */ + int sortingIdxPTab; /* Cursor number of pseudo-table */ ExprList *pGroupBy; /* The group by clause */ int nSortingColumn; /* Number of columns in the sorting index */ struct AggInfo_col { /* For each column used in source tables */ Table *pTab; /* Source table */ int iTable; /* Cursor number of the source table */ @@ -2080,10 +2073,11 @@ #define SF_Resolved 0x0002 /* Identifiers have been resolved */ #define SF_Aggregate 0x0004 /* Contains aggregate functions */ #define SF_UsesEphemeral 0x0008 /* Uses the OpenEphemeral opcode */ #define SF_Expanded 0x0010 /* sqlite3SelectExpand() called on this */ #define SF_HasTypeInfo 0x0020 /* FROM subqueries have Table metadata */ +#define SF_UseSorter 0x0040 /* Sort using a sorter */ /* ** The results of a select can be distributed in several ways. The ** "SRT" prefix means "SELECT Result Type". Index: src/vdbe.c ================================================================== --- src/vdbe.c +++ src/vdbe.c @@ -3160,17 +3160,10 @@ ** This opcode works the same as OP_OpenEphemeral. It has a ** different name to distinguish its use. Tables created using ** by this opcode will be used for automatically created transient ** indices in joins. */ -/* Opcode: OpenSorter P1 P2 * P4 * -** -** This opcode works like OP_OpenEphemeral except that it opens -** a transient index that is specifically designed to sort large -** tables using an external merge-sort algorithm. 
-*/ -case OP_OpenSorter: case OP_OpenAutoindex: case OP_OpenEphemeral: { VdbeCursor *pCx; static const int vfsFlags = SQLITE_OPEN_READWRITE | @@ -3178,11 +3171,10 @@ SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE | SQLITE_OPEN_TRANSIENT_DB; assert( pOp->p1>=0 ); - assert( (pOp->opcode==OP_OpenSorter)==((pOp->p5 & BTREE_SORTER)!=0) ); pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1); if( pCx==0 ) goto no_mem; pCx->nullRow = 1; rc = sqlite3BtreeOpen(db->pVfs, 0, db, &pCx->pBt, BTREE_OMIT_JOURNAL | BTREE_SINGLE | pOp->p5, vfsFlags); @@ -3212,14 +3204,31 @@ pCx->isTable = 1; } } pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED); pCx->isIndex = !pCx->isTable; + break; +} + +/* Opcode: OpenSorter P1 P2 * P4 * +** +** This opcode works like OP_OpenEphemeral except that it opens +** a transient index that is specifically designed to sort large +** tables using an external merge-sort algorithm. +*/ +case OP_SorterOpen: { + VdbeCursor *pCx; #ifndef SQLITE_OMIT_MERGE_SORT - if( rc==SQLITE_OK && pOp->opcode==OP_OpenSorter ){ - rc = sqlite3VdbeSorterInit(db, pCx); - } + pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1); + if( pCx==0 ) goto no_mem; + pCx->pKeyInfo = pOp->p4.pKeyInfo; + pCx->pKeyInfo->enc = ENC(p->db); + pCx->isSorter = 1; + rc = sqlite3VdbeSorterInit(db, pCx); +#else + pOp->opcode = OP_OpenEphemeral; + pc--; #endif break; } /* Opcode: OpenPseudo P1 P2 P3 * * @@ -4067,10 +4076,49 @@ case OP_ResetCount: { sqlite3VdbeSetChanges(db, p->nChange); p->nChange = 0; break; } + +/* Opcode: SorterCompare P1 P2 P3 +** +** P1 is a sorter cursor. This instruction compares the record blob in +** register P3 with the entry that the sorter cursor currently points to. +** If, excluding the rowid fields at the end, the two records are a match, +** fall through to the next instruction. Otherwise, jump to instruction P2. 
+*/ +case OP_SorterCompare: { + VdbeCursor *pC; + int res; + + pC = p->apCsr[pOp->p1]; + assert( isSorter(pC) ); + pIn3 = &aMem[pOp->p3]; + rc = sqlite3VdbeSorterCompare(pC, pIn3, &res); + if( res ){ + pc = pOp->p2-1; + } + break; +}; + +/* Opcode: SorterData P1 P2 * * * +** +** Write into register P2 the current sorter data for sorter cursor P1. +*/ +case OP_SorterData: { + VdbeCursor *pC; +#ifndef SQLITE_OMIT_MERGE_SORT + pOut = &aMem[pOp->p2]; + pC = p->apCsr[pOp->p1]; + assert( pC->isSorter ); + rc = sqlite3VdbeSorterRowkey(pC, pOut); +#else + pOp->opcode = OP_RowKey; + pc--; +#endif + break; +} /* Opcode: RowData P1 P2 * * * ** ** Write into register P2 the complete row data for cursor P1. ** There is no interpretation of the data. @@ -4101,22 +4149,17 @@ memAboutToChange(p, pOut); /* Note that RowKey and RowData are really exactly the same instruction */ assert( pOp->p1>=0 && pOp->p1nCursor ); pC = p->apCsr[pOp->p1]; - assert( pC->isTable || pOp->opcode==OP_RowKey ); + assert( pC->isSorter==0 ); + assert( pC->isTable || pOp->opcode!=OP_RowData ); assert( pC->isIndex || pOp->opcode==OP_RowData ); assert( pC!=0 ); assert( pC->nullRow==0 ); assert( pC->pseudoTableReg==0 ); - - if( isSorter(pC) ){ - assert( pOp->opcode==OP_RowKey ); - rc = sqlite3VdbeSorterRowkey(pC, pOut); - break; - } - + assert( !pC->isSorter ); assert( pC->pCursor!=0 ); pCrsr = pC->pCursor; assert( sqlite3BtreeCursorIsValid(pCrsr) ); /* The OP_RowKey and OP_RowData opcodes always follow OP_NotExists or @@ -4269,10 +4312,14 @@ ** end. We use the OP_Sort opcode instead of OP_Rewind to do the ** rewinding so that the global variable will be incremented and ** regression tests can determine whether or not the optimizer is ** correctly optimizing out sorts. 
*/ +case OP_SorterSort: /* jump */ +#ifdef SQLITE_OMIT_MERGE_SORT + pOp->opcode = OP_Sort; +#endif case OP_Sort: { /* jump */ #ifdef SQLITE_TEST sqlite3_sort_count++; sqlite3_search_count--; #endif @@ -4293,10 +4340,11 @@ int res; assert( pOp->p1>=0 && pOp->p1nCursor ); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); + assert( pC->isSorter==(pOp->opcode==OP_SorterSort) ); res = 1; if( isSorter(pC) ){ rc = sqlite3VdbeSorterRewind(db, pC, &res); }else{ pCrsr = pC->pCursor; @@ -4345,10 +4393,14 @@ ** sqlite3BtreePrevious(). ** ** If P5 is positive and the jump is taken, then event counter ** number P5-1 in the prepared statement is incremented. */ +case OP_SorterNext: /* jump */ +#ifdef SQLITE_OMIT_MERGE_SORT + pOp->opcode = OP_Next; +#endif case OP_Prev: /* jump */ case OP_Next: { /* jump */ VdbeCursor *pC; int res; @@ -4357,12 +4409,13 @@ assert( pOp->p5<=ArraySize(p->aCounter) ); pC = p->apCsr[pOp->p1]; if( pC==0 ){ break; /* See ticket #2273 */ } + assert( pC->isSorter==(pOp->opcode==OP_SorterNext) ); if( isSorter(pC) ){ - assert( pOp->opcode==OP_Next ); + assert( pOp->opcode==OP_SorterNext ); rc = sqlite3VdbeSorterNext(db, pC, &res); }else{ res = 1; assert( pC->deferredMoveto==0 ); assert( pC->pCursor ); @@ -4393,36 +4446,42 @@ ** insert is likely to be an append. ** ** This instruction only works for indices. The equivalent instruction ** for tables is OP_Insert. 
*/ +case OP_SorterInsert: /* in2 */ +#ifdef SQLITE_OMIT_MERGE_SORT + pOp->opcode = OP_IdxInsert; +#endif case OP_IdxInsert: { /* in2 */ VdbeCursor *pC; BtCursor *pCrsr; int nKey; const char *zKey; assert( pOp->p1>=0 && pOp->p1nCursor ); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); + assert( pC->isSorter==(pOp->opcode==OP_SorterInsert) ); pIn2 = &aMem[pOp->p2]; assert( pIn2->flags & MEM_Blob ); pCrsr = pC->pCursor; if( ALWAYS(pCrsr!=0) ){ assert( pC->isTable==0 ); rc = ExpandBlob(pIn2); if( rc==SQLITE_OK ){ - nKey = pIn2->n; - zKey = pIn2->z; - rc = sqlite3VdbeSorterWrite(db, pC, nKey); - if( rc==SQLITE_OK ){ + if( isSorter(pC) ){ + rc = sqlite3VdbeSorterWrite(db, pC, pIn2); + }else{ + nKey = pIn2->n; + zKey = pIn2->z; rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0) - ); + ); assert( pC->deferredMoveto==0 ); + pC->cacheStatus = CACHE_STALE; } - pC->cacheStatus = CACHE_STALE; } } break; } Index: src/vdbeInt.h ================================================================== --- src/vdbeInt.h +++ src/vdbeInt.h @@ -57,16 +57,17 @@ Bool nullRow; /* True if pointing to a row with no data */ Bool deferredMoveto; /* A call to sqlite3BtreeMoveto() is needed */ Bool isTable; /* True if a table requiring integer keys */ Bool isIndex; /* True if an index containing keys only - no data */ Bool isOrdered; /* True if the underlying table is BTREE_UNORDERED */ + Bool isSorter; /* True if a new-style sorter */ sqlite3_vtab_cursor *pVtabCursor; /* The cursor for a virtual table */ const sqlite3_module *pModule; /* Module for cursor pVtabCursor */ i64 seqCount; /* Sequence counter */ i64 movetoTarget; /* Argument to the deferred sqlite3BtreeMoveto() */ i64 lastRowid; /* Last rowid from a Next or NextIdx operation */ - VdbeSorter *pSorter; /* Sorter object for OP_OpenSorter cursors */ + VdbeSorter *pSorter; /* Sorter object for OP_SorterOpen cursors */ /* Result of last sqlite3BtreeMoveto() done by an OP_NotExists or 
** OP_IsUnique opcode on this cursor. */ int seekResult; @@ -400,17 +401,19 @@ # define sqlite3VdbeSorterWrite(X,Y,Z) SQLITE_OK # define sqlite3VdbeSorterClose(Y,Z) # define sqlite3VdbeSorterRowkey(Y,Z) SQLITE_OK # define sqlite3VdbeSorterRewind(X,Y,Z) SQLITE_OK # define sqlite3VdbeSorterNext(X,Y,Z) SQLITE_OK +# define sqlite3VdbeSorterCompare(X,Y,Z) SQLITE_OK #else int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *); -int sqlite3VdbeSorterWrite(sqlite3 *, VdbeCursor *, int); void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *); int sqlite3VdbeSorterRowkey(VdbeCursor *, Mem *); -int sqlite3VdbeSorterRewind(sqlite3 *, VdbeCursor *, int *); int sqlite3VdbeSorterNext(sqlite3 *, VdbeCursor *, int *); +int sqlite3VdbeSorterRewind(sqlite3 *, VdbeCursor *, int *); +int sqlite3VdbeSorterWrite(sqlite3 *, VdbeCursor *, Mem *); +int sqlite3VdbeSorterCompare(VdbeCursor *, Mem *, int *); #endif #if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0 void sqlite3VdbeEnter(Vdbe*); void sqlite3VdbeLeave(Vdbe*); Index: src/vdbeaux.c ================================================================== --- src/vdbeaux.c +++ src/vdbeaux.c @@ -431,11 +431,11 @@ assert( p->nOp - i >= 3 ); assert( pOp[-1].opcode==OP_Integer ); n = pOp[-1].p1; if( n>nMaxArgs ) nMaxArgs = n; #endif - }else if( opcode==OP_Next ){ + }else if( opcode==OP_Next || opcode==OP_SorterNext ){ pOp->p4.xAdvance = sqlite3BtreeNext; pOp->p4type = P4_ADVANCE; }else if( opcode==OP_Prev ){ pOp->p4.xAdvance = sqlite3BtreePrevious; pOp->p4type = P4_ADVANCE; Index: src/vdbesort.c ================================================================== --- src/vdbesort.c +++ src/vdbesort.c @@ -19,10 +19,11 @@ #include "vdbeInt.h" #ifndef SQLITE_OMIT_MERGE_SORT typedef struct VdbeSorterIter VdbeSorterIter; +typedef struct SorterRecord SorterRecord; /* ** NOTES ON DATA STRUCTURE USED FOR N-WAY MERGES: ** ** As keys are added to the sorter, they are written to disk in a series @@ -90,19 +91,23 @@ ** In other words, each time we 
advance to the next sorter element, log2(N) ** key comparison operations are required, where N is the number of segments ** being merged (rounded up to the next power of 2). */ struct VdbeSorter { - int nWorking; /* Start a new b-tree after this many pages */ - int nBtree; /* Current size of b-tree contents as PMA */ + int nInMemory; /* Current size of pRecord list as PMA */ int nTree; /* Used size of aTree/aIter (power of 2) */ VdbeSorterIter *aIter; /* Array of iterators to merge */ int *aTree; /* Current state of incremental merge */ i64 iWriteOff; /* Current write offset within file pTemp1 */ i64 iReadOff; /* Current read offset within file pTemp1 */ sqlite3_file *pTemp1; /* PMA file 1 */ int nPMA; /* Number of PMAs stored in pTemp1 */ + SorterRecord *pRecord; /* Head of in-memory record list */ + int mnPmaSize; /* Minimum PMA size, in bytes */ + int mxPmaSize; /* Maximum PMA size, in bytes. 0==no limit */ + char *aSpace; /* Space for UnpackRecord() */ + int nSpace; /* Size of aSpace in bytes */ }; /* ** The following type is an iterator for a PMA. It caches the current key in ** variables nKey/aKey. If the iterator is at EOF, pFile==0. @@ -114,10 +119,21 @@ int nAlloc; /* Bytes of space at aAlloc */ u8 *aAlloc; /* Allocated space */ int nKey; /* Number of bytes in key */ u8 *aKey; /* Pointer to current key */ }; + +/* +** A structure to store a single record. All in-memory records are connected +** together into a linked list headed at VdbeSorter.pRecord using the +** SorterRecord.pNext pointer. +*/ +struct SorterRecord { + void *pVal; + int nVal; + SorterRecord *pNext; +}; /* Minimum allowable value for the VdbeSorter.nWorking variable */ #define SORTER_MIN_WORKING 10 /* Maximum number of segments to merge in a single pass. */ @@ -273,10 +289,74 @@ rc = vdbeSorterIterNext(db, pIter); } return rc; } + +/* +** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2, +** size nKey2 bytes). 
Argument pKeyInfo supplies the collation functions +** used by the comparison. If an error occurs, return an SQLite error code. +** Otherwise, return SQLITE_OK and set *pRes to a negative, zero or positive +** value, depending on whether key1 is smaller, equal to or larger than key2. +** +** If the bOmitRowid argument is non-zero, assume both keys end in a rowid +** field. For the purposes of the comparison, ignore it. Also, if bOmitRowid +** is true and key1 contains even a single NULL value, it is considered to +** be less than key2. Even if key2 also contains NULL values. +** +** If pKey2 is passed a NULL pointer, then it is assumed that the pCsr->aSpace +** has been allocated and contains an unpacked record that is used as key2. +*/ +static int vdbeSorterCompare( + VdbeCursor *pCsr, /* Cursor object (for pKeyInfo) */ + int bOmitRowid, /* Ignore rowid field at end of keys */ + void *pKey1, int nKey1, /* Left side of comparison */ + void *pKey2, int nKey2, /* Right side of comparison */ + int *pRes /* OUT: Result of comparison */ +){ + KeyInfo *pKeyInfo = pCsr->pKeyInfo; + VdbeSorter *pSorter = pCsr->pSorter; + char *aSpace = pSorter->aSpace; + int nSpace = pSorter->nSpace; + UnpackedRecord *r2; + int i; + + if( aSpace==0 ){ + nSpace = ROUND8(sizeof(UnpackedRecord))+(pKeyInfo->nField+1)*sizeof(Mem); + aSpace = (char *)sqlite3Malloc(nSpace); + if( aSpace==0 ) return SQLITE_NOMEM; + pSorter->aSpace = aSpace; + pSorter->nSpace = nSpace; + } + + if( pKey2 ){ + /* This call cannot fail. As the memory is already allocated. 
*/ + r2 = sqlite3VdbeRecordUnpack(pKeyInfo, nKey2, pKey2, aSpace, nSpace); + assert( r2 && (r2->flags & UNPACKED_NEED_FREE)==0 ); + assert( r2==aSpace ); + }else{ + r2 = (UnpackedRecord *)aSpace; + assert( !bOmitRowid ); + } + + if( bOmitRowid ){ + for(i=0; inField-1; i++){ + if( r2->aMem[i].flags & MEM_Null ){ + *pRes = -1; + return SQLITE_OK; + } + } + r2->flags |= UNPACKED_PREFIX_MATCH; + r2->nField--; + assert( r2->nField>0 ); + } + + *pRes = sqlite3VdbeRecordCompare(nKey1, pKey1, r2); + return SQLITE_OK; +} + /* ** This function is called to compare two iterator keys when merging ** multiple b-tree segments. Parameter iOut is the index of the aTree[] ** value to recalculate. */ @@ -304,24 +384,20 @@ if( p1->pFile==0 ){ iRes = i2; }else if( p2->pFile==0 ){ iRes = i1; }else{ - char aSpace[150]; - UnpackedRecord *r1; - - r1 = sqlite3VdbeRecordUnpack( - pCsr->pKeyInfo, p1->nKey, p1->aKey, aSpace, sizeof(aSpace) + int res; + int rc = vdbeSorterCompare( + pCsr, 0, p1->aKey, p1->nKey, p2->aKey, p2->nKey, &res ); - if( r1==0 ) return SQLITE_NOMEM; - - if( sqlite3VdbeRecordCompare(p2->nKey, p2->aKey, r1)>=0 ){ + if( rc!=SQLITE_OK ) return rc; + if( res<=0 ){ iRes = i1; }else{ iRes = i2; } - sqlite3VdbeDeleteUnpackedRecord(r1); } pSorter->aTree[iOut] = iRes; return SQLITE_OK; } @@ -328,13 +404,41 @@ /* ** Initialize the temporary index cursor just opened as a sorter cursor. */ int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){ - assert( pCsr->pKeyInfo && pCsr->pBt ); - pCsr->pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter)); - return (pCsr->pSorter ? 
SQLITE_OK : SQLITE_NOMEM); + int pgsz; /* Page size of main database */ + int mxCache; /* Cache size */ + VdbeSorter *pSorter; /* The new sorter */ + + assert( pCsr->pKeyInfo && pCsr->pBt==0 ); + pCsr->pSorter = pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter)); + if( pSorter==0 ){ + return SQLITE_NOMEM; + } + + if( !sqlite3TempInMemory(db) ){ + pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt); + pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz; + mxCache = db->aDb[0].pSchema->cache_size; + if( mxCachemxPmaSize = mxCache * pgsz; + } + + return SQLITE_OK; +} + +/* +** Free the list of sorted records starting at pRecord. +*/ +static void vdbeSorterRecordFree(sqlite3 *db, SorterRecord *pRecord){ + SorterRecord *p; + SorterRecord *pNext; + for(p=pRecord; p; p=pNext){ + pNext = p->pNext; + sqlite3DbFree(db, p); + } } /* ** Free any cursor components allocated by sqlite3VdbeSorterXXX routines. */ @@ -349,10 +453,12 @@ sqlite3DbFree(db, pSorter->aIter); } if( pSorter->pTemp1 ){ sqlite3OsCloseFree(pSorter->pTemp1); } + vdbeSorterRecordFree(db, pSorter->pRecord); + sqlite3_free(pSorter->aSpace); sqlite3DbFree(db, pSorter); pCsr->pSorter = 0; } } @@ -368,14 +474,107 @@ SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE, &dummy ); } +/* +** Attemp to merge the two sorted lists p1 and p2 into a single list. If no +** error occurs set *ppOut to the head of the new list and return SQLITE_OK. +*/ +static int vdbeSorterMerge( + sqlite3 *db, /* Database handle */ + VdbeCursor *pCsr, /* For pKeyInfo */ + SorterRecord *p1, /* First list to merge */ + SorterRecord *p2, /* Second list to merge */ + SorterRecord **ppOut /* OUT: Head of merged list */ +){ + int rc = SQLITE_OK; + SorterRecord *pFinal = 0; + SorterRecord **pp = &pFinal; + void *pVal2 = p2 ? 
p2->pVal : 0; + + while( p1 && p2 ){ + int res; + rc = vdbeSorterCompare(pCsr, 0, p1->pVal, p1->nVal, pVal2, p2->nVal, &res); + if( rc!=SQLITE_OK ){ + *pp = 0; + vdbeSorterRecordFree(db, p1); + vdbeSorterRecordFree(db, p2); + vdbeSorterRecordFree(db, pFinal); + *ppOut = 0; + return rc; + } + if( res<=0 ){ + *pp = p1; + pp = &p1->pNext; + p1 = p1->pNext; + pVal2 = 0; + }else{ + *pp = p2; + pp = &p2->pNext; + p2 = p2->pNext; + if( p2==0 ) break; + pVal2 = p2->pVal; + } + } + *pp = p1 ? p1 : p2; + + *ppOut = pFinal; + return SQLITE_OK; +} + +/* +** Sort the linked list of records headed at pCsr->pRecord. Return SQLITE_OK +** if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if an error +** occurs. +*/ +static int vdbeSorterSort(sqlite3 *db, VdbeCursor *pCsr){ + int rc = SQLITE_OK; + int i; + SorterRecord **aSlot; + SorterRecord *p; + VdbeSorter *pSorter = pCsr->pSorter; + + aSlot = (SorterRecord **)sqlite3MallocZero(64 * sizeof(SorterRecord *)); + if( !aSlot ){ + return SQLITE_NOMEM; + } + + p = pSorter->pRecord; + while( p ){ + SorterRecord *pNext = p->pNext; + p->pNext = 0; + for(i=0; rc==SQLITE_OK && aSlot[i]; i++){ + rc = vdbeSorterMerge(db, pCsr, p, aSlot[i], &p); + aSlot[i] = 0; + } + if( rc!=SQLITE_OK ){ + vdbeSorterRecordFree(db, pNext); + break; + } + aSlot[i] = p; + p = pNext; + } + + p = 0; + for(i=0; i<64; i++){ + if( rc==SQLITE_OK ){ + rc = vdbeSorterMerge(db, pCsr, p, aSlot[i], &p); + }else{ + vdbeSorterRecordFree(db, aSlot[i]); + } + } + pSorter->pRecord = p; + + sqlite3_free(aSlot); + return rc; +} + /* -** Write the current contents of the b-tree to a PMA. Return SQLITE_OK -** if successful, or an SQLite error code otherwise. +** Write the current contents of the in-memory linked-list to a PMA. Return +** SQLITE_OK if successful, or an SQLite error code otherwise. ** ** The format of a PMA is: ** ** * A varint. This varint contains the total number of bytes of content ** in the PMA (not including the varint itself). 
@@ -382,153 +581,105 @@ ** ** * One or more records packed end-to-end in order of ascending keys. ** Each record consists of a varint followed by a blob of data (the ** key). The varint is the number of bytes in the blob of data. */ -static int vdbeSorterBtreeToPMA(sqlite3 *db, VdbeCursor *pCsr){ +static int vdbeSorterListToPMA(sqlite3 *db, VdbeCursor *pCsr){ int rc = SQLITE_OK; /* Return code */ VdbeSorter *pSorter = pCsr->pSorter; - int res = 0; + + if( pSorter->nInMemory==0 ){ + assert( pSorter->pRecord==0 ); + return rc; + } - /* sqlite3BtreeFirst() cannot fail because sorter btrees are always held - ** in memory and so an I/O error is not possible. */ - rc = sqlite3BtreeFirst(pCsr->pCursor, &res); - if( NEVER(rc!=SQLITE_OK) || res ) return rc; - assert( pSorter->nBtree>0 ); + rc = vdbeSorterSort(db, pCsr); /* If the first temporary PMA file has not been opened, open it now. */ - if( pSorter->pTemp1==0 ){ + if( rc==SQLITE_OK && pSorter->pTemp1==0 ){ rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1); assert( rc!=SQLITE_OK || pSorter->pTemp1 ); assert( pSorter->iWriteOff==0 ); assert( pSorter->nPMA==0 ); } if( rc==SQLITE_OK ){ - i64 iWriteOff = pSorter->iWriteOff; - void *aMalloc = 0; /* Array used to hold a single record */ - int nMalloc = 0; /* Allocated size of aMalloc[] in bytes */ + i64 iOff = pSorter->iWriteOff; + SorterRecord *p; + SorterRecord *pNext = 0; pSorter->nPMA++; - for( - rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nBtree, &iWriteOff); - rc==SQLITE_OK && res==0; - rc = sqlite3BtreeNext(pCsr->pCursor, &res) - ){ - i64 nKey; /* Size of this key in bytes */ - - /* Write the size of the record in bytes to the output file */ - (void)sqlite3BtreeKeySize(pCsr->pCursor, &nKey); - rc = vdbeSorterWriteVarint(pSorter->pTemp1, nKey, &iWriteOff); - - /* Make sure the aMalloc[] buffer is large enough for the record */ - if( rc==SQLITE_OK && nKey>nMalloc ){ - aMalloc = sqlite3DbReallocOrFree(db, aMalloc, nKey); - if( !aMalloc ){ - rc = SQLITE_NOMEM; - 
}else{ - nMalloc = nKey; - } - } - - /* Write the record itself to the output file */ + rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nInMemory, &iOff); + for(p=pSorter->pRecord; rc==SQLITE_OK && p; p=pNext){ + pNext = p->pNext; + rc = vdbeSorterWriteVarint(pSorter->pTemp1, p->nVal, &iOff); + if( rc==SQLITE_OK ){ - /* sqlite3BtreeKey() cannot fail because sorter btrees held in memory */ - rc = sqlite3BtreeKey(pCsr->pCursor, 0, nKey, aMalloc); - if( ALWAYS(rc==SQLITE_OK) ){ - rc = sqlite3OsWrite(pSorter->pTemp1, aMalloc, nKey, iWriteOff); - iWriteOff += nKey; - } + rc = sqlite3OsWrite(pSorter->pTemp1, p->pVal, p->nVal, iOff); + iOff += p->nVal; } - if( rc!=SQLITE_OK ) break; + sqlite3DbFree(db, p); } /* This assert verifies that unless an error has occurred, the size of ** the PMA on disk is the same as the expected size stored in - ** pSorter->nBtree. */ - assert( rc!=SQLITE_OK || pSorter->nBtree==( - iWriteOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nBtree) + ** pSorter->nInMemory. */ + assert( rc!=SQLITE_OK || pSorter->nInMemory==( + iOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nInMemory) )); - pSorter->iWriteOff = iWriteOff; - sqlite3DbFree(db, aMalloc); + pSorter->iWriteOff = iOff; + pSorter->pRecord = p; } - pSorter->nBtree = 0; return rc; } /* -** This function is called on a sorter cursor by the VDBE before each row -** is inserted into VdbeCursor.pCsr. Argument nKey is the size of the key, in -** bytes, about to be inserted. -** -** If it is determined that the temporary b-tree accessed via VdbeCursor.pCsr -** is large enough, its contents are written to a sorted PMA on disk and the -** tree emptied. This prevents the b-tree (which must be small enough to -** fit entirely in the cache in order to support efficient inserts) from -** growing too large. -** -** An SQLite error code is returned if an error occurs. Otherwise, SQLITE_OK. +** Add a record to the sorter. 
*/ -int sqlite3VdbeSorterWrite(sqlite3 *db, VdbeCursor *pCsr, int nKey){ - int rc = SQLITE_OK; /* Return code */ +int sqlite3VdbeSorterWrite( + sqlite3 *db, /* Database handle */ + VdbeCursor *pCsr, /* Sorter cursor */ + Mem *pVal /* Memory cell containing record */ +){ VdbeSorter *pSorter = pCsr->pSorter; - if( pSorter ){ - Pager *pPager = sqlite3BtreePager(pCsr->pBt); - int nPage; /* Current size of temporary file in pages */ - - /* Sorters never spill to disk */ - assert( sqlite3PagerFile(pPager)->pMethods==0 ); - - /* Determine how many pages the temporary b-tree has grown to */ - sqlite3PagerPagecount(pPager, &nPage); - - /* If pSorter->nWorking is still zero, but the temporary file has been - ** created in the file-system, then the most recent insert into the - ** current b-tree segment probably caused the cache to overflow (it is - ** also possible that sqlite3_release_memory() was called). So set the - ** size of the working set to a little less than the current size of the - ** file in pages. */ - if( pSorter->nWorking==0 && sqlite3PagerUnderStress(pPager) ){ - pSorter->nWorking = nPage-5; - if( pSorter->nWorking<SORTER_MIN_WORKING ){ - pSorter->nWorking = SORTER_MIN_WORKING; - } - } - - /* If the number of pages used by the current b-tree segment is greater - ** than the size of the working set (VdbeSorter.nWorking), start a new - ** segment b-tree. */ - if( pSorter->nWorking && nPage>=pSorter->nWorking ){ - BtCursor *p = pCsr->pCursor;/* Cursor structure to close and reopen */ - int iRoot; /* Root page of new tree */ - - /* Copy the current contents of the b-tree into a PMA in sorted order. - ** Close the currently open b-tree cursor.
*/ - rc = vdbeSorterBtreeToPMA(db, pCsr); - sqlite3BtreeCloseCursor(p); - - if( rc==SQLITE_OK ){ - rc = sqlite3BtreeDropTable(pCsr->pBt, 2, 0); -#ifdef SQLITE_DEBUG - sqlite3PagerPagecount(pPager, &nPage); - assert( rc!=SQLITE_OK || nPage==1 ); -#endif - } - if( rc==SQLITE_OK ){ - rc = sqlite3BtreeCreateTable(pCsr->pBt, &iRoot, BTREE_BLOBKEY); - } - if( rc==SQLITE_OK ){ - assert( iRoot==2 ); - rc = sqlite3BtreeCursor(pCsr->pBt, iRoot, 1, pCsr->pKeyInfo, p); - } - } - - pSorter->nBtree += sqlite3VarintLen(nKey) + nKey; - } + int rc = SQLITE_OK; /* Return Code */ + SorterRecord *pNew; /* New list element */ + + assert( pSorter ); + pSorter->nInMemory += sqlite3VarintLen(pVal->n) + pVal->n; + + pNew = (SorterRecord *)sqlite3DbMallocRaw(db, pVal->n + sizeof(SorterRecord)); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + pNew->pVal = (void *)&pNew[1]; + memcpy(pNew->pVal, pVal->z, pVal->n); + pNew->nVal = pVal->n; + pNew->pNext = pSorter->pRecord; + pSorter->pRecord = pNew; + } + + /* See if the contents of the sorter should now be written out. They + ** are written out when either of the following are true: + ** + ** * The total memory allocated for the in-memory list is greater + ** than (page-size * cache-size), or + ** + ** * The total memory allocated for the in-memory list is greater + ** than (page-size * 10) and sqlite3HeapNearlyFull() returns true. + */ + if( rc==SQLITE_OK && pSorter->mxPmaSize>0 && ( + (pSorter->nInMemory>pSorter->mxPmaSize) + || (pSorter->nInMemory>pSorter->mnPmaSize && sqlite3HeapNearlyFull()) + )){ + rc = vdbeSorterListToPMA(db, pCsr); + pSorter->nInMemory = 0; + } + return rc; } /* ** Helper function for sqlite3VdbeSorterRewind(). @@ -574,18 +725,22 @@ int nByte; /* Bytes of space required for aIter/aTree */ int N = 2; /* Power of 2 >= nIter */ assert( pSorter ); - /* Write the current b-tree to a PMA. Close the b-tree cursor. 
*/ - rc = vdbeSorterBtreeToPMA(db, pCsr); - sqlite3BtreeCloseCursor(pCsr->pCursor); - if( rc!=SQLITE_OK ) return rc; + /* If no data has been written to disk, then do not do so now. Instead, + ** sort the VdbeSorter.pRecord list. The vdbe layer will read data directly + ** from the in-memory list. */ if( pSorter->nPMA==0 ){ - *pbEof = 1; - return SQLITE_OK; + *pbEof = !pSorter->pRecord; + assert( pSorter->aTree==0 ); + return vdbeSorterSort(db, pCsr); } + + /* Write the current in-memory list to a PMA. */ + rc = vdbeSorterListToPMA(db, pCsr); + if( rc!=SQLITE_OK ) return rc; /* Allocate space for aIter[] and aTree[]. */ nIter = pSorter->nPMA; if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT; assert( nIter>0 ); @@ -669,44 +824,93 @@ /* ** Advance to the next element in the sorter. */ int sqlite3VdbeSorterNext(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){ VdbeSorter *pSorter = pCsr->pSorter; - int iPrev = pSorter->aTree[1]; /* Index of iterator to advance */ - int i; /* Index of aTree[] to recalculate */ int rc; /* Return code */ - rc = vdbeSorterIterNext(db, &pSorter->aIter[iPrev]); - for(i=(pSorter->nTree+iPrev)/2; rc==SQLITE_OK && i>0; i=i/2){ - rc = vdbeSorterDoCompare(pCsr, i); - } + if( pSorter->aTree ){ + int iPrev = pSorter->aTree[1];/* Index of iterator to advance */ + int i; /* Index of aTree[] to recalculate */ + + rc = vdbeSorterIterNext(db, &pSorter->aIter[iPrev]); + for(i=(pSorter->nTree+iPrev)/2; rc==SQLITE_OK && i>0; i=i/2){ + rc = vdbeSorterDoCompare(pCsr, i); + } - *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0); + *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0); + }else{ + SorterRecord *pFree = pSorter->pRecord; + pSorter->pRecord = pFree->pNext; + pFree->pNext = 0; + vdbeSorterRecordFree(db, pFree); + *pbEof = !pSorter->pRecord; + rc = SQLITE_OK; + } return rc; } + +/* +** Return a pointer to a buffer owned by the sorter that contains the +** current key.
+*/ +static void *vdbeSorterRowkey( + VdbeSorter *pSorter, /* Sorter object */ + int *pnKey /* OUT: Size of current key in bytes */ +){ + void *pKey; + if( pSorter->aTree ){ + VdbeSorterIter *pIter; + pIter = &pSorter->aIter[ pSorter->aTree[1] ]; + *pnKey = pIter->nKey; + pKey = pIter->aKey; + }else{ + *pnKey = pSorter->pRecord->nVal; + pKey = pSorter->pRecord->pVal; + } + return pKey; +} /* ** Copy the current sorter key into the memory cell pOut. */ int sqlite3VdbeSorterRowkey(VdbeCursor *pCsr, Mem *pOut){ VdbeSorter *pSorter = pCsr->pSorter; - VdbeSorterIter *pIter; - - pIter = &pSorter->aIter[ pSorter->aTree[1] ]; - - /* Coverage testing note: As things are currently, this call will always - ** succeed. This is because the memory cell passed by the VDBE layer - ** happens to be the same one as was used to assemble the keys before they - ** were passed to the sorter - meaning it is always large enough for the - ** largest key. But this could change very easily, so we leave the call - ** to sqlite3VdbeMemGrow() in. */ - if( NEVER(sqlite3VdbeMemGrow(pOut, pIter->nKey, 0)) ){ + void *pKey; int nKey; /* Sorter key to copy into pOut */ + + pKey = vdbeSorterRowkey(pSorter, &nKey); + if( sqlite3VdbeMemGrow(pOut, nKey, 0) ){ return SQLITE_NOMEM; } - pOut->n = pIter->nKey; + pOut->n = nKey; MemSetTypeFlag(pOut, MEM_Blob); - memcpy(pOut->z, pIter->aKey, pIter->nKey); + memcpy(pOut->z, pKey, nKey); return SQLITE_OK; } + +/* +** Compare the key in memory cell pVal with the key that the sorter cursor +** passed as the first argument currently points to. For the purposes of +** the comparison, ignore the rowid field at the end of each record. +** +** If an error occurs, return an SQLite error code (i.e. SQLITE_NOMEM). +** Otherwise, set *pRes to a negative, zero or positive value if the +** key in pVal is smaller than, equal to or larger than the current sorter +** key. 
+*/ +int sqlite3VdbeSorterCompare( + VdbeCursor *pCsr, /* Sorter cursor */ + Mem *pVal, /* Value to compare to current sorter key */ + int *pRes /* OUT: Result of comparison */ +){ + int rc; + VdbeSorter *pSorter = pCsr->pSorter; + void *pKey; int nKey; /* Sorter key to compare pVal with */ + + pKey = vdbeSorterRowkey(pSorter, &nKey); + rc = vdbeSorterCompare(pCsr, 1, pVal->z, pVal->n, pKey, nKey, pRes); + assert( rc!=SQLITE_OK || pVal->db->mallocFailed || (*pRes)<=0 ); + return rc; +} #endif /* #ifndef SQLITE_OMIT_MERGE_SORT */ Index: test/distinct.test ================================================================== --- test/distinct.test +++ test/distinct.test @@ -43,11 +43,11 @@ proc do_temptables_test {tn sql temptables} { uplevel [list do_test $tn [subst -novar { set ret "" db eval "EXPLAIN [set sql]" { - if {$opcode == "OpenEphemeral"} { + if {$opcode == "OpenEphemeral" || $opcode == "SorterOpen"} { if {$p5 != "10" && $p5!="00"} { error "p5 = $p5" } if {$p5 == "10"} { lappend ret hash } else { lappend ret btree Index: test/index4.test ================================================================== --- test/index4.test +++ test/index4.test @@ -106,7 +106,21 @@ COMMIT; CREATE INDEX i1 ON t1(x); PRAGMA integrity_check } {ok} +do_execsql_test 2.1 { + BEGIN; + CREATE TABLE t2(x); + INSERT INTO t2 VALUES(14); + INSERT INTO t2 VALUES(35); + INSERT INTO t2 VALUES(15); + INSERT INTO t2 VALUES(35); + INSERT INTO t2 VALUES(16); + COMMIT; +} +do_catchsql_test 2.2 { + CREATE UNIQUE INDEX i3 ON t2(x); +} {1 {indexed columns are not unique}} + finish_test