/ Check-in [0229cba6]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Buffer updates per-transaction rather than per-update. If lots of updates happen within a single transaction, there was a lot of wasted encode/decode overhead due to segment merges. This code buffers updates in memory and writes out larger level-0 segments. It only works when documents are presented in ascending order by docid. Comparing a test set running 100 documents per transaction, the total runtime is cut almost in half. (CVS 3751)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 0229cba69698ab4b44f8583ef50a87c49422f8ec
User & Date: shess 2007-03-29 18:41:04
Context
2007-03-29
18:46
Add the sqlite3_clear_bindings() API to the loadable extension interface. Ticket #2135. (CVS 3752) check-in: 3111b43e user: drh tags: trunk
18:41
Buffer updates per-transaction rather than per-update. If lots of updates happen within a single transaction, there was a lot of wasted encode/decode overhead due to segment merges. This code buffers updates in memory and writes out larger level-0 segments. It only works when documents are presented in ascending order by docid. Comparing a test set running 100 documents per transaction, the total runtime is cut almost in half. (CVS 3751) check-in: 0229cba6 user: shess tags: trunk
18:19
Change the name of PAGER_SECTOR_SIZE to SQLITE_DEFAULT_SECTOR_SIZE. Make the new OS-layer interface routine for finding sector size optional. (CVS 3750) check-in: 0fb9af1d user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Show Whitespace Changes Patch

Changes to ext/fts2/fts2.c.

   987    987   /* DLCollector wraps PLWriter and DLWriter to provide a
   988    988   ** dynamically-allocated doclist area to use during tokenization.
   989    989   **
   990    990   ** dlcNew - malloc up and initialize a collector.
   991    991   ** dlcDelete - destroy a collector and all contained items.
   992    992   ** dlcAddPos - append position and offset information.
   993    993   ** dlcAddDoclist - add the collected doclist to the given buffer.
          994  +** dlcNext - terminate the current document and open another.
   994    995   */
   995    996   typedef struct DLCollector {
   996    997     DataBuffer b;
   997    998     DLWriter dlw;
   998    999     PLWriter plw;
   999   1000   } DLCollector;
  1000   1001   
................................................................................
  1010   1011     if( pCollector->dlw.iType>DL_DOCIDS ){
  1011   1012       char c[VARINT_MAX];
  1012   1013       int n = putVarint(c, POS_END);
  1013   1014       dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n);
  1014   1015     }else{
  1015   1016       dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData);
  1016   1017     }
         1018  +}
         1019  +static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid){
         1020  +  plwTerminate(&pCollector->plw);
         1021  +  plwDestroy(&pCollector->plw);
         1022  +  plwInit(&pCollector->plw, &pCollector->dlw, iDocid);
  1017   1023   }
  1018   1024   static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos,
  1019   1025                         int iStartOffset, int iEndOffset){
  1020   1026     plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset);
  1021   1027   }
  1022   1028   
  1023   1029   static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){
................................................................................
  1650   1656     /* Precompiled statements used for segment merges.  We run a
  1651   1657     ** separate select across the leaf level of each tree being merged.
  1652   1658     */
  1653   1659     sqlite3_stmt *pLeafSelectStmts[MERGE_COUNT];
  1654   1660     /* The statement used to prepare pLeafSelectStmts. */
  1655   1661   #define LEAF_SELECT \
  1656   1662     "select block from %_segments where rowid between ? and ? order by rowid"
         1663  +
         1664  +  /* These buffer pending index updates during transactions.
         1665  +  ** nPendingData estimates the memory size of the pending data.  It
         1666  +  ** doesn't include the hash-bucket overhead, nor any malloc
         1667  +  ** overhead.  When nPendingData exceeds kPendingThreshold, the
         1668  +  ** buffer is flushed even before the transaction closes.
         1669  +  ** pendingTerms stores the data, and is only valid when nPendingData
         1670  +  ** is >=0 (nPendingData<0 means pendingTerms has not been
         1671  +  ** initialized).  iPrevDocid is the last docid written, used to make
         1672  +  ** certain we're inserting in sorted order.
         1673  +  */
         1674  +  int nPendingData;
         1675  +#define kPendingThreshold (1*1024*1024)
         1676  +  sqlite_int64 iPrevDocid;
         1677  +  fts2Hash pendingTerms;
  1657   1678   };
  1658   1679   
  1659   1680   /*
  1660   1681   ** When the core wants to do a query, it creates a cursor using a
  1661   1682   ** call to xOpen.  This structure is an instance of a cursor.  It
  1662   1683   ** is destroyed by xClose.
  1663   1684   */
................................................................................
  2129   2150   
  2130   2151     rc = sqlite3_bind_int64(s, 1, iLevel);
  2131   2152     if( rc!=SQLITE_OK ) return rc;
  2132   2153   
  2133   2154     return sql_single_step_statement(v, SEGDIR_DELETE_STMT, &s);
  2134   2155   }
  2135   2156   
         2157  +/* TODO(shess) clearPendingTerms() is far down the file because
         2158  +** writeZeroSegment() is far down the file because LeafWriter is far
         2159  +** down the file.  Consider refactoring the code to move the non-vtab
         2160  +** code above the vtab code so that we don't need this forward
         2161  +** reference.
         2162  +*/
         2163  +static int clearPendingTerms(fulltext_vtab *v);
         2164  +
  2136   2165   /*
  2137   2166   ** Free the memory used to contain a fulltext_vtab structure.
  2138   2167   */
  2139   2168   static void fulltext_vtab_destroy(fulltext_vtab *v){
  2140   2169     int iStmt, i;
  2141   2170   
  2142   2171     TRACE(("FTS2 Destroy %p\n", v));
................................................................................
  2154   2183       }
  2155   2184     }
  2156   2185   
  2157   2186     if( v->pTokenizer!=NULL ){
  2158   2187       v->pTokenizer->pModule->xDestroy(v->pTokenizer);
  2159   2188       v->pTokenizer = NULL;
  2160   2189     }
         2190  +
         2191  +  clearPendingTerms(v);
  2161   2192     
  2162   2193     free(v->azColumn);
  2163   2194     for(i = 0; i < v->nColumn; ++i) {
  2164   2195       sqlite3_free(v->azContentColumn[i]);
  2165   2196     }
  2166   2197     free(v->azContentColumn);
  2167   2198     free(v);
................................................................................
  2627   2658     schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
  2628   2659                             spec->zName);
  2629   2660     rc = sqlite3_declare_vtab(db, schema);
  2630   2661     sqlite3_free(schema);
  2631   2662     if( rc!=SQLITE_OK ) goto err;
  2632   2663   
  2633   2664     memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
         2665  +
         2666  +  /* Indicate that the buffer is not live. */
         2667  +  v->nPendingData = -1;
  2634   2668   
  2635   2669     *ppVTab = &v->base;
  2636   2670     TRACE(("FTS2 Connect %p\n", v));
  2637   2671   
  2638   2672     return rc;
  2639   2673   
  2640   2674   err:
................................................................................
  3204   3238   ){
  3205   3239     DataBuffer left, right, new;
  3206   3240     int i, rc;
  3207   3241   
  3208   3242     /* No phrase search if no position info. */
  3209   3243     assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS );
  3210   3244   
         3245  +  /* This code should never be called with buffered updates. */
         3246  +  assert( v->nPendingData<0 );
         3247  +
  3211   3248     dataBufferInit(&left, 0);
  3212   3249     rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm,
  3213   3250                     0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &left);
  3214   3251     if( rc ) return rc;
  3215   3252     for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){
  3216   3253       dataBufferInit(&right, 0);
  3217   3254       rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
................................................................................
  3375   3412     if( inPhrase ){
  3376   3413       /* unmatched quote */
  3377   3414       queryClear(pQuery);
  3378   3415       return SQLITE_ERROR;
  3379   3416     }
  3380   3417     return SQLITE_OK;
  3381   3418   }
         3419  +
         3420  +/* TODO(shess) Refactor the code to remove this forward decl. */
         3421  +static int flushPendingTerms(fulltext_vtab *v);
  3382   3422   
  3383   3423   /* Perform a full-text query using the search expression in
  3384   3424   ** zInput[0..nInput-1].  Return a list of matching documents
  3385   3425   ** in pResult.
  3386   3426   **
  3387   3427   ** Queries must match column iColumn.  Or if iColumn>=nColumn
  3388   3428   ** they are allowed to match against any column.
................................................................................
  3395   3435     DataBuffer *pResult,   /* Write the result doclist here */
  3396   3436     Query *pQuery          /* Put parsed query string here */
  3397   3437   ){
  3398   3438     int i, iNext, rc;
  3399   3439     DataBuffer left, right, or, new;
  3400   3440     int nNot = 0;
  3401   3441     QueryTerm *aTerm;
         3442  +
         3443  +  /* TODO(shess) Instead of flushing pendingTerms, we could query for
         3444  +  ** the relevant term and merge the doclist into what we receive from
         3445  +  ** the database.  Wait and see if this is a common issue, first.
         3446  +  **
         3447  +  ** A good reason not to flush is to not generate update-related
         3448  +  ** error codes from here.
         3449  +  */
         3450  +
         3451  +  /* Flush any buffered updates before executing the query. */
         3452  +  rc = flushPendingTerms(v);
         3453  +  if( rc!=SQLITE_OK ) return rc;
  3402   3454   
  3403   3455     /* TODO(shess) I think that the queryClear() calls below are not
  3404   3456     ** necessary, because fulltextClose() already clears the query.
  3405   3457     */
  3406   3458     rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
  3407   3459     if( rc!=SQLITE_OK ) return rc;
  3408   3460   
................................................................................
  3594   3646   static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  3595   3647     fulltext_cursor *c = (fulltext_cursor *) pCursor;
  3596   3648   
  3597   3649     *pRowid = sqlite3_column_int64(c->pStmt, 0);
  3598   3650     return SQLITE_OK;
  3599   3651   }
  3600   3652   
  3601         -/* Add all terms in [zText] to the given hash table.  If [iColumn] > 0,
  3602         - * we also store positions and offsets in the hash table using the given
  3603         - * column number. */
  3604         -static int buildTerms(fulltext_vtab *v, fts2Hash *terms, sqlite_int64 iDocid,
         3653  +/* Add all terms in [zText] to the pendingTerms table.  If [iColumn] >= 0,
         3654  +** we also store positions and offsets in the hash table using that
         3655  +** column number.
         3656  +*/
         3657  +static int buildTerms(fulltext_vtab *v, sqlite_int64 iDocid,
  3605   3658                         const char *zText, int iColumn){
  3606   3659     sqlite3_tokenizer *pTokenizer = v->pTokenizer;
  3607   3660     sqlite3_tokenizer_cursor *pCursor;
  3608   3661     const char *pToken;
  3609   3662     int nTokenBytes;
  3610   3663     int iStartOffset, iEndOffset, iPosition;
  3611   3664     int rc;
................................................................................
  3615   3668   
  3616   3669     pCursor->pTokenizer = pTokenizer;
  3617   3670     while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
  3618   3671                                                  &pToken, &nTokenBytes,
  3619   3672                                                  &iStartOffset, &iEndOffset,
  3620   3673                                                  &iPosition) ){
  3621   3674       DLCollector *p;
         3675  +    int nData;                   /* Size of doclist before our update. */
  3622   3676   
  3623   3677       /* Positions can't be negative; we use -1 as a terminator internally. */
  3624   3678       if( iPosition<0 ){
  3625   3679         pTokenizer->pModule->xClose(pCursor);
  3626   3680         return SQLITE_ERROR;
  3627   3681       }
  3628   3682   
  3629         -    p = fts2HashFind(terms, pToken, nTokenBytes);
         3683  +    p = fts2HashFind(&v->pendingTerms, pToken, nTokenBytes);
  3630   3684       if( p==NULL ){
         3685  +      nData = 0;
  3631   3686         p = dlcNew(iDocid, DL_DEFAULT);
  3632         -      fts2HashInsert(terms, pToken, nTokenBytes, p);
         3687  +      fts2HashInsert(&v->pendingTerms, pToken, nTokenBytes, p);
         3688  +
         3689  +      /* Overhead for our hash table entry, the key, and the value. */
         3690  +      v->nPendingData += sizeof(struct fts2HashElem)+sizeof(*p)+nTokenBytes;
         3691  +    }else{
         3692  +      nData = p->b.nData;
         3693  +      if( p->dlw.iPrevDocid!=iDocid ) dlcNext(p, iDocid);
  3633   3694       }
  3634   3695       if( iColumn>=0 ){
  3635   3696         dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset);
  3636   3697       }
         3698  +
         3699  +    /* Accumulate data added by dlcNew or dlcNext, and dlcAddPos. */
         3700  +    v->nPendingData += p->b.nData-nData;
  3637   3701     }
  3638   3702   
  3639   3703     /* TODO(shess) Check return?  Should this be able to cause errors at
  3640   3704     ** this point?  Actually, same question about sqlite3_finalize(),
  3641   3705     ** though one could argue that failure there means that the data is
  3642   3706     ** not durable.  *ponder*
  3643   3707     */
  3644   3708     pTokenizer->pModule->xClose(pCursor);
  3645   3709     return rc;
  3646   3710   }
  3647   3711   
  3648         -/* Add doclists for all terms in [pValues] to the hash table [terms]. */
  3649         -static int insertTerms(fulltext_vtab *v, fts2Hash *terms, sqlite_int64 iRowid,
         3712  +/* Add doclists for all terms in [pValues] to pendingTerms table. */
         3713  +static int insertTerms(fulltext_vtab *v, sqlite_int64 iRowid,
  3650   3714                   sqlite3_value **pValues){
  3651   3715     int i;
  3652   3716     for(i = 0; i < v->nColumn ; ++i){
  3653   3717       char *zText = (char*)sqlite3_value_text(pValues[i]);
  3654         -    int rc = buildTerms(v, terms, iRowid, zText, i);
         3718  +    int rc = buildTerms(v, iRowid, zText, i);
  3655   3719       if( rc!=SQLITE_OK ) return rc;
  3656   3720     }
  3657   3721     return SQLITE_OK;
  3658   3722   }
  3659   3723   
  3660         -/* Add empty doclists for all terms in the given row's content to the hash
  3661         - * table [pTerms]. */
  3662         -static int deleteTerms(fulltext_vtab *v, fts2Hash *pTerms, sqlite_int64 iRowid){
         3724  +/* Add empty doclists for all terms in the given row's content to
         3725  +** pendingTerms.
         3726  +*/
         3727  +static int deleteTerms(fulltext_vtab *v, sqlite_int64 iRowid){
  3663   3728     const char **pValues;
  3664   3729     int i, rc;
  3665   3730   
  3666   3731     /* TODO(shess) Should we allow such tables at all? */
  3667   3732     if( DL_DEFAULT==DL_DOCIDS ) return SQLITE_ERROR;
  3668   3733   
  3669   3734     rc = content_select(v, iRowid, &pValues);
  3670   3735     if( rc!=SQLITE_OK ) return rc;
  3671   3736   
  3672   3737     for(i = 0 ; i < v->nColumn; ++i) {
  3673         -    rc = buildTerms(v, pTerms, iRowid, pValues[i], -1);
         3738  +    rc = buildTerms(v, iRowid, pValues[i], -1);
  3674   3739       if( rc!=SQLITE_OK ) break;
  3675   3740     }
  3676   3741   
  3677   3742     freeStringArray(v->nColumn, pValues);
  3678   3743     return SQLITE_OK;
  3679   3744   }
  3680   3745   
         3746  +/* TODO(shess) Refactor the code to remove this forward decl. */
         3747  +static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid);
         3748  +
  3681   3749   /* Insert a row into the %_content table; set *piRowid to be the ID of the
  3682         - * new row.  Fill [pTerms] with new doclists for the %_term table. */
         3750  +** new row.  Add doclists for terms to pendingTerms.
         3751  +*/
  3683   3752   static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
  3684         -                        sqlite3_value **pValues,
  3685         -                        sqlite_int64 *piRowid, fts2Hash *pTerms){
         3753  +                        sqlite3_value **pValues, sqlite_int64 *piRowid){
  3686   3754     int rc;
  3687   3755   
  3688   3756     rc = content_insert(v, pRequestRowid, pValues);  /* execute an SQL INSERT */
  3689   3757     if( rc!=SQLITE_OK ) return rc;
         3758  +
  3690   3759     *piRowid = sqlite3_last_insert_rowid(v->db);
  3691         -  return insertTerms(v, pTerms, *piRowid, pValues);
         3760  +  rc = initPendingTerms(v, *piRowid);
         3761  +  if( rc!=SQLITE_OK ) return rc;
         3762  +
         3763  +  return insertTerms(v, *piRowid, pValues);
  3692   3764   }
  3693   3765   
  3694         -/* Delete a row from the %_content table; fill [pTerms] with empty doclists
  3695         - * to be written to the %_term table. */
  3696         -static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts2Hash *pTerms){
  3697         -  int rc = deleteTerms(v, pTerms, iRow);
         3766  +/* Delete a row from the %_content table; add empty doclists for terms
         3767  +** to pendingTerms.
         3768  +*/
         3769  +static int index_delete(fulltext_vtab *v, sqlite_int64 iRow){
         3770  +  int rc = initPendingTerms(v, iRow);
  3698   3771     if( rc!=SQLITE_OK ) return rc;
         3772  +
         3773  +  rc = deleteTerms(v, iRow);
         3774  +  if( rc!=SQLITE_OK ) return rc;
         3775  +
  3699   3776     return content_delete(v, iRow);  /* execute an SQL DELETE */
  3700   3777   }
  3701   3778   
  3702         -/* Update a row in the %_content table; fill [pTerms] with new doclists for the
  3703         - * %_term table. */
         3779  +/* Update a row in the %_content table; add delete doclists to
         3780  +** pendingTerms for old terms not in the new data, add insert doclists
         3781  +** to pendingTerms for terms in the new data.
         3782  +*/
  3704   3783   static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
  3705         -                        sqlite3_value **pValues, fts2Hash *pTerms){
         3784  +                        sqlite3_value **pValues){
         3785  +  int rc = initPendingTerms(v, iRow);
         3786  +  if( rc!=SQLITE_OK ) return rc;
         3787  +
  3706   3788     /* Generate an empty doclist for each term that previously appeared in this
  3707   3789      * row. */
  3708         -  int rc = deleteTerms(v, pTerms, iRow);
         3790  +  rc = deleteTerms(v, iRow);
  3709   3791     if( rc!=SQLITE_OK ) return rc;
  3710   3792   
  3711   3793     rc = content_update(v, pValues, iRow);  /* execute an SQL UPDATE */
  3712   3794     if( rc!=SQLITE_OK ) return rc;
  3713   3795   
  3714   3796     /* Now add positions for terms which appear in the updated row. */
  3715         -  return insertTerms(v, pTerms, iRow, pValues);
         3797  +  return insertTerms(v, iRow, pValues);
  3716   3798   }
  3717   3799   
  3718   3800   /*******************************************************************/
  3719   3801   /* InteriorWriter is used to collect terms and block references into
  3720   3802   ** interior nodes in %_segments.  See commentary at top of file for
  3721   3803   ** format.
  3722   3804   */
................................................................................
  4991   5073   ** read from pData will overwrite those in *out).
  4992   5074   */
  4993   5075   static int loadSegmentLeaf(fulltext_vtab *v, const char *pData, int nData,
  4994   5076                              const char *pTerm, int nTerm, DataBuffer *out){
  4995   5077     LeafReader reader;
  4996   5078     assert( nData>1 );
  4997   5079     assert( *pData=='\0' );
         5080  +
         5081  +  /* This code should never be called with buffered updates. */
         5082  +  assert( v->nPendingData<0 );
  4998   5083   
  4999   5084     leafReaderInit(pData, nData, &reader);
  5000   5085     while( !leafReaderAtEnd(&reader) ){
  5001   5086       int c = leafReaderTermCmp(&reader, pTerm, nTerm);
  5002   5087       if( c==0 ){
  5003   5088         if( out->nData==0 ){
  5004   5089           dataBufferReplace(out,
................................................................................
  5029   5114   */
  5030   5115   static int loadSegment(fulltext_vtab *v, const char *pData, int nData,
  5031   5116                          const char *pTerm, int nTerm, DataBuffer *out){
  5032   5117     int rc;
  5033   5118     sqlite3_stmt *s = NULL;
  5034   5119   
  5035   5120     assert( nData>1 );
         5121  +
         5122  +  /* This code should never be called with buffered updates. */
         5123  +  assert( v->nPendingData<0 );
  5036   5124   
  5037   5125     /* Process data as an interior node until we reach a leaf. */
  5038   5126     while( *pData!='\0' ){
  5039   5127       sqlite_int64 iBlockid;
  5040   5128       InteriorReader reader;
  5041   5129   
  5042   5130       /* Scan the node data until we find a term greater than our term.
................................................................................
  5091   5179   static int termSelect(fulltext_vtab *v, int iColumn,
  5092   5180                         const char *pTerm, int nTerm,
  5093   5181                         DocListType iType, DataBuffer *out){
  5094   5182     DataBuffer doclist;
  5095   5183     sqlite3_stmt *s;
  5096   5184     int rc = sql_get_statement(v, SEGDIR_SELECT_ALL_STMT, &s);
  5097   5185     if( rc!=SQLITE_OK ) return rc;
         5186  +
         5187  +  /* This code should never be called with buffered updates. */
         5188  +  assert( v->nPendingData<0 );
  5098   5189   
  5099   5190     dataBufferInit(&doclist, 0);
  5100   5191   
  5101   5192     /* Traverse the segments from oldest to newest so that newer doclist
  5102   5193     ** elements for given docids overwrite older elements.
  5103   5194     */
  5104   5195     while( (rc=sql_step_statement(v, SEGDIR_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
................................................................................
  5191   5282   
  5192   5283    err:
  5193   5284     dataBufferDestroy(&dl);
  5194   5285     free(pData);
  5195   5286     leafWriterDestroy(&writer);
  5196   5287     return rc;
  5197   5288   }
         5289  +
         5290  +/* If pendingTerms has data, free it. */
         5291  +static int clearPendingTerms(fulltext_vtab *v){
         5292  +  if( v->nPendingData>=0 ){
         5293  +    fts2HashElem *e;
         5294  +    for(e=fts2HashFirst(&v->pendingTerms); e; e=fts2HashNext(e)){
         5295  +      dlcDelete(fts2HashData(e));
         5296  +    }
         5297  +    fts2HashClear(&v->pendingTerms);
         5298  +    v->nPendingData = -1;
         5299  +  }
         5300  +  return SQLITE_OK;
         5301  +}
         5302  +
         5303  +/* If pendingTerms has data, flush it to a level-zero segment, and
         5304  +** free it.
         5305  +*/
         5306  +static int flushPendingTerms(fulltext_vtab *v){
         5307  +  if( v->nPendingData>=0 ){
         5308  +    int rc = writeZeroSegment(v, &v->pendingTerms);
         5309  +    clearPendingTerms(v);
         5310  +    return rc;
         5311  +  }
         5312  +  return SQLITE_OK;
         5313  +}
         5314  +
         5315  +/* If pendingTerms is "too big", or docid is out of order, flush it.
         5316  +** Regardless, be certain that pendingTerms is initialized for use.
         5317  +*/
         5318  +static int initPendingTerms(fulltext_vtab *v, sqlite_int64 iDocid){
         5319  +  /* TODO(shess) Explore whether partially flushing the buffer on
         5320  +  ** forced-flush would provide better performance.  I suspect that if
         5321  +  ** we ordered the doclists by size and flushed the largest until the
         5322  +  ** buffer was half empty, that would let the less frequent terms
         5323  +  ** generate longer doclists.
         5324  +  */
         5325  +  if( iDocid<=v->iPrevDocid || v->nPendingData>kPendingThreshold ){
         5326  +    int rc = flushPendingTerms(v);
         5327  +    if( rc!=SQLITE_OK ) return rc;
         5328  +  }
         5329  +  if( v->nPendingData<0 ){
         5330  +    fts2HashInit(&v->pendingTerms, FTS2_HASH_STRING, 1);
         5331  +    v->nPendingData = 0;
         5332  +  }
         5333  +  v->iPrevDocid = iDocid;
         5334  +  return SQLITE_OK;
         5335  +}
  5198   5336   
  5199   5337   /* This function implements the xUpdate callback; it's the top-level entry
  5200   5338    * point for inserting, deleting or updating a row in a full-text table. */
  5201   5339   static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
  5202   5340                      sqlite_int64 *pRowid){
  5203   5341     fulltext_vtab *v = (fulltext_vtab *) pVtab;
  5204         -  fts2Hash terms;   /* maps term string -> PosList */
  5205   5342     int rc;
  5206         -  fts2HashElem *e;
  5207   5343   
  5208   5344     TRACE(("FTS2 Update %p\n", pVtab));
  5209   5345     
  5210         -  fts2HashInit(&terms, FTS2_HASH_STRING, 1);
  5211         -
  5212   5346     if( nArg<2 ){
  5213         -    rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms);
         5347  +    rc = index_delete(v, sqlite3_value_int64(ppArg[0]));
  5214   5348     } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
  5215   5349       /* An update:
  5216   5350        * ppArg[0] = old rowid
  5217   5351        * ppArg[1] = new rowid
  5218   5352        * ppArg[2..2+v->nColumn-1] = values
  5219   5353        * ppArg[2+v->nColumn] = value for magic column (we ignore this)
  5220   5354        */
  5221   5355       sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
  5222   5356       if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
  5223   5357         sqlite3_value_int64(ppArg[1]) != rowid ){
  5224   5358         rc = SQLITE_ERROR;  /* we don't allow changing the rowid */
  5225   5359       } else {
  5226   5360         assert( nArg==2+v->nColumn+1);
  5227         -      rc = index_update(v, rowid, &ppArg[2], &terms);
         5361  +      rc = index_update(v, rowid, &ppArg[2]);
  5228   5362       }
  5229   5363     } else {
  5230   5364       /* An insert:
  5231   5365        * ppArg[1] = requested rowid
  5232   5366        * ppArg[2..2+v->nColumn-1] = values
  5233   5367        * ppArg[2+v->nColumn] = value for magic column (we ignore this)
  5234   5368        */
  5235   5369       assert( nArg==2+v->nColumn+1);
  5236         -    rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
         5370  +    rc = index_insert(v, ppArg[1], &ppArg[2], pRowid);
  5237   5371     }
  5238   5372   
  5239         -  if( rc==SQLITE_OK ) rc = writeZeroSegment(v, &terms);
  5240         -
  5241         -  /* clean up */
  5242         -  for(e=fts2HashFirst(&terms); e; e=fts2HashNext(e)){
  5243         -    dlcDelete(fts2HashData(e));
  5244         -  }
  5245         -  fts2HashClear(&terms);
  5246         -
  5247   5373     return rc;
  5248   5374   }
         5375  +
         5376  +static int fulltextSync(sqlite3_vtab *pVtab){
         5377  +  TRACE(("FTS2 xSync()\n"));
         5378  +  return flushPendingTerms((fulltext_vtab *)pVtab);
         5379  +}
         5380  +
         5381  +static int fulltextBegin(sqlite3_vtab *pVtab){
         5382  +  fulltext_vtab *v = (fulltext_vtab *) pVtab;
         5383  +  TRACE(("FTS2 xBegin()\n"));
         5384  +
         5385  +  /* Any buffered updates should have been cleared by the previous
         5386  +  ** transaction.
         5387  +  */
         5388  +  assert( v->nPendingData<0 );
         5389  +  return clearPendingTerms(v);
         5390  +}
         5391  +
         5392  +static int fulltextCommit(sqlite3_vtab *pVtab){
         5393  +  fulltext_vtab *v = (fulltext_vtab *) pVtab;
         5394  +  TRACE(("FTS2 xCommit()\n"));
         5395  +
         5396  +  /* Buffered updates should have been cleared by fulltextSync(). */
         5397  +  assert( v->nPendingData<0 );
         5398  +  return clearPendingTerms(v);
         5399  +}
         5400  +
         5401  +static int fulltextRollback(sqlite3_vtab *pVtab){
         5402  +  TRACE(("FTS2 xRollback()\n"));
         5403  +  return clearPendingTerms((fulltext_vtab *)pVtab);
         5404  +}
  5249   5405   
  5250   5406   /*
  5251   5407   ** Implementation of the snippet() function for FTS2
  5252   5408   */
  5253   5409   static void snippetFunc(
  5254   5410     sqlite3_context *pContext,
  5255   5411     int argc,
................................................................................
  5336   5492     /* xClose        */ fulltextClose,
  5337   5493     /* xFilter       */ fulltextFilter,
  5338   5494     /* xNext         */ fulltextNext,
  5339   5495     /* xEof          */ fulltextEof,
  5340   5496     /* xColumn       */ fulltextColumn,
  5341   5497     /* xRowid        */ fulltextRowid,
  5342   5498     /* xUpdate       */ fulltextUpdate,
  5343         -  /* xBegin        */ 0, 
  5344         -  /* xSync         */ 0,
  5345         -  /* xCommit       */ 0,
  5346         -  /* xRollback     */ 0,
         5499  +  /* xBegin        */ fulltextBegin,
         5500  +  /* xSync         */ fulltextSync,
         5501  +  /* xCommit       */ fulltextCommit,
         5502  +  /* xRollback     */ fulltextRollback,
  5347   5503     /* xFindFunction */ fulltextFindFunction,
  5348   5504   };
  5349   5505   
  5350   5506   int sqlite3Fts2Init(sqlite3 *db){
  5351   5507     sqlite3_overload_function(db, "snippet", -1);
  5352   5508     sqlite3_overload_function(db, "offsets", -1);
  5353   5509     return sqlite3_create_module(db, "fts2", &fulltextModule, 0);

Added test/fts2k.test.

            1  +# 2007 March 9
            2  +#
            3  +# The author disclaims copyright to this source code.
            4  +#
            5  +#*************************************************************************
            6  +# This file implements regression tests for SQLite library.  These
            7  +# make sure that inserted documents are visible to selects within the
            8  +# transaction.
            9  +#
           10  +# $Id: fts2k.test,v 1.1 2007/03/29 18:41:05 shess Exp $
           11  +#
           12  +
           13  +set testdir [file dirname $argv0]
           14  +source $testdir/tester.tcl
           15  +
           16  +# If SQLITE_ENABLE_FTS2 is not defined, omit this file.
           17  +ifcapable !fts2 {
           18  +  finish_test
           19  +  return
           20  +}
           21  +
           22  +db eval {
           23  +  CREATE VIRTUAL TABLE t1 USING fts2(content);
           24  +  INSERT INTO t1 (rowid, content) VALUES(1, "hello world");
           25  +  INSERT INTO t1 (rowid, content) VALUES(2, "hello there");
           26  +  INSERT INTO t1 (rowid, content) VALUES(3, "cruel world");
           27  +}
           28  +
           29  +# Test that possibly-buffered inserts went through after commit.
           30  +do_test fts2k-1.1 {
           31  +  execsql {
           32  +    BEGIN TRANSACTION;
           33  +    INSERT INTO t1 (rowid, content) VALUES(4, "false world");
           34  +    INSERT INTO t1 (rowid, content) VALUES(5, "false door");
           35  +    COMMIT TRANSACTION;
           36  +    SELECT rowid FROM t1 WHERE t1 MATCH 'world';
           37  +  }
           38  +} {1 3 4}
           39  +
           40  +# Test that buffered inserts are seen by selects in the same
           41  +# transaction.
           42  +do_test fts2k-1.2 {
           43  +  execsql {
           44  +    BEGIN TRANSACTION;
           45  +    INSERT INTO t1 (rowid, content) VALUES(6, "another world");
           46  +    INSERT INTO t1 (rowid, content) VALUES(7, "another test");
           47  +    SELECT rowid FROM t1 WHERE t1 MATCH 'world';
           48  +    COMMIT TRANSACTION;
           49  +  }
           50  +} {1 3 4 6}
           51  +
           52  +# Test that buffered inserts are seen within a transaction.  This is
           53  +# really the same test as 1.2.
           54  +do_test fts2k-1.3 {
           55  +  execsql {
           56  +    BEGIN TRANSACTION;
           57  +    INSERT INTO t1 (rowid, content) VALUES(8, "second world");
           58  +    INSERT INTO t1 (rowid, content) VALUES(9, "second sight");
           59  +    SELECT rowid FROM t1 WHERE t1 MATCH 'world';
           60  +    ROLLBACK TRANSACTION;
           61  +  }
           62  +} {1 3 4 6 8}
           63  +
           64  +# Double-check that the previous result doesn't persist past the
           65  +# rollback!
           66  +do_test fts2k-1.4 {
           67  +  execsql {
           68  +    SELECT rowid FROM t1 WHERE t1 MATCH 'world';
           69  +  }
           70  +} {1 3 4 6}
           71  +
           72  +# Test it all together.
           73  +do_test fts2k-1.5 {
           74  +  execsql {
           75  +    BEGIN TRANSACTION;
           76  +    INSERT INTO t1 (rowid, content) VALUES(10, "second world");
           77  +    INSERT INTO t1 (rowid, content) VALUES(11, "second sight");
           78  +    ROLLBACK TRANSACTION;
           79  +    SELECT rowid FROM t1 WHERE t1 MATCH 'world';
           80  +  }
           81  +} {1 3 4 6}
           82  +
           83  +finish_test