/ Check-in [8e3ca632]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Optimize copying data from fts5 in-memory hash tables to top level segments.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | fts5
Files: files | file ages | folders
SHA1: 8e3ca6323a2beab5f04250e24ae15b159d2aa0ac
User & Date: dan 2015-02-26 20:49:09
Context
2015-02-27
07:23
Fix suffix and prefix compression of terms in top-level fts5 segments. And a crash that could follow an OOM condition. check-in: bb104b36 user: dan tags: fts5
2015-02-26
20:49
Optimize copying data from fts5 in-memory hash tables to top level segments. check-in: 8e3ca632 user: dan tags: fts5
14:54
Fix an fts5 bug in large incremental merges. check-in: 208e3cb6 user: dan tags: fts5
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_hash.c.

   389    389   
   390    390     pHash->nEntry = 0;
   391    391     sqlite3_free(ap);
   392    392     *ppSorted = pList;
   393    393     return SQLITE_OK;
   394    394   }
   395    395   
   396         -int sqlite3Fts5HashIterate(
   397         -  Fts5Hash *pHash,
   398         -  void *pCtx,
   399         -  int (*xTerm)(void*, const char*, int),
   400         -  int (*xEntry)(void*, i64, const u8*, int),
   401         -  int (*xTermDone)(void*)
   402         -){
   403         -  Fts5HashEntry *pList;
   404         -  int rc;
   405         -
   406         -  rc = fts5HashEntrySort(pHash, 0, 0, &pList);
   407         -  if( rc==SQLITE_OK ){
   408         -    memset(pHash->aSlot, 0, sizeof(Fts5HashEntry*) * pHash->nSlot);
   409         -    while( pList ){
   410         -      Fts5HashEntry *pNext = pList->pScanNext;
   411         -      if( rc==SQLITE_OK ){
   412         -        const int nKey = strlen(pList->zKey);
   413         -        i64 iRowid = 0;
   414         -        u8 *pPtr = (u8*)pList;
   415         -        int iOff = sizeof(Fts5HashEntry) + nKey + 1;
   416         -
   417         -        /* Fill in the final poslist size field */
   418         -        fts5HashAddPoslistSize(pList);
   419         -        
   420         -        /* Issue the new-term callback */
   421         -        rc = xTerm(pCtx, pList->zKey, nKey);
   422         -
   423         -        /* Issue the xEntry callbacks */
   424         -        while( rc==SQLITE_OK && iOff<pList->nData ){
   425         -          i64 iDelta;             /* Rowid delta value */
   426         -          int nPoslist;           /* Size of position list in bytes */
   427         -          int nVarint;
   428         -          iOff += getVarint(&pPtr[iOff], (u64*)&iDelta);
   429         -          iRowid += iDelta;
   430         -          nVarint = fts5GetVarint32(&pPtr[iOff], nPoslist);
   431         -          rc = xEntry(pCtx, iRowid, &pPtr[iOff], nPoslist+nVarint);
   432         -          iOff += nVarint+nPoslist;
   433         -        }
   434         -
   435         -        /* Issue the term-done callback */
   436         -        if( rc==SQLITE_OK ) rc = xTermDone(pCtx);
   437         -      }
   438         -      sqlite3_free(pList);
   439         -      pList = pNext;
   440         -    }
   441         -  }
   442         -  return rc;
   443         -}
   444         -
   445    396   /*
   446    397   ** Query the hash table for a doclist associated with term pTerm/nTerm.
   447    398   */
   448    399   int sqlite3Fts5HashQuery(
   449    400     Fts5Hash *pHash,                /* Hash table to query */
   450    401     const char *pTerm, int nTerm,   /* Query term */
   451    402     const char **ppDoclist,         /* OUT: Pointer to doclist for pTerm */
................................................................................
   474    425     Fts5Hash *p,                    /* Hash table to query */
   475    426     const char *pTerm, int nTerm    /* Query prefix */
   476    427   ){
   477    428     fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
   478    429   }
   479    430   
   480    431   void sqlite3Fts5HashScanNext(Fts5Hash *p){
   481         -  if( p->pScan ){
   482         -    p->pScan = p->pScan->pScanNext;
   483         -  }
          432  +  Fts5HashEntry *pScan = p->pScan;
          433  +  if( pScan ) p->pScan = pScan->pScanNext;
   484    434   }
   485    435   
   486    436   int sqlite3Fts5HashScanEof(Fts5Hash *p){
   487    437     return (p->pScan==0);
   488    438   }
   489    439   
   490    440   void sqlite3Fts5HashScanEntry(

Changes to ext/fts5/fts5_index.c.

   109    109   **
   110    110   **     doclist format:
   111    111   **
   112    112   **         varint:  first rowid
   113    113   **         poslist: first poslist
   114    114   **         zero-or-more {
   115    115   **           varint:  rowid delta (always > 0)
   116         -**           poslist: first poslist
          116  +**           poslist: next poslist
   117    117   **         }
   118    118   **         0x00 byte
   119    119   **
   120    120   **     poslist format:
   121    121   **
   122    122   **         varint: size of poslist in bytes. not including this field.
   123    123   **         collist: collist for column 0
................................................................................
  2673   2673   ** term, write it now.
  2674   2674   */
  2675   2675   static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
  2676   2676     if( pWriter->nEmpty ){
  2677   2677       int bFlag = 0;
  2678   2678       Fts5PageWriter *pPg;
  2679   2679       pPg = &pWriter->aWriter[1];
  2680         -    if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
         2680  +    if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE && pWriter->cdlidx.n ){
  2681   2681         i64 iKey = FTS5_DOCLIST_IDX_ROWID(
  2682   2682             pWriter->iIdx, pWriter->iSegid, 
  2683   2683             pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
  2684   2684         );
  2685   2685         assert( pWriter->cdlidx.n>0 );
  2686   2686         fts5DataWrite(p, iKey, pWriter->cdlidx.p, pWriter->cdlidx.n);
  2687   2687         bFlag = 1;
................................................................................
  3000   3000     Fts5Index *p, 
  3001   3001     Fts5SegWriter *pWriter,         /* Writer object */
  3002   3002     int *pnHeight,                  /* OUT: Height of the b-tree */
  3003   3003     int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
  3004   3004   ){
  3005   3005     int i;
  3006   3006     if( p->rc==SQLITE_OK ){
  3007         -    *pnLeaf = pWriter->aWriter[0].pgno;
  3008         -    if( *pnLeaf==1 && pWriter->aWriter[0].buf.n==0 ){
         3007  +    Fts5PageWriter *pLeaf = &pWriter->aWriter[0];
         3008  +    if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){
  3009   3009         *pnLeaf = 0;
  3010   3010         *pnHeight = 0;
  3011   3011       }else{
  3012         -      fts5WriteFlushLeaf(p, pWriter);
         3012  +      if( pLeaf->buf.n>4 ){
         3013  +        fts5WriteFlushLeaf(p, pWriter);
         3014  +      }
         3015  +      *pnLeaf = pLeaf->pgno-1;
  3013   3016         if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
  3014   3017           fts5WriteBtreeGrow(p, pWriter);
  3015   3018         }
  3016   3019         if( pWriter->nWriter>1 ){
  3017   3020           fts5WriteBtreeNEmpty(p, pWriter);
  3018   3021         }
  3019   3022         *pnHeight = pWriter->nWriter;
................................................................................
  3377   3380   
  3378   3381   typedef struct Fts5FlushCtx Fts5FlushCtx;
  3379   3382   struct Fts5FlushCtx {
  3380   3383     Fts5Index *pIdx;
  3381   3384     Fts5SegWriter writer; 
  3382   3385   };
  3383   3386   
  3384         -static int fts5FlushNewTerm(void *pCtx, const char *zTerm, int nTerm){
  3385         -  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  3386         -  int rc = SQLITE_OK;
  3387         -  fts5WriteAppendTerm(p->pIdx, &p->writer, nTerm, (const u8*)zTerm);
  3388         -  return rc;
  3389         -}
  3390         -
  3391         -static int fts5FlushTermDone(void *pCtx){
  3392         -  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  3393         -  int rc = SQLITE_OK;
  3394         -  /* Write the doclist terminator */
  3395         -  fts5WriteAppendZerobyte(p->pIdx, &p->writer);
  3396         -  return rc;
  3397         -}
  3398         -
  3399         -static int fts5FlushNewEntry(
  3400         -  void *pCtx, 
  3401         -  i64 iRowid, 
  3402         -  const u8 *aPoslist, 
  3403         -  int nPoslist
  3404         -){
  3405         -  Fts5FlushCtx *p = (Fts5FlushCtx*)pCtx;
  3406         -  Fts5Index *pIdx = p->pIdx;
  3407         -
  3408         -#ifdef SQLITE_DEBUG
  3409         -  /* The poslist-size varint should already be at the start of the 
  3410         -  ** aPoslist/nPoslist buffer. This assert verifies that. */
  3411         -  int n, i;
  3412         -  i = fts5GetVarint32(aPoslist, n);
  3413         -  assert( nPoslist==(n+i) );
  3414         -#endif
  3415         -
  3416         -  /* Append the rowid itself */
  3417         -  fts5WriteAppendRowid(pIdx, &p->writer, iRowid);
  3418         -
  3419         -  /* And the poslist data */
  3420         -  fts5WriteAppendPoslistData(pIdx, &p->writer, aPoslist, nPoslist);
  3421         -  return pIdx->rc;
         3387  +/*
         3388  +** Buffer aBuf[] contains a list of varints, all small enough to fit
         3389  +** in a 32-bit integer. Return the size of the largest prefix of this 
         3390  +** list nMax bytes or less in size.
         3391  +*/
         3392  +static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
         3393  +  int ret = 0;
         3394  +  while( 1 ){
         3395  +    u32 dummy;
         3396  +    int i = fts5GetVarint32(&aBuf[ret], dummy);
         3397  +    if( (ret + i) > nMax ) break;
         3398  +    ret += i;
         3399  +  }
         3400  +  return ret;
  3422   3401   }
  3423   3402   
  3424   3403   /*
  3425   3404   ** Flush the contents of in-memory hash table iHash to a new level-0 
  3426   3405   ** segment on disk. Also update the corresponding structure record.
  3427   3406   **
  3428   3407   ** If an error occurs, set the Fts5Index.rc error code. If an error has 
  3429   3408   ** already occurred, this function is a no-op.
  3430   3409   */
  3431   3410   static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){
         3411  +  Fts5Hash *pHash = p->apHash[iHash];
  3432   3412     Fts5Structure *pStruct;
  3433   3413     int iSegid;
  3434   3414     int pgnoLast = 0;                 /* Last leaf page number in segment */
  3435   3415   
  3436   3416     /* Obtain a reference to the index structure and allocate a new segment-id
  3437   3417     ** for the new level-0 segment.  */
  3438   3418     pStruct = fts5StructureRead(p, iHash);
  3439   3419     iSegid = fts5AllocateSegid(p, pStruct);
  3440   3420   
  3441   3421     if( iSegid ){
         3422  +    const int pgsz = p->pConfig->pgsz;
         3423  +
  3442   3424       Fts5StructureSegment *pSeg;   /* New segment within pStruct */
  3443   3425       int nHeight;                  /* Height of new segment b-tree */
  3444         -    int rc;
  3445         -    Fts5FlushCtx ctx;
  3446         -
  3447         -    fts5WriteInit(p, &ctx.writer, iHash, iSegid);
  3448         -    ctx.pIdx = p;
  3449         -
  3450         -    rc = sqlite3Fts5HashIterate( p->apHash[iHash], (void*)&ctx, 
  3451         -        fts5FlushNewTerm, fts5FlushNewEntry, fts5FlushTermDone
  3452         -    );
  3453         -    if( p->rc==SQLITE_OK ) p->rc = rc;
  3454         -    fts5WriteFinish(p, &ctx.writer, &nHeight, &pgnoLast);
         3426  +    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
         3427  +
         3428  +    Fts5SegWriter writer;
         3429  +    fts5WriteInit(p, &writer, iHash, iSegid);
         3430  +
         3431  +    /* Pre-allocate the buffer used to assemble leaf pages to the target
         3432  +    ** page size.  */
         3433  +    assert( pgsz>0 );
         3434  +    pBuf = &writer.aWriter[0].buf;
         3435  +    fts5BufferGrow(&p->rc, pBuf, pgsz + 20);
         3436  +
         3437  +    /* Begin scanning through hash table entries. */
         3438  +    if( p->rc==SQLITE_OK ){
         3439  +      memset(pBuf->p, 0, 4);
         3440  +      pBuf->n = 4;
         3441  +      sqlite3Fts5HashScanInit(pHash, 0, 0);
         3442  +    }
         3443  +
         3444  +    while( 0==sqlite3Fts5HashScanEof(pHash) ){
         3445  +      const char *zTerm;
         3446  +      int nTerm;
         3447  +      const u8 *pDoclist;
         3448  +      int nDoclist;
         3449  +
         3450  +      sqlite3Fts5HashScanEntry(pHash, &zTerm,(const char**)&pDoclist,&nDoclist);
         3451  +      nTerm = strlen(zTerm);
         3452  +
         3453  +      /* Decide if the term fits on the current leaf. If not, flush it
         3454  +      ** to disk.  */
         3455  +      if( (pBuf->n + nTerm + 2) > pgsz ){
         3456  +        fts5WriteFlushLeaf(p, &writer);
         3457  +        pBuf = &writer.aWriter[0].buf;
         3458  +        if( (nTerm + 32) > pBuf->nSpace ){
         3459  +          fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
         3460  +        }
         3461  +      }
         3462  +
         3463  +      /* Write the term to the leaf. And push it up into the b-tree hierarchy */
         3464  +      if( writer.bFirstTermInPage==0 ){
         3465  +        pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], 0);
         3466  +      }else{
         3467  +        fts5PutU16(&pBuf->p[2], pBuf->n);
         3468  +        writer.bFirstTermInPage = 0;
         3469  +        if( writer.aWriter[0].pgno!=1 ){
         3470  +          fts5WriteBtreeTerm(p, &writer, nTerm, (const u8*)zTerm);
         3471  +          pBuf = &writer.aWriter[0].buf;
         3472  +        }
         3473  +      }
         3474  +      pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], nTerm);
         3475  +      fts5BufferAppendBlob(&p->rc, pBuf, nTerm, (const u8*)zTerm);
         3476  +
         3477  +      if( pgsz>=(pBuf->n + nDoclist + 1) ){
         3478  +        /* The entire doclist will fit on the current leaf. */
         3479  +        fts5BufferAppendBlob(&p->rc, pBuf, nDoclist, pDoclist);
         3480  +      }else{
         3481  +        i64 iRowid = 0;
         3482  +        i64 iDelta = 0;
         3483  +        int iOff = 0;
         3484  +        int bFirstDocid = 0;
         3485  +
         3486  +        /* The entire doclist will not fit on this leaf. The following 
         3487  +        ** loop iterates through the poslists that make up the current 
         3488  +        ** doclist.  */
         3489  +        while( iOff<nDoclist ){
         3490  +          u32 nPos;
         3491  +          int nCopy;
         3492  +          iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta);
         3493  +          nCopy = fts5GetVarint32(&pDoclist[iOff], nPos);
         3494  +          nCopy += nPos;
         3495  +          iRowid += iDelta;
         3496  +          
         3497  +          if( bFirstDocid ){
         3498  +            fts5PutU16(&pBuf->p[0], pBuf->n);   /* first docid on page */
         3499  +            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid);
         3500  +            bFirstDocid = 0;
         3501  +          }else{
         3502  +            pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta);
         3503  +          }
         3504  +          assert( pBuf->n<=pBuf->nSpace );
         3505  +
         3506  +          if( (pBuf->n + nCopy) <= pgsz ){
         3507  +            /* The entire poslist will fit on the current leaf. So copy
         3508  +            ** it in one go. */
         3509  +            fts5BufferAppendBlob(&p->rc, pBuf, nCopy, &pDoclist[iOff]);
         3510  +          }else{
         3511  +            /* The entire poslist will not fit on this leaf. So it needs
         3512  +            ** to be broken into sections. The only qualification being
         3513  +            ** that each varint must be stored contiguously.  */
         3514  +            const u8 *pPoslist = &pDoclist[iOff];
         3515  +            int iPos = 0;
         3516  +            while( 1 ){
         3517  +              int nSpace = pgsz - pBuf->n;
         3518  +              int n;
         3519  +              if( (nCopy - iPos)<=nSpace ){
         3520  +                n = nCopy - iPos;
         3521  +              }else{
         3522  +                n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
         3523  +              }
         3524  +              fts5BufferAppendBlob(&p->rc, pBuf, n, &pPoslist[iPos]);
         3525  +              iPos += n;
         3526  +              if( iPos>=nCopy ) break;
         3527  +              fts5WriteFlushLeaf(p, &writer);
         3528  +              pBuf = &writer.aWriter[0].buf;
         3529  +            }
         3530  +            bFirstDocid = 1;
         3531  +          }
         3532  +          assert( pBuf->n<=pgsz );
         3533  +          iOff += nCopy;
         3534  +        }
         3535  +      }
         3536  +
         3537  +      pBuf->p[pBuf->n++] = '\0';
         3538  +      assert( pBuf->n<=pBuf->nSpace );
         3539  +      sqlite3Fts5HashScanNext(pHash);
         3540  +    }
         3541  +    sqlite3Fts5HashClear(pHash);
         3542  +    fts5WriteFinish(p, &writer, &nHeight, &pgnoLast);
  3455   3543   
  3456   3544       /* Update the Fts5Structure. It is written back to the database by the
  3457   3545       ** fts5StructureRelease() call below.  */
  3458   3546       if( pStruct->nLevel==0 ){
  3459   3547         fts5StructureAddLevel(&p->rc, &pStruct);
  3460   3548       }
  3461   3549       fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);