Check-in [1b9918e2]

Overview
Comment: Refactor PLWriter in preparation for buffered-document change. Currently, PLWriter (Position List Writer) creates a locally-owned DataBuffer to write into. This is necessary to support doclist collection during tokenization, where there is no obvious buffer to write output to, but is not necessary for the other users of PLWriter. This change adds a DLCollector (Doc List Collector) structure to handle the tokenization case.

Also fix a potential memory leak in writeZeroSegment(). In case of error from leafWriterStep(), the DataBuffer dl was being leaked. (CVS 3706)

SHA1: 1b9918e20767aebc9c1e7523027139e5fbc12688
User & Date: shess 2007-03-20 23:52:38
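
For orientation, the sketch below condenses how the pieces described in the comment above fit together. It is not part of the check-in: it reuses only identifiers that appear in the hunks further down (DLCollector, dlcNew, dlcAddPos, dlcAddDoclist, dlcDelete, and the writeZeroSegment() locals), and elides the surrounding declarations, tokenizer loop, and hash-table plumbing, so read it as an illustration of the ownership flow rather than as compilable code.

/* Sketch: lifecycle of a DLCollector across tokenization, segment
** writing, and cleanup.  All names are taken from the diff below.
*/

/* 1. Tokenization (buildTerms): each distinct term gets one DLCollector,
**    created on first sight and stored in the terms hash, then fed
**    position/offset data as further occurrences are seen.
*/
DLCollector *p = fts2HashFind(terms, pToken, nTokenBytes);
if( p==NULL ){
  p = dlcNew(iDocid, DL_DEFAULT);
  fts2HashInsert(terms, pToken, nTokenBytes, p);
}
if( iColumn>=0 ){
  dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset);
}

/* 2. writeZeroSegment(): each collector appends its doclist to one
**    reused DataBuffer.  dataBufferDestroy() now sits after the err:
**    label, so an error from leafWriterStep() no longer leaks dl.
*/
dataBufferInit(&dl, 0);
for(i=0; i<n; i++){
  dataBufferReset(&dl);
  dlcAddDoclist(pData[i].pCollector, &dl);
  rc = leafWriterStep(v, &writer,
                      pData[i].pTerm, pData[i].nTerm, dl.pData, dl.nData);
  if( rc!=SQLITE_OK ) goto err;
}
rc = leafWriterFinalize(v, &writer);
err:
dataBufferDestroy(&dl);

/* 3. xUpdate cleanup: collectors remain owned by the terms hash and are
**    released with dlcDelete() once the segment has been written.
*/
for(e=fts2HashFirst(&terms); e; e=fts2HashNext(e)){
  dlcDelete(fts2HashData(e));
}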
Context
2007-03-22 00:14
Refactor PLWriter to remove owned buffer. DLCollector (Document List Collector) now handles the case where PLWriter (Position List Writer) needed a local buffer. Change to using the associated DLWriter (Document List Writer) buffer, which reduces the number of memory copies needed in doclist processing, and brings PLWriter operation in line with DLWriter operation. (CVS 3707) check-in: d04fa3a1 user: shess tags: trunk
2007-03-20 23:52
Refactor PLWriter in preparation for buffered-document change. Currently, PLWriter (Position List Writer) creates a locally-owned DataBuffer to write into. This is necessary to support doclist collection during tokenization, where there is no obvious buffer to write output to, but is not necessary for the other users of PLWriter. This change adds a DLCollector (Doc List Collector) structure to handle the tokenization case.

Also fix a potential memory leak in writeZeroSegment(). In case of error from leafWriterStep(), the DataBuffer dl was being leaked. (CVS 3706) check-in: 1b9918e2 user: shess tags: trunk

2007-03-19 17:44
Modify the interface to the pager sub-system in preparation for performing IO in blocks based on sector-size, not database page-size. (CVS 3705) check-in: 7dc76588 user: danielk1977 tags: trunk
Changes

Changes to ext/fts2/fts2.c.

   938    938     pWriter->iPos = 0;
   939    939     pWriter->iOffset = 0;
   940    940   }
   941    941   static void plwInit(PLWriter *pWriter, sqlite_int64 iDocid, DocListType iType){
   942    942     dataBufferInit(&pWriter->b, 0);
   943    943     plwReset(pWriter, iDocid, iType);
   944    944   }
   945         -static PLWriter *plwNew(sqlite_int64 iDocid, DocListType iType){
   946         -  PLWriter *pWriter = malloc(sizeof(PLWriter));
   947         -  plwInit(pWriter, iDocid, iType);
   948         -  return pWriter;
   949         -}
   950    945   static void plwDestroy(PLWriter *pWriter){
   951    946     dataBufferDestroy(&pWriter->b);
   952    947     SCRAMBLE(pWriter);
   953    948   }
   954         -static void plwDelete(PLWriter *pWriter){
   955         -  plwDestroy(pWriter);
   956         -  free(pWriter);
          949  +
          950  +/*******************************************************************/
          951  +/* DLCollector wraps PLWriter and DLWriter to provide a
          952  +** dynamically-allocated doclist area to use during tokenization.
          953  +**
          954  +** dlcNew - malloc up and initialize a collector.
          955  +** dlcDelete - destroy a collector and all contained items.
          956  +** dlcAddPos - append position and offset information.
          957  +** dlcAddDoclist - add the collected doclist to the given buffer.
          958  +*/
          959  +typedef struct DLCollector {
          960  +  PLWriter plw;
          961  +} DLCollector;
          962  +
          963  +static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){
          964  +  DLWriter dlw;
          965  +  dlwInit(&dlw, pCollector->plw.iType, b);
          966  +  plwDlwAdd(&pCollector->plw, &dlw);
          967  +  dlwDestroy(&dlw);
          968  +}
          969  +static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos,
          970  +                      int iStartOffset, int iEndOffset){
          971  +  plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset);
          972  +}
          973  +
          974  +static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){
          975  +  DLCollector *pCollector = malloc(sizeof(DLCollector));
          976  +  plwInit(&pCollector->plw, iDocid, iType);
          977  +  return pCollector;
          978  +}
          979  +static void dlcDelete(DLCollector *pCollector){
          980  +  plwDestroy(&pCollector->plw);
          981  +  SCRAMBLE(pCollector);
          982  +  free(pCollector);
   957    983   }
   958    984   
   959    985   
   960    986   /* Copy the doclist data of iType in pData/nData into *out, trimming
   961    987   ** unnecessary data as we go.  Only columns matching iColumn are
   962    988   ** copied, all columns copied if iColimn is -1.  Elements with no
   963    989   ** matching columns are dropped.  The output is an iOutType doclist.
................................................................................
  3529   3555     if( rc!=SQLITE_OK ) return rc;
  3530   3556   
  3531   3557     pCursor->pTokenizer = pTokenizer;
  3532   3558     while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
  3533   3559                                                  &pToken, &nTokenBytes,
  3534   3560                                                  &iStartOffset, &iEndOffset,
  3535   3561                                                  &iPosition) ){
  3536         -    PLWriter *p;
         3562  +    DLCollector *p;
  3537   3563   
  3538   3564       /* Positions can't be negative; we use -1 as a terminator internally. */
  3539   3565       if( iPosition<0 ){
  3540   3566         pTokenizer->pModule->xClose(pCursor);
  3541   3567         return SQLITE_ERROR;
  3542   3568       }
  3543   3569   
  3544   3570       p = fts2HashFind(terms, pToken, nTokenBytes);
  3545   3571       if( p==NULL ){
  3546         -      p = plwNew(iDocid, DL_DEFAULT);
         3572  +      p = dlcNew(iDocid, DL_DEFAULT);
  3547   3573         fts2HashInsert(terms, pToken, nTokenBytes, p);
  3548   3574       }
  3549   3575       if( iColumn>=0 ){
  3550         -      plwAdd(p, iColumn, iPosition, iStartOffset, iEndOffset);
         3576  +      dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset);
  3551   3577       }
  3552   3578     }
  3553   3579   
  3554   3580     /* TODO(shess) Check return?  Should this be able to cause errors at
  3555   3581     ** this point?  Actually, same question about sqlite3_finalize(),
  3556   3582     ** though one could argue that failure there means that the data is
  3557   3583     ** not durable.  *ponder*
................................................................................
  5041   5067   }
  5042   5068   
  5043   5069   /****************************************************************/
  5044   5070   /* Used to hold hashtable data for sorting. */
  5045   5071   typedef struct TermData {
  5046   5072     const char *pTerm;
  5047   5073     int nTerm;
  5048         -  PLWriter *pWriter;
         5074  +  DLCollector *pCollector;
  5049   5075   } TermData;
  5050   5076   
  5051   5077   /* Orders TermData elements in strcmp fashion ( <0 for less-than, 0
  5052   5078   ** for equal, >0 for greater-than).
  5053   5079   */
  5054   5080   static int termDataCmp(const void *av, const void *bv){
  5055   5081     const TermData *a = (const TermData *)av;
................................................................................
  5077   5103     n = fts2HashCount(pTerms);
  5078   5104     pData = malloc(n*sizeof(TermData));
  5079   5105   
  5080   5106     for(i = 0, e = fts2HashFirst(pTerms); e; i++, e = fts2HashNext(e)){
  5081   5107       assert( i<n );
  5082   5108       pData[i].pTerm = fts2HashKey(e);
  5083   5109       pData[i].nTerm = fts2HashKeysize(e);
  5084         -    pData[i].pWriter = fts2HashData(e);
         5110  +    pData[i].pCollector = fts2HashData(e);
  5085   5111     }
  5086   5112     assert( i==n );
  5087   5113   
  5088   5114     /* TODO(shess) Should we allow user-defined collation sequences,
  5089   5115     ** here?  I think we only need that once we support prefix searches.
  5090   5116     */
  5091   5117     if( n>1 ) qsort(pData, n, sizeof(*pData), termDataCmp);
................................................................................
  5092   5118   
  5093   5119     /* TODO(shess) Refactor so that we can write directly to the segment
  5094   5120     ** DataBuffer, as happens for segment merges.
  5095   5121     */
  5096   5122     leafWriterInit(0, idx, &writer);
  5097   5123     dataBufferInit(&dl, 0);
  5098   5124     for(i=0; i<n; i++){
  5099         -    DLWriter dlw;
  5100   5125       dataBufferReset(&dl);
  5101         -    dlwInit(&dlw, DL_DEFAULT, &dl);
  5102         -    plwDlwAdd(pData[i].pWriter, &dlw);
         5126  +    dlcAddDoclist(pData[i].pCollector, &dl);
  5103   5127       rc = leafWriterStep(v, &writer,
  5104   5128                           pData[i].pTerm, pData[i].nTerm, dl.pData, dl.nData);
  5105         -    dlwDestroy(&dlw);
  5106   5129       if( rc!=SQLITE_OK ) goto err;
  5107   5130     }
  5108         -  dataBufferDestroy(&dl);
  5109   5131     rc = leafWriterFinalize(v, &writer);
  5110   5132   
  5111   5133    err:
         5134  +  dataBufferDestroy(&dl);
  5112   5135     free(pData);
  5113   5136     leafWriterDestroy(&writer);
  5114   5137     return rc;
  5115   5138   }
  5116   5139   
  5117   5140   /* This function implements the xUpdate callback; it's the top-level entry
  5118   5141    * point for inserting, deleting or updating a row in a full-text table. */
................................................................................
  5154   5177       rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
  5155   5178     }
  5156   5179   
  5157   5180     if( rc==SQLITE_OK ) rc = writeZeroSegment(v, &terms);
  5158   5181   
  5159   5182     /* clean up */
  5160   5183     for(e=fts2HashFirst(&terms); e; e=fts2HashNext(e)){
  5161         -    plwDelete(fts2HashData(e));
         5184  +    dlcDelete(fts2HashData(e));
  5162   5185     }
  5163   5186     fts2HashClear(&terms);
  5164   5187   
  5165   5188     return rc;
  5166   5189   }
  5167   5190   
  5168   5191   /*