/ Check-in [fdcea7b1]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Add the option to omit offset information from posting lists in FTS1. (CVS 3456)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: fdcea7b1ffd821f3f2b6d30997d3957f705a6d0c
User & Date: drh 2006-10-03 11:42:29
Context
2006-10-03
12:04
Fix sqlite3_analyzer so that it works on databases containing virtual tables. (CVS 3457) check-in: 47c8567f user: drh tags: trunk
11:42
Add the option to omit offset information from posting lists in FTS1. (CVS 3456) check-in: fdcea7b1 user: drh tags: trunk
2006-10-01
20:41
Another typo in the Porter stemmer check-in. (CVS 3455) check-in: 6696bda1 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts1/fts1.c.

   175    175   
   176    176   typedef enum DocListType {
   177    177     DL_DOCIDS,              /* docids only */
   178    178     DL_POSITIONS,           /* docids + positions */
   179    179     DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
   180    180   } DocListType;
   181    181   
          182  +/*
          183  +** By default, positions and offsets are stored in the doclists.
          184  +** To change this so that only positions are stored, compile
          185  +** with
          186  +**
          187  +**          -DDL_DEFAULT=DL_POSITIONS
          188  +**
          189  +*/
          190  +#ifndef DL_DEFAULT
          191  +# define DL_DEFAULT DL_POSITIONS_OFFSETS
          192  +#endif
          193  +
   182    194   typedef struct DocList {
   183    195     char *pData;
   184    196     int nData;
   185    197     DocListType iType;
   186    198     int iLastColumn;    /* the last column written */
   187    199     int iLastPos;       /* the last position written */
   188    200     int iLastOffset;    /* the last start offset written */
................................................................................
   269    281   /* Add a position to the last position list in a doclist. */
   270    282   static void docListAddPos(DocList *d, int iColumn, int iPos){
   271    283     assert( d->iType==DL_POSITIONS );
   272    284     addPos(d, iColumn, iPos);
   273    285     appendVarint(d, POS_END);  /* add new terminator */
   274    286   }
   275    287   
   276         -static void docListAddPosOffset(DocList *d, int iColumn, int iPos,
   277         -                                int iStartOffset, int iEndOffset){
   278         -  assert( d->iType==DL_POSITIONS_OFFSETS );
          288  +/*
          289  +** Add a position and starting and ending offsets to a doclist.
          290  +**
          291  +** If the doclist is set up to handle only positions, then insert
          292  +** the position only and ignore the offsets.
          293  +*/
          294  +static void docListAddPosOffset(
          295  +  DocList *d,             /* Doclist under construction */
          296  +  int iColumn,            /* Column the inserted term is part of */
          297  +  int iPos,               /* Position of the inserted term */
          298  +  int iStartOffset,       /* Starting offset of inserted term */
          299  +  int iEndOffset          /* Ending offset of inserted term */
          300  +){
          301  +  assert( d->iType>=DL_POSITIONS );
   279    302     addPos(d, iColumn, iPos);
   280         -
   281         -  assert( iStartOffset>=d->iLastOffset );
   282         -  appendVarint(d, iStartOffset-d->iLastOffset);
   283         -  d->iLastOffset = iStartOffset;
   284         -
   285         -  assert( iEndOffset>=iStartOffset );
   286         -  appendVarint(d, iEndOffset-iStartOffset);
   287         -
          303  +  if( d->iType==DL_POSITIONS_OFFSETS ){
          304  +    assert( iStartOffset>=d->iLastOffset );
          305  +    appendVarint(d, iStartOffset-d->iLastOffset);
          306  +    d->iLastOffset = iStartOffset;
          307  +    assert( iEndOffset>=iStartOffset );
          308  +    appendVarint(d, iEndOffset-iStartOffset);
          309  +  }
   288    310     appendVarint(d, POS_END);  /* add new terminator */
   289    311   }
   290    312   
   291    313   /*
   292    314   ** A DocListReader object is a cursor into a doclist.  Initialize
   293    315   ** the cursor to the beginning of the doclist by calling readerInit().
   294    316   ** Then use routines
................................................................................
  1295   1317     rc = sqlite3_bind_int(s, 2, iSegment);
  1296   1318     if( rc!=SQLITE_OK ) return rc;
  1297   1319   
  1298   1320     rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
  1299   1321     if( rc!=SQLITE_ROW ) return rc;
  1300   1322   
  1301   1323     *rowid = sqlite3_column_int64(s, 0);
  1302         -  docListInit(out, DL_POSITIONS_OFFSETS,
         1324  +  docListInit(out, DL_DEFAULT,
  1303   1325                 sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
  1304   1326   
  1305   1327     /* We expect only one row.  We must execute another sqlite3_step()
  1306   1328      * to complete the iteration; otherwise the table will remain locked. */
  1307   1329     rc = sqlite3_step(s);
  1308   1330     return rc==SQLITE_DONE ? SQLITE_ROW : rc;
  1309   1331   }
................................................................................
  1330   1352     sqlite3_stmt *s;
  1331   1353     int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
  1332   1354     if( rc!=SQLITE_OK ) return rc;
  1333   1355   
  1334   1356     rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
  1335   1357     if( rc!=SQLITE_OK ) return rc;
  1336   1358   
  1337         -  docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0);
         1359  +  docListInit(&doclist, DL_DEFAULT, 0, 0);
  1338   1360   
  1339   1361     /* TODO(shess) Handle schema and busy errors. */
  1340   1362     while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
  1341   1363       DocList old;
  1342   1364   
  1343   1365       /* TODO(shess) If we processed doclists from oldest to newest, we
  1344   1366       ** could skip the malloc() involved with the following call.  For
................................................................................
  2913   2935       if( iPosition<0 ){
  2914   2936         pTokenizer->pModule->xClose(pCursor);
  2915   2937         return SQLITE_ERROR;
  2916   2938       }
  2917   2939   
  2918   2940       p = fts1HashFind(terms, pToken, nTokenBytes);
  2919   2941       if( p==NULL ){
  2920         -      p = docListNew(DL_POSITIONS_OFFSETS);
         2942  +      p = docListNew(DL_DEFAULT);
  2921   2943         docListAddDocid(p, iDocid);
  2922   2944         fts1HashInsert(terms, pToken, nTokenBytes, p);
  2923   2945       }
  2924   2946       if( iColumn>=0 ){
  2925   2947         docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
  2926   2948       }
  2927   2949     }
................................................................................
  2940   2962                                DocList *d){
  2941   2963     sqlite_int64 iIndexRow;
  2942   2964     DocList doclist;
  2943   2965     int iSegment = 0, rc;
  2944   2966   
  2945   2967     rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
  2946   2968     if( rc==SQLITE_DONE ){
  2947         -    docListInit(&doclist, DL_POSITIONS_OFFSETS, 0, 0);
         2969  +    docListInit(&doclist, DL_DEFAULT, 0, 0);
  2948   2970       docListUpdate(&doclist, d);
  2949   2971       /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
  2950   2972       rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
  2951   2973       goto err;
  2952   2974     }
  2953   2975     if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
  2954   2976