/ Check-in [d44c84c0]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Do not display matches against the right-hand side of a NOT operator in the output of the FTS snippet() or offsets() functions. (CVS 6097)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: d44c84c0f77bd0fc4a9942177b6cae6d109b89b7
User & Date: drh 2009-01-02 01:10:42
Context
2009-01-02
12:35
Increment the version number to 3.6.8. (CVS 6098) check-in: 7509641a user: drh tags: trunk
01:10
Do not display matches against the right-hand side of a NOT operator in the output of the FTS snippet() or offsets() functions. (CVS 6097) check-in: d44c84c0 user: drh tags: trunk
2009-01-01
15:20
Fix a (benign) valgrind error that can occur following malloc failure while executing a 'ROLLBACK TO savepoint' command. (CVS 6096) check-in: 9ff8598f user: danielk1977 tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts3/fts3.c.

3132
3133
3134
3135
3136
3137
3138





3139
3140
3141
3142
3143
3144
3145
3146
3147
....
3156
3157
3158
3159
3160
3161
3162

3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
















3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
....
3213
3214
3215
3216
3217
3218
3219
3220





3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
....
3260
3261
3262
3263
3264
3265
3266


3267
3268
3269
3270
3271

3272
3273
3274
3275
3276
3277
3278
....
3381
3382
3383
3384
3385
3386
3387

3388
3389
3390

3391
3392
3393
3394
3395
3396
3397
** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
*/
#define FTS3_ROTOR_SZ   (32)
#define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1)

/*
** Function to iterate through the tokens of a compiled expression.





*/
static int nextExprToken(Fts3Expr **ppExpr, int *piToken){
  Fts3Expr *p = *ppExpr;
  int iToken = *piToken;
  if( iToken<0 ){
    /* In this case the expression p is the root of an expression tree.
    ** Move to the first token in the expression tree.
    */
    while( p->pLeft ){
................................................................................
      iToken = 0;
      while( p->pParent && p->pParent->pLeft!=p ){
        assert( p->pParent->pRight==p );
        p = p->pParent;
      }
      p = p->pParent;
      if( p ){

        p = p->pRight;
        while( p->pLeft ){
          p = p->pLeft;
        }
      }
    }
  }

  *ppExpr = p;
  *piToken = iToken;
  return p?1:0;
}

















/*
** Add entries to pSnippet->aMatch[] for every match that occurs against
** document zDoc[0..nDoc-1] which is stored in column iColumn.
*/
static void snippetOffsetsOfColumn(
  fulltext_cursor *pCur,
  Snippet *pSnippet,
  int iColumn,
  const char *zDoc,
  int nDoc
){
  const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
  sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
  sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
  fulltext_vtab *pVtab;                /* The full text index */
  int nColumn;                         /* Number of columns in the index */
  int i, j;                            /* Loop counters */
................................................................................
  prevMatch = 0;
  while( !pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos) ){
    Fts3Expr *pIter = pCur->pExpr;
    int iIter = -1;
    iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
    iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
    match = 0;
    for(i=0; i<(FTS3_ROTOR_SZ-1) && nextExprToken(&pIter, &iIter); i++){





      int nPhrase = pIter->pPhrase->nToken;   /* Tokens in current phrase */
      struct PhraseToken *pToken = &pIter->pPhrase->aToken[iIter];
      int iCol = pIter->pPhrase->iColumn;
      if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
      if( pToken->n>nToken ) continue;
      if( !pToken->isPrefix && pToken->n<nToken ) continue;
      assert( pToken->n<=nToken );
      if( memcmp(pToken->z, zToken, pToken->n) ) continue;
      if( iIter>0 && (prevMatch & (1<<i))==0 ) continue;
      match |= 1<<i;
................................................................................
** 0, 4 and 5. This function removes the "0" entry (because the first A
** is not near enough to an E).
**
** When this function is called, the value pointed to by parameter piLeft is
** the integer id of the left-most token in the expression tree headed by
** pExpr. This function increments *piLeft by the total number of tokens
** in the expression tree headed by pExpr.


*/
static int trimSnippetOffsets(
  Fts3Expr *pExpr, 
  Snippet *pSnippet,
  int *piLeft

){
  if( pExpr ){
    if( trimSnippetOffsets(pExpr->pLeft, pSnippet, piLeft) ){
      return 1;
    }

    switch( pExpr->eType ){
................................................................................

  if( p->snippet.nMatch || p->pExpr==0 ){
    return;
  }
  nColumn = pFts->nColumn;
  iColumn = (p->iCursorType - QUERY_FULLTEXT);
  if( iColumn<0 || iColumn>=nColumn ){

    iFirst = 0;
    iLast = nColumn-1;
  }else{

    iFirst = iColumn;
    iLast = iColumn;
  }
  for(i=iFirst; i<=iLast; i++){
    const char *zDoc;
    int nDoc;
    zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);







>
>
>
>
>

|







 







>












>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






|
|
|
|
|







 







|
>
>
>
>
>
|
|
|







 







>
>


|
|
<
>







 







>



>







3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
....
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
....
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
....
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299

3300
3301
3302
3303
3304
3305
3306
3307
....
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
*/
#define FTS3_ROTOR_SZ   (32)
#define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1)

/*
** Function to iterate through the tokens of a compiled expression.
**
** Except, skip all tokens on the right-hand side of a NOT operator.
** This function is used to find tokens as part of snippet and offset
** generation and we do nt want snippets and offsets to report matches
** for tokens on the RHS of a NOT.
*/
static int fts3NextExprToken(Fts3Expr **ppExpr, int *piToken){
  Fts3Expr *p = *ppExpr;
  int iToken = *piToken;
  if( iToken<0 ){
    /* In this case the expression p is the root of an expression tree.
    ** Move to the first token in the expression tree.
    */
    while( p->pLeft ){
................................................................................
      iToken = 0;
      while( p->pParent && p->pParent->pLeft!=p ){
        assert( p->pParent->pRight==p );
        p = p->pParent;
      }
      p = p->pParent;
      if( p ){
        assert( p->pRight!=0 );
        p = p->pRight;
        while( p->pLeft ){
          p = p->pLeft;
        }
      }
    }
  }

  *ppExpr = p;
  *piToken = iToken;
  return p?1:0;
}

/*
** Return TRUE if the expression node pExpr is located beneath the
** RHS of a NOT operator.
*/
static int fts3ExprBeneathNot(Fts3Expr *p){
  Fts3Expr *pParent;
  while( p ){
    pParent = p->pParent;
    if( pParent && pParent->eType==FTSQUERY_NOT && pParent->pRight==p ){
      return 1;
    }
    p = pParent;
  }
  return 0;
}

/*
** Add entries to pSnippet->aMatch[] for every match that occurs against
** document zDoc[0..nDoc-1] which is stored in column iColumn.
*/
static void snippetOffsetsOfColumn(
  fulltext_cursor *pCur,         /* The fulltest search cursor */
  Snippet *pSnippet,             /* The Snippet object to be filled in */
  int iColumn,                   /* Index of fulltext table column */
  const char *zDoc,              /* Text of the fulltext table column */
  int nDoc                       /* Length of zDoc in bytes */
){
  const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
  sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
  sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
  fulltext_vtab *pVtab;                /* The full text index */
  int nColumn;                         /* Number of columns in the index */
  int i, j;                            /* Loop counters */
................................................................................
  prevMatch = 0;
  while( !pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos) ){
    Fts3Expr *pIter = pCur->pExpr;
    int iIter = -1;
    iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
    iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
    match = 0;
    for(i=0; i<(FTS3_ROTOR_SZ-1) && fts3NextExprToken(&pIter, &iIter); i++){
      int nPhrase;                    /* Number of tokens in current phrase */
      struct PhraseToken *pToken;     /* Current token */
      int iCol;                       /* Column index */

      if( fts3ExprBeneathNot(pIter) ) continue;
      nPhrase = pIter->pPhrase->nToken;
      pToken = &pIter->pPhrase->aToken[iIter];
      iCol = pIter->pPhrase->iColumn;
      if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
      if( pToken->n>nToken ) continue;
      if( !pToken->isPrefix && pToken->n<nToken ) continue;
      assert( pToken->n<=nToken );
      if( memcmp(pToken->z, zToken, pToken->n) ) continue;
      if( iIter>0 && (prevMatch & (1<<i))==0 ) continue;
      match |= 1<<i;
................................................................................
** 0, 4 and 5. This function removes the "0" entry (because the first A
** is not near enough to an E).
**
** When this function is called, the value pointed to by parameter piLeft is
** the integer id of the left-most token in the expression tree headed by
** pExpr. This function increments *piLeft by the total number of tokens
** in the expression tree headed by pExpr.
**
** Return 1 if any trimming occurs.  Return 0 if no trimming is required.
*/
static int trimSnippetOffsets(
  Fts3Expr *pExpr,      /* The search expression */
  Snippet *pSnippet,    /* The set of snippet offsets to be trimmed */

  int *piLeft           /* Index of left-most token in pExpr */
){
  if( pExpr ){
    if( trimSnippetOffsets(pExpr->pLeft, pSnippet, piLeft) ){
      return 1;
    }

    switch( pExpr->eType ){
................................................................................

  if( p->snippet.nMatch || p->pExpr==0 ){
    return;
  }
  nColumn = pFts->nColumn;
  iColumn = (p->iCursorType - QUERY_FULLTEXT);
  if( iColumn<0 || iColumn>=nColumn ){
    /* Look for matches over all columns of the full-text index */
    iFirst = 0;
    iLast = nColumn-1;
  }else{
    /* Look for matches in the iColumn-th column of the index only */
    iFirst = iColumn;
    iLast = iColumn;
  }
  for(i=iFirst; i<=iLast; i++){
    const char *zDoc;
    int nDoc;
    zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);