SQLite

Check-in [aef7720e0b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add the NEAR operator to fts3. (CVS 4502)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: aef7720e0bb49d52332ddebe6f698feb926ef7d7
User & Date: danielk1977 2007-10-22 18:02:20.000
Context
2007-10-23
08:17
Fix an error message in the tcl interface. (CVS 4503) (check-in: 2449e08069 user: danielk1977 tags: trunk)
2007-10-22
18:02
Add the NEAR operator to fts3. (CVS 4502) (check-in: aef7720e0b user: danielk1977 tags: trunk)
2007-10-21
22:59
We need an extra define to activate OS/2 semaphores for compiling/linking. (CVS 4501) (check-in: 0604dace0e user: pweilbacher tags: trunk)
Changes
Unified Diff Ignore Whitespace Patch
Changes to ext/fts3/fts3.c.
284
285
286
287
288
289
290

291
292

293
294
295
296
297
298
299
#include <string.h>
#include <ctype.h>

#include "fts3.h"
#include "fts3_hash.h"
#include "fts3_tokenizer.h"
#include "sqlite3.h"

#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1



/* TODO(shess) MAN, this thing needs some refactoring.  At minimum, it
** would be nice to order the file better, perhaps something along the
** lines of:
**
**  - utility functions







>
|
|
>







284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
#include <string.h>
#include <ctype.h>

#include "fts3.h"
#include "fts3_hash.h"
#include "fts3_tokenizer.h"
#include "sqlite3.h"
#ifndef SQLITE_CORE 
  #include "sqlite3ext.h"
  SQLITE_EXTENSION_INIT1
#endif


/* TODO(shess) MAN, this thing needs some refactoring.  At minimum, it
** would be nice to order the file better, perhaps something along the
** lines of:
**
**  - utility functions
307
308
309
310
311
312
313





314
315
316
317
318
319
320

#if 0
# define TRACE(A)  printf A; fflush(stdout)
#else
# define TRACE(A)
#endif






/* It is not safe to call isspace(), tolower(), or isalnum() on
** hi-bit-set characters.  This is the same solution used in the
** tokenizer.
*/
/* TODO(shess) The snippet-generation code should be using the
** tokenizer-generated tokens rather than doing its own local
** tokenization.







>
>
>
>
>







309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327

#if 0
# define TRACE(A)  printf A; fflush(stdout)
#else
# define TRACE(A)
#endif

/*
** Default span for NEAR operators.
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10

/* It is not safe to call isspace(), tolower(), or isalnum() on
** hi-bit-set characters.  This is the same solution used in the
** tokenizer.
*/
/* TODO(shess) The snippet-generation code should be using the
** tokenizer-generated tokens rather than doing its own local
** tokenization.
1361
1362
1363
1364
1365
1366
1367




1368





1369
1370
1371
1372
1373
1374
1375
1376
1377

1378


1379
1380




1381

1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401

1402
1403
1404
1405

1406



1407

1408

1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420




























1421

1422

1423
1424
1425












1426
1427
1428
1429
1430
1431
1432


1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452


















1453




































1454
1455
1456
1457
1458
1459
1460
  }

  dlrDestroy(&left);
  dlrDestroy(&right);
  dlwDestroy(&writer);
}





/* pLeft and pRight are DLReaders positioned to the same docid.





**
** If there are no instances in pLeft or pRight where the position
** of pLeft is one less than the position of pRight, then this
** routine adds nothing to pOut.
**
** If there are one or more instances where positions from pLeft
** are exactly one less than positions from pRight, then add a new
** document record to pOut.  If pOut wants to hold positions, then
** include the positions from pRight that are one more than a

** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1.


*/
static void posListPhraseMerge(DLReader *pLeft, DLReader *pRight,




                               DLWriter *pOut){

  PLReader left, right;
  PLWriter writer;
  int match = 0;

  assert( dlrDocid(pLeft)==dlrDocid(pRight) );
  assert( pOut->iType!=DL_POSITIONS_OFFSETS );

  plrInit(&left, pLeft);
  plrInit(&right, pRight);

  while( !plrAtEnd(&left) && !plrAtEnd(&right) ){
    if( plrColumn(&left)<plrColumn(&right) ){
      plrStep(&left);
    }else if( plrColumn(&left)>plrColumn(&right) ){
      plrStep(&right);
    }else if( plrPosition(&left)+1<plrPosition(&right) ){
      plrStep(&left);
    }else if( plrPosition(&left)+1>plrPosition(&right) ){
      plrStep(&right);
    }else{

      if( !match ){
        plwInit(&writer, pOut, dlrDocid(pLeft));
        match = 1;
      }

      plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0);



      plrStep(&left);

      plrStep(&right);

    }
  }

  if( match ){
    plwTerminate(&writer);
    plwDestroy(&writer);
  }

  plrDestroy(&left);
  plrDestroy(&right);
}





























/* We have two doclists with positions:  pLeft and pRight.

** Write the phrase intersection of these two doclists into pOut.

**
** A phrase intersection means that two documents only match
** if pLeft.iPos+1==pRight.iPos.












**
** iType controls the type of data written to pOut.  If iType is
** DL_POSITIONS, the positions are those from pRight.
*/
static void docListPhraseMerge(
  const char *pLeft, int nLeft,
  const char *pRight, int nRight,


  DocListType iType,
  DataBuffer *pOut      /* Write the combined doclist here */
){
  DLReader left, right;
  DLWriter writer;

  if( nLeft==0 || nRight==0 ) return;

  assert( iType!=DL_POSITIONS_OFFSETS );

  dlrInit(&left, DL_POSITIONS, pLeft, nLeft);
  dlrInit(&right, DL_POSITIONS, pRight, nRight);
  dlwInit(&writer, iType, pOut);

  while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
    if( dlrDocid(&left)<dlrDocid(&right) ){
      dlrStep(&left);
    }else if( dlrDocid(&right)<dlrDocid(&left) ){
      dlrStep(&right);
    }else{


















      posListPhraseMerge(&left, &right, &writer);




































      dlrStep(&left);
      dlrStep(&right);
    }
  }

  dlrDestroy(&left);
  dlrDestroy(&right);







>
>
>
>
|
>
>
>
>
>

|
|
<

<
<
|
|
>
|
>
>

|
>
>
>
>
|
>















|
<
<


>
|
|
|
|
>
|
>
>
>
|
>
|
>












>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
|
>



>
>
>
>
>
>
>
>
>
>
>
>







>
>
|



















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387

1388


1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418


1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
  }

  dlrDestroy(&left);
  dlrDestroy(&right);
  dlwDestroy(&writer);
}

/* 
** This function is used as part of the implementation of phrase and
** NEAR matching.
**
** pLeft and pRight are DLReaders positioned to the same docid in
** lists of type DL_POSITION. This function writes an entry to the
** DLWriter pOut for each position in pRight that is less than
** (nNear+1) greater (but not equal to or smaller) than a position 
** in pLeft. For example, if nNear is 0, and the positions contained
** by pLeft and pRight are:
**
**    pLeft:  5 10 15 20
**    pRight: 6  9 17 21

**


** then the docid is added to pOut. If pOut is of type DL_POSITIONS,
** then a positionids "6" and "21" are also added to pOut.
**
** If boolean argument isSaveLeft is true, then positionids are copied
** from pLeft instead of pRight. In the example above, the positions "5"
** and "20" would be added instead of "6" and "21".
*/
static void posListPhraseMerge(
  DLReader *pLeft, 
  DLReader *pRight,
  int nNear,
  int isSaveLeft,
  DLWriter *pOut
){
  PLReader left, right;
  PLWriter writer;
  int match = 0;

  assert( dlrDocid(pLeft)==dlrDocid(pRight) );
  assert( pOut->iType!=DL_POSITIONS_OFFSETS );

  plrInit(&left, pLeft);
  plrInit(&right, pRight);

  while( !plrAtEnd(&left) && !plrAtEnd(&right) ){
    if( plrColumn(&left)<plrColumn(&right) ){
      plrStep(&left);
    }else if( plrColumn(&left)>plrColumn(&right) ){
      plrStep(&right);
    }else if( plrPosition(&left)>=plrPosition(&right) ){


      plrStep(&right);
    }else{
      if( (plrPosition(&right)-plrPosition(&left))<=(nNear+1) ){
        if( !match ){
          plwInit(&writer, pOut, dlrDocid(pLeft));
          match = 1;
        }
        if( !isSaveLeft ){
          plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0);
        }else{
          plwAdd(&writer, plrColumn(&left), plrPosition(&left), 0, 0);
        }
        plrStep(&right);
      }else{
        plrStep(&left);
      }
    }
  }

  if( match ){
    plwTerminate(&writer);
    plwDestroy(&writer);
  }

  plrDestroy(&left);
  plrDestroy(&right);
}

/*
** Compare the values pointed to by the PLReaders passed as arguments. 
** Return -1 if the value pointed to by pLeft is considered less than
** the value pointed to by pRight, +1 if it is considered greater
** than it, or 0 if it is equal. i.e.
**
**     (*pLeft - *pRight)
**
** A PLReader that is in the EOF condition is considered greater than
** any other. If neither argument is in EOF state, the return value of
** plrColumn() is used. If the plrColumn() values are equal, the
** comparison is on the basis of plrPosition().
*/
static int plrCompare(PLReader *pLeft, PLReader *pRight){
  assert(!plrAtEnd(pLeft) || !plrAtEnd(pRight));

  if( plrAtEnd(pRight) || plrAtEnd(pLeft) ){
    return (plrAtEnd(pRight) ? -1 : 1);
  }
  if( plrColumn(pLeft)!=plrColumn(pRight) ){
    return ((plrColumn(pLeft)<plrColumn(pRight)) ? -1 : 1);
  }
  if( plrPosition(pLeft)!=plrPosition(pRight) ){
    return ((plrPosition(pLeft)<plrPosition(pRight)) ? -1 : 1);
  }
  return 0;
}

/* We have two doclists with positions:  pLeft and pRight. Depending
** on the value of the nNear parameter, perform either a phrase
** intersection (if nNear==0) or a NEAR intersection (if nNear>0)
** and write the results into pOut.
**
** A phrase intersection means that two documents only match
** if pLeft.iPos+1==pRight.iPos.
**
** A NEAR intersection means that two documents only match if 
** (abs(pLeft.iPos-pRight.iPos)<nNear).
**
** If a NEAR intersection is requested, then the nPhrase argument should
** be passed the number of tokens in the two operands to the NEAR operator
** combined. For example:
**
**       Query syntax               nPhrase
**      ------------------------------------
**       "A B C" NEAR "D E"         5
**       A NEAR B                   2
**
** iType controls the type of data written to pOut.  If iType is
** DL_POSITIONS, the positions are those from pRight.
*/
static void docListPhraseMerge(
  const char *pLeft, int nLeft,
  const char *pRight, int nRight,
  int nNear,            /* 0 for a phrase merge, non-zero for a NEAR merge */
  int nPhrase,          /* Number of tokens in left+right operands to NEAR */
  DocListType iType,    /* Type of doclist to write to pOut */
  DataBuffer *pOut      /* Write the combined doclist here */
){
  DLReader left, right;
  DLWriter writer;

  if( nLeft==0 || nRight==0 ) return;

  assert( iType!=DL_POSITIONS_OFFSETS );

  dlrInit(&left, DL_POSITIONS, pLeft, nLeft);
  dlrInit(&right, DL_POSITIONS, pRight, nRight);
  dlwInit(&writer, iType, pOut);

  while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
    if( dlrDocid(&left)<dlrDocid(&right) ){
      dlrStep(&left);
    }else if( dlrDocid(&right)<dlrDocid(&left) ){
      dlrStep(&right);
    }else{
      if( nNear==0 ){
        posListPhraseMerge(&left, &right, 0, 0, &writer);
      }else{
        /* This case occurs when two terms (simple terms or phrases) are
         * connected by a NEAR operator, span (nNear+1). i.e.
         *
         *     '"terrible company" NEAR widget'
         */
        DataBuffer one = {0, 0, 0};
        DataBuffer two = {0, 0, 0};

        DLWriter dlwriter2;
        DLReader dr1 = {0, 0, 0, 0, 0}; 
        DLReader dr2 = {0, 0, 0, 0, 0};

        dlwInit(&dlwriter2, iType, &one);
        posListPhraseMerge(&right, &left, nNear-3+nPhrase, 1, &dlwriter2);
        dlwInit(&dlwriter2, iType, &two);
        posListPhraseMerge(&left, &right, nNear-1, 0, &dlwriter2);

        if( one.nData) dlrInit(&dr1, iType, one.pData, one.nData);
        if( two.nData) dlrInit(&dr2, iType, two.pData, two.nData);

        if( !dlrAtEnd(&dr1) || !dlrAtEnd(&dr2) ){
          PLReader pr1 = {0};
          PLReader pr2 = {0};

          PLWriter plwriter;
          plwInit(&plwriter, &writer, dlrDocid(dlrAtEnd(&dr1)?&dr2:&dr1));

          if( one.nData ) plrInit(&pr1, &dr1);
          if( two.nData ) plrInit(&pr2, &dr2);
          while( !plrAtEnd(&pr1) || !plrAtEnd(&pr2) ){
            int iCompare = plrCompare(&pr1, &pr2);
            switch( iCompare ){
              case -1:
                plwCopy(&plwriter, &pr1);
                plrStep(&pr1);
                break;
              case 1:
                plwCopy(&plwriter, &pr2);
                plrStep(&pr2);
                break;
              case 0:
                plwCopy(&plwriter, &pr1);
                plrStep(&pr1);
                plrStep(&pr2);
                break;
            }
          }
          plwTerminate(&plwriter);
        }
        dataBufferDestroy(&one);
        dataBufferDestroy(&two);
      }
      dlrStep(&left);
      dlrStep(&right);
    }
  }

  dlrDestroy(&left);
  dlrDestroy(&right);
1656
1657
1658
1659
1660
1661
1662
1663





























1664
1665
1666
1667
1668

1669
1670
1671
1672
1673
1674
1675

/* end utility functions */

/* Forward reference */
typedef struct fulltext_vtab fulltext_vtab;

/* A single term in a query is represented by an instances of
** the following structure.





























*/
typedef struct QueryTerm {
  short int nPhrase; /* How many following terms are part of the same phrase */
  short int iPhrase; /* This is the i-th term of a phrase. */
  short int iColumn; /* Column of the index that must match this term */

  signed char isOr;  /* this term is preceded by "OR" */
  signed char isNot; /* this term is preceded by "-" */
  signed char isPrefix; /* this term is followed by "*" */
  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
  int nTerm;         /* Number of bytes in pTerm[] */
} QueryTerm;








|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





>







1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829

/* end utility functions */

/* Forward reference */
typedef struct fulltext_vtab fulltext_vtab;

/* A single term in a query is represented by an instances of
** the following structure. Each word which may match against
** document content is a term. Operators, like NEAR or OR, are
** not terms. Query terms are organized as a flat list stored
** in the Query.pTerms array.
**
** If the QueryTerm.nPhrase variable is non-zero, then the QueryTerm
** is the first in a contiguous string of terms that are either part
** of the same phrase, or connected by the NEAR operator.
**
** If the QueryTerm.nNear variable is non-zero, then the token is followed 
** by a NEAR operator with span set to (nNear-1). For example, the 
** following query:
**
** The QueryTerm.iPhrase variable stores the index of the token within
** it's phrase, indexed starting at 1, or 1 if the token is not part 
** of any phrase.
**
** For example, the data structure used to represent the following query:
**
**     ... MATCH 'sqlite NEAR/5 google NEAR/2 "search engine"'
**
** is:
**
**     {nPhrase=4, iPhrase=1, nNear=6, pTerm="sqlite"},
**     {nPhrase=0, iPhrase=1, nNear=3, pTerm="google"},
**     {nPhrase=0, iPhrase=1, nNear=0, pTerm="search"},
**     {nPhrase=0, iPhrase=2, nNear=0, pTerm="engine"},
**
** compiling the FTS3 syntax to Query structures is done by the parseQuery()
** function.
*/
typedef struct QueryTerm {
  short int nPhrase; /* How many following terms are part of the same phrase */
  short int iPhrase; /* This is the i-th term of a phrase. */
  short int iColumn; /* Column of the index that must match this term */
  signed char nNear; /* term followed by a NEAR operator with span=(nNear-1) */
  signed char isOr;  /* this term is preceded by "OR" */
  signed char isNot; /* this term is preceded by "-" */
  signed char isPrefix; /* this term is followed by "*" */
  char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
  int nTerm;         /* Number of bytes in pTerm[] */
} QueryTerm;

1703
1704
1705
1706
1707
1708
1709

1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725

1726
1727
1728
1729
1730
1731
1732
 *
 */
typedef struct Query {
  fulltext_vtab *pFts;  /* The full text index */
  int nTerms;           /* Number of terms in the query */
  QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
  int nextIsOr;         /* Set the isOr flag on the next inserted term */

  int nextColumn;       /* Next word parsed must be in this column */
  int dfltColumn;       /* The default column */
} Query;


/*
** An instance of the following structure keeps track of generated
** matching-word offset information and snippets.
*/
typedef struct Snippet {
  int nMatch;     /* Total number of matches */
  int nAlloc;     /* Space allocated for aMatch[] */
  struct snippetMatch { /* One entry for each matching term */
    char snStatus;       /* Status flag for use while constructing snippets */
    short int iCol;      /* The column that contains the match */
    short int iTerm;     /* The index in Query.pTerms[] of the matching term */

    short int nByte;     /* Number of bytes in the term */
    int iStart;          /* The offset to the first character of the term */
  } *aMatch;      /* Points to space obtained from malloc */
  char *zOffset;  /* Text rendering of aMatch[] */
  int nOffset;    /* strlen(zOffset) */
  char *zSnippet; /* Snippet text */
  int nSnippet;   /* strlen(zSnippet) */







>
















>







1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
 *
 */
typedef struct Query {
  fulltext_vtab *pFts;  /* The full text index */
  int nTerms;           /* Number of terms in the query */
  QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
  int nextIsOr;         /* Set the isOr flag on the next inserted term */
  int nextIsNear;       /* Set the isOr flag on the next inserted term */
  int nextColumn;       /* Next word parsed must be in this column */
  int dfltColumn;       /* The default column */
} Query;


/*
** An instance of the following structure keeps track of generated
** matching-word offset information and snippets.
*/
typedef struct Snippet {
  int nMatch;     /* Total number of matches */
  int nAlloc;     /* Space allocated for aMatch[] */
  struct snippetMatch { /* One entry for each matching term */
    char snStatus;       /* Status flag for use while constructing snippets */
    short int iCol;      /* The column that contains the match */
    short int iTerm;     /* The index in Query.pTerms[] of the matching term */
    int iToken;          /* The index of the matching document token */
    short int nByte;     /* Number of bytes in the term */
    int iStart;          /* The offset to the first character of the term */
  } *aMatch;      /* Points to space obtained from malloc */
  char *zOffset;  /* Text rendering of aMatch[] */
  int nOffset;    /* strlen(zOffset) */
  char *zSnippet; /* Snippet text */
  int nSnippet;   /* strlen(zSnippet) */
2948
2949
2950
2951
2952
2953
2954

2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971

2972
2973
2974
2975
2976
2977
2978
}
/*
** Append a single entry to the p->aMatch[] log.
*/
static void snippetAppendMatch(
  Snippet *p,               /* Append the entry to this snippet */
  int iCol, int iTerm,      /* The column and query term */

  int iStart, int nByte     /* Offset and size of the match */
){
  int i;
  struct snippetMatch *pMatch;
  if( p->nMatch+1>=p->nAlloc ){
    p->nAlloc = p->nAlloc*2 + 10;
    p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
    if( p->aMatch==0 ){
      p->nMatch = 0;
      p->nAlloc = 0;
      return;
    }
  }
  i = p->nMatch++;
  pMatch = &p->aMatch[i];
  pMatch->iCol = iCol;
  pMatch->iTerm = iTerm;

  pMatch->iStart = iStart;
  pMatch->nByte = nByte;
}

/*
** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
*/







>

















>







3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
}
/*
** Append a single entry to the p->aMatch[] log.
*/
static void snippetAppendMatch(
  Snippet *p,               /* Append the entry to this snippet */
  int iCol, int iTerm,      /* The column and query term */
  int iToken,               /* Matching token in document */
  int iStart, int nByte     /* Offset and size of the match */
){
  int i;
  struct snippetMatch *pMatch;
  if( p->nMatch+1>=p->nAlloc ){
    p->nAlloc = p->nAlloc*2 + 10;
    p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
    if( p->aMatch==0 ){
      p->nMatch = 0;
      p->nAlloc = 0;
      return;
    }
  }
  i = p->nMatch++;
  pMatch = &p->aMatch[i];
  pMatch->iCol = iCol;
  pMatch->iTerm = iTerm;
  pMatch->iToken = iToken;
  pMatch->iStart = iStart;
  pMatch->nByte = nByte;
}

/*
** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
*/
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055


































































































3056
3057
3058
3059
3060
3061
3062
      assert( aTerm[i].nTerm<=nToken );
      if( memcmp(aTerm[i].pTerm, zToken, aTerm[i].nTerm) ) continue;
      if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
      match |= 1<<i;
      if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
        for(j=aTerm[i].iPhrase-1; j>=0; j--){
          int k = (iRotor-j) & FTS3_ROTOR_MASK;
          snippetAppendMatch(pSnippet, iColumn, i-j,
                iRotorBegin[k], iRotorLen[k]);
        }
      }
    }
    prevMatch = match<<1;
    iRotor++;
  }
  pTModule->xClose(pTCursor);  
}




































































































/*
** Compute all offsets for the current row of the query.  
** If the offsets have already been computed, this routine is a no-op.
*/
static void snippetAllOffsets(fulltext_cursor *p){
  int nColumn;







|










>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
      assert( aTerm[i].nTerm<=nToken );
      if( memcmp(aTerm[i].pTerm, zToken, aTerm[i].nTerm) ) continue;
      if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
      match |= 1<<i;
      if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
        for(j=aTerm[i].iPhrase-1; j>=0; j--){
          int k = (iRotor-j) & FTS3_ROTOR_MASK;
          snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j,
                iRotorBegin[k], iRotorLen[k]);
        }
      }
    }
    prevMatch = match<<1;
    iRotor++;
  }
  pTModule->xClose(pTCursor);  
}

/*
** Remove entries from the pSnippet structure to account for the NEAR
** operator. When this is called, pSnippet contains the list of token 
** offsets produced by treating all NEAR operators as AND operators.
** This function removes any entries that should not be present after
** accounting for the NEAR restriction. For example, if the queried
** document is:
**
**     "A B C D E A"
**
** and the query is:
** 
**     A NEAR/0 E
**
** then when this function is called the Snippet contains token offsets
** 0, 4 and 5. This function removes the "0" entry (because the first A
** is not near enough to an E).
*/
static void trimSnippetOffsetsForNear(Query *pQuery, Snippet *pSnippet){
  int ii;
  int iDir = 1;

  while(iDir>-2) {
    assert( iDir==1 || iDir==-1 );
    for(ii=0; ii<pSnippet->nMatch; ii++){
      int jj;
      int nNear;
      struct snippetMatch *pMatch = &pSnippet->aMatch[ii];
      QueryTerm *pQueryTerm = &pQuery->pTerms[pMatch->iTerm];

      if( (pMatch->iTerm+iDir)<0 
       || (pMatch->iTerm+iDir)>=pQuery->nTerms
      ){
        continue;
      }
     
      nNear = pQueryTerm->nNear;
      if( iDir<0 ){
        nNear = pQueryTerm[-1].nNear;
      }
  
      if( pMatch->iTerm>=0 && nNear ){
        int isOk = 0;
        int iNextTerm = pMatch->iTerm+iDir;
        int iPrevTerm = iNextTerm;

        int iEndToken;
        int iStartToken;

        if( iDir<0 ){
          int nPhrase = 1;
          iStartToken = pMatch->iToken;
          while( (pMatch->iTerm+nPhrase)<pQuery->nTerms 
              && pQuery->pTerms[pMatch->iTerm+nPhrase].iPhrase>1 
          ){
            nPhrase++;
          }
          iEndToken = iStartToken + nPhrase - 1;
        }else{
          iEndToken   = pMatch->iToken;
          iStartToken = pMatch->iToken+1-pQueryTerm->iPhrase;
        }

        while( pQuery->pTerms[iNextTerm].iPhrase>1 ){
          iNextTerm--;
        }
        while( (iPrevTerm+1)<pQuery->nTerms && 
               pQuery->pTerms[iPrevTerm+1].iPhrase>1 
        ){
          iPrevTerm++;
        }
  
        for(jj=0; isOk==0 && jj<pSnippet->nMatch; jj++){
          struct snippetMatch *p = &pSnippet->aMatch[jj];
          if( p->iCol==pMatch->iCol && ((
               p->iTerm==iNextTerm && 
               p->iToken>iEndToken && 
               p->iToken<=iEndToken+nNear
          ) || (
               p->iTerm==iPrevTerm && 
               p->iToken<iStartToken && 
               p->iToken>=iStartToken-nNear
          ))){
            isOk = 1;
          }
        }
        if( !isOk ){
          for(jj=1-pQueryTerm->iPhrase; jj<=0; jj++){
            pMatch[jj].iTerm = -1;
          }
          ii = -1;
          iDir = 1;
        }
      }
    }
    iDir -= 2;
  }
}

/*
** Compute all offsets for the current row of the query.  
** If the offsets have already been computed, this routine is a no-op.
*/
static void snippetAllOffsets(fulltext_cursor *p){
  int nColumn;
3079
3080
3081
3082
3083
3084
3085


3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100






3101
3102
3103
3104
3105

3106
3107
3108
3109
3110
3111
3112
  for(i=iFirst; i<=iLast; i++){
    const char *zDoc;
    int nDoc;
    zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
    nDoc = sqlite3_column_bytes(p->pStmt, i+1);
    snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
  }


}

/*
** Convert the information in the aMatch[] array of the snippet
** into the string zOffset[0..nOffset-1].
*/
static void snippetOffsetText(Snippet *p){
  int i;
  int cnt = 0;
  StringBuffer sb;
  char zBuf[200];
  if( p->zOffset ) return;
  initStringBuffer(&sb);
  for(i=0; i<p->nMatch; i++){
    struct snippetMatch *pMatch = &p->aMatch[i];






    zBuf[0] = ' ';
    sprintf(&zBuf[cnt>0], "%d %d %d %d", pMatch->iCol,
        pMatch->iTerm, pMatch->iStart, pMatch->nByte);
    append(&sb, zBuf);
    cnt++;

  }
  p->zOffset = stringBufferData(&sb);
  p->nOffset = stringBufferLength(&sb);
}

/*
** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set







>
>















>
>
>
>
>
>
|
|
|
|
|
>







3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
  for(i=iFirst; i<=iLast; i++){
    const char *zDoc;
    int nDoc;
    zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
    nDoc = sqlite3_column_bytes(p->pStmt, i+1);
    snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
  }

  trimSnippetOffsetsForNear(&p->q, &p->snippet);
}

/*
** Convert the information in the aMatch[] array of the snippet
** into the string zOffset[0..nOffset-1].
*/
static void snippetOffsetText(Snippet *p){
  int i;
  int cnt = 0;
  StringBuffer sb;
  char zBuf[200];
  if( p->zOffset ) return;
  initStringBuffer(&sb);
  for(i=0; i<p->nMatch; i++){
    struct snippetMatch *pMatch = &p->aMatch[i];
    if( pMatch->iTerm>=0 ){
      /* If snippetMatch.iTerm is less than 0, then the match was 
      ** discarded as part of processing the NEAR operator (see the 
      ** trimSnippetOffsetsForNear() function for details). Ignore 
      ** it in this case
      */
      zBuf[0] = ' ';
      sprintf(&zBuf[cnt>0], "%d %d %d %d", pMatch->iCol,
          pMatch->iTerm, pMatch->iStart, pMatch->nByte);
      append(&sb, zBuf);
      cnt++;
    }
  }
  p->zOffset = stringBufferData(&sb);
  p->nOffset = stringBufferLength(&sb);
}

/*
** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371











3372
3373
3374
3375
3376
3377
3378
3379
3380

3381

3382
3383
3384
3385
3386
3387
3388
** is the first term of a phrase query, go ahead and evaluate the phrase
** query and return the doclist for the entire phrase query.
**
** The resulting DL_DOCIDS doclist is stored in pResult, which is
** overwritten.
*/
static int docListOfTerm(
  fulltext_vtab *v,   /* The full text index */
  int iColumn,        /* column to restrict to.  No restriction if >=nColumn */
  QueryTerm *pQTerm,  /* Term we are looking for, or 1st term of a phrase */
  DataBuffer *pResult /* Write the result here */
){
  DataBuffer left, right, new;
  int i, rc;

  /* No phrase search if no position info. */
  assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS );

  /* This code should never be called with buffered updates. */
  assert( v->nPendingData<0 );

  dataBufferInit(&left, 0);
  rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix,
                  0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &left);
  if( rc ) return rc;
  for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){











    dataBufferInit(&right, 0);
    rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
                    pQTerm[i].isPrefix, DL_POSITIONS, &right);
    if( rc ){
      dataBufferDestroy(&left);
      return rc;
    }
    dataBufferInit(&new, 0);
    docListPhraseMerge(left.pData, left.nData, right.pData, right.nData,

                       i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS, &new);

    dataBufferDestroy(&left);
    dataBufferDestroy(&right);
    left = new;
  }
  *pResult = left;
  return SQLITE_OK;
}







|
|
|
|












|


>
>
>
>
>
>
>
>
>
>
>









>
|
>







3611
3612
3613
3614
3615
3616
3617
3618
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
** is the first term of a phrase query, go ahead and evaluate the phrase
** query and return the doclist for the entire phrase query.
**
** The resulting DL_DOCIDS doclist is stored in pResult, which is
** overwritten.
*/
static int docListOfTerm(
  fulltext_vtab *v,    /* The full text index */
  int iColumn,         /* column to restrict to.  No restriction if >=nColumn */
  QueryTerm *pQTerm,   /* Term we are looking for, or 1st term of a phrase */
  DataBuffer *pResult  /* Write the result here */
){
  DataBuffer left, right, new;
  int i, rc;

  /* No phrase search if no position info. */
  assert( pQTerm->nPhrase==0 || DL_DEFAULT!=DL_DOCIDS );

  /* This code should never be called with buffered updates. */
  assert( v->nPendingData<0 );

  dataBufferInit(&left, 0);
  rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix,
                  (0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS), &left);
  if( rc ) return rc;
  for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){
    /* If this token is connected to the next by a NEAR operator, and
    ** the next token is the start of a phrase, then set nPhraseRight
    ** to the number of tokens in the phrase. Otherwise leave it at 1.
    */
    int nPhraseRight = 1;
    while( (i+nPhraseRight)<=pQTerm->nPhrase 
        && pQTerm[i+nPhraseRight].nNear==0 
    ){
      nPhraseRight++;
    }

    dataBufferInit(&right, 0);
    rc = termSelect(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm,
                    pQTerm[i].isPrefix, DL_POSITIONS, &right);
    if( rc ){
      dataBufferDestroy(&left);
      return rc;
    }
    dataBufferInit(&new, 0);
    docListPhraseMerge(left.pData, left.nData, right.pData, right.nData,
                       pQTerm[i-1].nNear, pQTerm[i-1].iPhrase + nPhraseRight,
                       ((i<pQTerm->nPhrase) ? DL_POSITIONS : DL_DOCIDS),
                       &new);
    dataBufferDestroy(&left);
    dataBufferDestroy(&right);
    left = new;
  }
  *pResult = left;
  return SQLITE_OK;
}
3466
3467
3468
3469
3470
3471
3472
3473
3474


3475
3476
3477

























3478
3479
3480
3481
3482
3483
3484
    if( rc!=SQLITE_OK ) break;
    if( !inPhrase &&
        pSegment[iEnd]==':' &&
         (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
      pQuery->nextColumn = iCol;
      continue;
    }
    if( !inPhrase && pQuery->nTerms>0 && nToken==2
         && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){


      pQuery->nextIsOr = 1;
      continue;
    }

























    queryAdd(pQuery, pToken, nToken);
    if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
      pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
    }
    if( iEnd<nSegment && pSegment[iEnd]=='*' ){
      pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1;
    }







|
|
>
>



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
    if( rc!=SQLITE_OK ) break;
    if( !inPhrase &&
        pSegment[iEnd]==':' &&
         (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
      pQuery->nextColumn = iCol;
      continue;
    }
    if( !inPhrase && pQuery->nTerms>0 && nToken==2 
     && pSegment[iBegin+0]=='O'
     && pSegment[iBegin+1]=='R' 
    ){
      pQuery->nextIsOr = 1;
      continue;
    }
    if( !inPhrase && pQuery->nTerms>0 && !pQuery->nextIsOr && nToken==4 
      && pSegment[iBegin+0]=='N' 
      && pSegment[iBegin+1]=='E' 
      && pSegment[iBegin+2]=='A' 
      && pSegment[iBegin+3]=='R' 
    ){
      QueryTerm *pTerm = &pQuery->pTerms[pQuery->nTerms-1];
      if( (iBegin+6)<nSegment 
       && pSegment[iBegin+4] == '/'
       && pSegment[iBegin+5]>='0' && pSegment[iBegin+5]<='9'
      ){
        pTerm->nNear = (pSegment[iBegin+5] - '0');
        nToken += 2;
        if( pSegment[iBegin+6]>='0' && pSegment[iBegin+6]<=9 ){
          pTerm->nNear = pTerm->nNear * 10 + (pSegment[iBegin+6] - '0');
          iEnd++;
        }
        pModule->xNext(pCursor, &pToken, &nToken, &iBegin, &iEnd, &iPos);
      } else {
        pTerm->nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
      }
      pTerm->nNear++;
      continue;
    }

    queryAdd(pQuery, pToken, nToken);
    if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
      pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
    }
    if( iEnd<nSegment && pSegment[iEnd]=='*' ){
      pQuery->pTerms[pQuery->nTerms-1].isPrefix = 1;
    }
3504
3505
3506
3507
3508
3509
3510


3511
3512
3513
3514
3515
3516
3517
  fulltext_vtab *v,        /* The fulltext index */
  const char *zInput,      /* Input text of the query string */
  int nInput,              /* Size of the input text */
  int dfltColumn,          /* Default column of the index to match against */
  Query *pQuery            /* Write the parse results here. */
){
  int iInput, inPhrase = 0;



  if( zInput==0 ) nInput = 0;
  if( nInput<0 ) nInput = strlen(zInput);
  pQuery->nTerms = 0;
  pQuery->pTerms = NULL;
  pQuery->nextIsOr = 0;
  pQuery->nextColumn = dfltColumn;







>
>







3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
  fulltext_vtab *v,        /* The fulltext index */
  const char *zInput,      /* Input text of the query string */
  int nInput,              /* Size of the input text */
  int dfltColumn,          /* Default column of the index to match against */
  Query *pQuery            /* Write the parse results here. */
){
  int iInput, inPhrase = 0;
  int ii;
  QueryTerm *aTerm;

  if( zInput==0 ) nInput = 0;
  if( nInput<0 ) nInput = strlen(zInput);
  pQuery->nTerms = 0;
  pQuery->pTerms = NULL;
  pQuery->nextIsOr = 0;
  pQuery->nextColumn = dfltColumn;
3533
3534
3535
3536
3537
3538
3539















3540
3541
3542
3543
3544
3545
3546
  }

  if( inPhrase ){
    /* unmatched quote */
    queryClear(pQuery);
    return SQLITE_ERROR;
  }















  return SQLITE_OK;
}

/* TODO(shess) Refactor the code to remove this forward decl. */
static int flushPendingTerms(fulltext_vtab *v);

/* Perform a full-text query using the search expression in







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
  }

  if( inPhrase ){
    /* unmatched quote */
    queryClear(pQuery);
    return SQLITE_ERROR;
  }

  /* Modify the values of the QueryTerm.nPhrase variables to account for
  ** the NEAR operator. For the purposes of QueryTerm.nPhrase, phrases
  ** and tokens connected by the NEAR operator are handled as a single
  ** phrase. See comments above the QueryTerm structure for details.
  */
  aTerm = pQuery->pTerms;
  for(ii=0; ii<pQuery->nTerms; ii++){
    if( aTerm[ii].nNear || aTerm[ii].nPhrase ){
      while (aTerm[ii+aTerm[ii].nPhrase].nNear) {
        aTerm[ii].nPhrase += (1 + aTerm[ii+aTerm[ii].nPhrase+1].nPhrase);
      }
    }
  }

  return SQLITE_OK;
}

/* TODO(shess) Refactor the code to remove this forward decl. */
static int flushPendingTerms(fulltext_vtab *v);

/* Perform a full-text query using the search expression in
Added test/fts3near.test.
















































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168

# 2007 October 15
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# $Id: fts3near.test,v 1.1 2007/10/22 18:02:20 danielk1977 Exp $
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# If SQLITE_ENABLE_FTS3 is defined, omit this file.
ifcapable !fts3 {
  finish_test
  return
}

db eval {
  CREATE VIRTUAL TABLE t1 USING fts3(content);
  INSERT INTO t1(content) VALUES('one three four five');
  INSERT INTO t1(content) VALUES('two three four five');
  INSERT INTO t1(content) VALUES('one two three four five');
}

do_test fts3near-1.1 {
  execsql {SELECT docid FROM t1 WHERE content MATCH 'one NEAR/0 three'}
} {1}
do_test fts3near-1.2 {
  execsql {SELECT docid FROM t1 WHERE content MATCH 'one NEAR/1 two'}
} {3}
do_test fts3near-1.3 {
  execsql {SELECT docid FROM t1 WHERE content MATCH 'one NEAR/1 three'}
} {1 3}
do_test fts3near-1.4 {
  execsql {SELECT docid FROM t1 WHERE content MATCH 'three NEAR/1 one'}
} {1 3}
do_test fts3near-1.5 {
  execsql {SELECT docid FROM t1 WHERE content MATCH '"one two" NEAR/1 five'}
} {}
do_test fts3near-1.6 {
  execsql {SELECT docid FROM t1 WHERE content MATCH '"one two" NEAR/2 five'}
} {3}
do_test fts3near-1.7 {
  execsql {SELECT docid FROM t1 WHERE content MATCH 'one NEAR four'}
} {1 3}
do_test fts3near-1.8 {
  execsql {SELECT docid FROM t1 WHERE content MATCH 'four NEAR three'}
} {1 2 3}
do_test fts3near-1.9 {
  execsql {SELECT docid FROM t1 WHERE content MATCH '"four five" NEAR/0 three'}
} {1 2 3}
do_test fts3near-1.10 {
  execsql {SELECT docid FROM t1 WHERE content MATCH '"four five" NEAR/2 one'}
} {1 3}
do_test fts3near-1.11 {
  execsql {SELECT docid FROM t1 WHERE content MATCH '"four five" NEAR/1 one'}
} {1}
do_test fts3near-1.12 {
  execsql {SELECT docid FROM t1 WHERE content MATCH 'five NEAR/1 "two three"'}
} {2 3} 
do_test fts3near-1.13 {
  execsql {SELECT docid FROM t1 WHERE content MATCH 'one NEAR five'}
} {1 3} 


# Output format of the offsets() function:
#
#     <column number> <term number> <starting offset> <number of bytes>
#
db eval {
  INSERT INTO t1(content) VALUES('A X B C D A B');
}
do_test fts3near-2.1 {
  execsql {
    SELECT offsets(t1) FROM t1 WHERE content MATCH 'A NEAR/0 B'
  }
} {{0 0 10 1 0 1 12 1}}
do_test fts3near-2.2 {
  execsql {
    SELECT offsets(t1) FROM t1 WHERE content MATCH 'B NEAR/0 A'
  }
} {{0 1 10 1 0 0 12 1}}
do_test fts3near-2.3 {
  execsql {
    SELECT offsets(t1) FROM t1 WHERE content MATCH '"C D" NEAR/0 A'
  }
} {{0 0 6 1 0 1 8 1 0 2 10 1}}
do_test fts3near-2.4 {
  execsql {
    SELECT offsets(t1) FROM t1 WHERE content MATCH 'A NEAR/0 "C D"'
  }
} {{0 1 6 1 0 2 8 1 0 0 10 1}}
do_test fts3near-2.5 {
  execsql {
    SELECT offsets(t1) FROM t1 WHERE content MATCH 'A NEAR A'
  }
} {{0 0 0 1 0 1 0 1 0 0 10 1 0 1 10 1}}
do_test fts3near-2.6 {
  execsql {
    INSERT INTO t1 VALUES('A A A');
    SELECT offsets(t1) FROM t1 WHERE content MATCH 'A NEAR/2 A';
  }
} [list [list 0 0 0 1   0 1 0 1   0 0 2 1   0 1 2 1   0 0 4 1   0 1 4 1]]
do_test fts3near-2.7 {
  execsql {
    DELETE FROM t1;
    INSERT INTO t1 VALUES('A A A A');
    SELECT offsets(t1) FROM t1 WHERE content MATCH 'A NEAR A NEAR A';
  }
} [list [list \
    0 0 0 1   0 1 0 1   0 2 0 1   0 0 2 1   \
    0 1 2 1   0 2 2 1   0 0 4 1   0 1 4 1   \
    0 2 4 1   0 0 6 1   0 1 6 1   0 2 6 1   \
]]

db eval {
  DELETE FROM t1;
  INSERT INTO t1(content) VALUES(
    'one two three two four six three six nine four eight twelve'
  );
}

do_test fts3near-3.1 {
  execsql {SELECT offsets(t1) FROM t1 WHERE content MATCH 'three NEAR/1 one'}
} {{0 1 0 3 0 0 8 5}}
do_test fts3near-3.2 {
  execsql {SELECT offsets(t1) FROM t1 WHERE content MATCH 'one NEAR/1 three'}
} {{0 0 0 3 0 1 8 5}}
do_test fts3near-3.3 {
  execsql {SELECT offsets(t1) FROM t1 WHERE content MATCH 'three NEAR/1 two'}
} {{0 1 4 3 0 0 8 5 0 1 14 3}}
do_test fts3near-3.4 {
  execsql {SELECT offsets(t1) FROM t1 WHERE content MATCH 'three NEAR/2 two'}
} {{0 1 4 3 0 0 8 5 0 1 14 3 0 0 27 5}}
do_test fts3near-3.5 {
  execsql {SELECT offsets(t1) FROM t1 WHERE content MATCH 'two NEAR/2 three'}
} {{0 0 4 3 0 1 8 5 0 0 14 3 0 1 27 5}}
do_test fts3near-3.6 {
  execsql {
    SELECT offsets(t1) FROM t1 WHERE content MATCH 'three NEAR/0 "two four"'
  }
} {{0 0 8 5 0 1 14 3 0 2 18 4}}
do_test fts3near-3.7 {
  execsql {
    SELECT offsets(t1) FROM t1 WHERE content MATCH '"two four" NEAR/0 three'}
} {{0 2 8 5 0 0 14 3 0 1 18 4}}

db eval {
  INSERT INTO t1(content) VALUES('
    This specification defines Cascading Style Sheets, level 2 (CSS2). CSS2 is a style sheet language that allows authors and users to attach style (e.g., fonts, spacing, and aural cues) to structured documents (e.g., HTML documents and XML applications). By separating the presentation style of documents from the content of documents, CSS2 simplifies Web authoring and site maintenance.

    CSS2 builds on CSS1 (see [CSS1]) and, with very few exceptions, all valid CSS1 style sheets are valid CSS2 style sheets. CSS2 supports media-specific style sheets so that authors may tailor the presentation of their documents to visual browsers, aural devices, printers, braille devices, handheld devices, etc. This specification also supports content positioning, downloadable fonts, table layout, features for internationalization, automatic counters and numbering, and some properties related to user interface.
  ') 
}
do_test fts3near-4.1 {
  execsql {
    SELECT snippet(t1) FROM t1 WHERE content MATCH 'specification NEAR supports'
  }
} {{<b>...</b> devices, handheld devices, etc. This <b>specification</b> also <b>supports</b> content positioning, downloadable fonts, <b>...</b>}}

finish_test