SQLite4
Check-in [e21b7b67b5]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Many fts5 related fixes.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: e21b7b67b5fed3bf2b86d872b50ae7f9852993a2
User & Date: dan 2013-01-12 15:50:10
Context
2013-01-13
05:30
Avoid using keyword "near" as a variable name in fts5.c. check-in: 278cfaeb70 user: dan tags: trunk
2013-01-12
15:50
Many fts5 related fixes. check-in: e21b7b67b5 user: dan tags: trunk
15:13
Get the amalgamation build working again. check-in: 0078080de5 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/build.c.

2285
2286
2287
2288
2289
2290
2291














2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303


2304
2305
2306
2307
2308
2309
2310
....
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
  }
  sqlite4OpenIndex(pParse, iIdx, iDb, pIdx, OP_OpenWrite);
  if( bCreate ) sqlite4VdbeChangeP5(v, 1);

  /* Loop through the contents of the PK index. At each row, insert the
  ** corresponding entry into the auxiliary index.  */
  addr1 = sqlite4VdbeAddOp2(v, OP_Rewind, iTab, 0);














  sqlite4GetTempRange(pParse,2);
  regKey = sqlite4GetTempReg(pParse);
  sqlite4EncodeIndexKey(pParse, pPk, iTab, pIdx, iIdx, 0, regKey);
  if( pIdx->onError!=OE_None ){
    const char *zErr = "indexed columns are not unique";
    int addrTest;
     
    addrTest = sqlite4VdbeAddOp4Int(v, OP_IsUnique, iIdx, 0, regKey, 0);
    sqlite4HaltConstraint(pParse, OE_Abort, (char *)zErr, P4_STATIC);
    sqlite4VdbeJumpHere(v, addrTest);
  }
  sqlite4VdbeAddOp3(v, OP_IdxInsert, iIdx, 0, regKey);  


  sqlite4VdbeAddOp2(v, OP_Next, iTab, addr1+1);
  sqlite4VdbeJumpHere(v, addr1);
  sqlite4ReleaseTempReg(pParse, regKey);

  sqlite4VdbeAddOp1(v, OP_Close, iTab);
  sqlite4VdbeAddOp1(v, OP_Close, iIdx);
}
................................................................................
        addIndexToHash(db, pIdx);
        pIdx = 0;
      }else{
        createIndexWriteSchema(pParse, pIdx, pIdxName, pEnd);
      }
    }

    sqlite4DbFree(db, pIdx);
    sqlite4DbFree(db, zIdx);
  }

  sqlite4ExprListDelete(db, pList);
  sqlite4SrcListDelete(db, p->pTblName);
}








>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
>
>







 







|







2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
....
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
  }
  sqlite4OpenIndex(pParse, iIdx, iDb, pIdx, OP_OpenWrite);
  if( bCreate ) sqlite4VdbeChangeP5(v, 1);

  /* Loop through the contents of the PK index. At each row, insert the
  ** corresponding entry into the auxiliary index.  */
  addr1 = sqlite4VdbeAddOp2(v, OP_Rewind, iTab, 0);

  if( pIdx->eIndexType==SQLITE4_INDEX_FTS5 ){
    int regData;
    int i;

    regKey = sqlite4GetTempRange(pParse, pTab->nCol+1);
    regData = regKey+1;

    sqlite4VdbeAddOp2(v, OP_RowKey, iTab, regKey);
    for(i=0; i<pTab->nCol; i++){
      sqlite4VdbeAddOp3(v, OP_Column, iTab, i, regData+i);
    }
    sqlite4Fts5CodeUpdate(pParse, pIdx, pParse->iNewidxReg, regKey, regData, 0);
  }else{
    sqlite4GetTempRange(pParse,2);
    regKey = sqlite4GetTempReg(pParse);
    sqlite4EncodeIndexKey(pParse, pPk, iTab, pIdx, iIdx, 0, regKey);
    if( pIdx->onError!=OE_None ){
      const char *zErr = "indexed columns are not unique";
      int addrTest;

      addrTest = sqlite4VdbeAddOp4Int(v, OP_IsUnique, iIdx, 0, regKey, 0);
      sqlite4HaltConstraint(pParse, OE_Abort, (char *)zErr, P4_STATIC);
      sqlite4VdbeJumpHere(v, addrTest);
    }
    sqlite4VdbeAddOp3(v, OP_IdxInsert, iIdx, 0, regKey);  
  }

  sqlite4VdbeAddOp2(v, OP_Next, iTab, addr1+1);
  sqlite4VdbeJumpHere(v, addr1);
  sqlite4ReleaseTempReg(pParse, regKey);

  sqlite4VdbeAddOp1(v, OP_Close, iTab);
  sqlite4VdbeAddOp1(v, OP_Close, iIdx);
}
................................................................................
        addIndexToHash(db, pIdx);
        pIdx = 0;
      }else{
        createIndexWriteSchema(pParse, pIdx, pIdxName, pEnd);
      }
    }

    if( pIdx ) freeIndex(db, pIdx);
    sqlite4DbFree(db, zIdx);
  }

  sqlite4ExprListDelete(db, pList);
  sqlite4SrcListDelete(db, p->pTblName);
}

Changes to src/delete.c.

599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
      int iCol;
      int iReg = pParse->nMem+1;
      pParse->nMem += (1 + pTab->nCol);
      for(iCol=0; iCol<pTab->nCol; iCol++){
        sqlite4VdbeAddOp3(v, OP_Column, iPkCsr, iCol, iReg+iCol);
      }
      sqlite4VdbeAddOp2(v, OP_RowKey, iPkCsr, iReg+pTab->nCol);
      sqlite4Fts5CodeUpdate(pParse, pIdx, iReg+pTab->nCol, iReg, 1);
    }else if( pIdx!=pPk && (aRegIdx==0 || aRegIdx[i]>0) ){
      int addrNotFound;
      sqlite4EncodeIndexKey(pParse, pPk, baseCur+iPk,pIdx,baseCur+i,0,regKey);
      addrNotFound = sqlite4VdbeAddOp4(v,
          OP_NotFound, baseCur+i, 0, regKey, 0, P4_INT32
      );
      sqlite4VdbeAddOp1(v, OP_Delete, baseCur+i);







|







599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
      int iCol;
      int iReg = pParse->nMem+1;
      pParse->nMem += (1 + pTab->nCol);
      for(iCol=0; iCol<pTab->nCol; iCol++){
        sqlite4VdbeAddOp3(v, OP_Column, iPkCsr, iCol, iReg+iCol);
      }
      sqlite4VdbeAddOp2(v, OP_RowKey, iPkCsr, iReg+pTab->nCol);
      sqlite4Fts5CodeUpdate(pParse, pIdx, 0, iReg+pTab->nCol, iReg, 1);
    }else if( pIdx!=pPk && (aRegIdx==0 || aRegIdx[i]>0) ){
      int addrNotFound;
      sqlite4EncodeIndexKey(pParse, pPk, baseCur+iPk,pIdx,baseCur+i,0,regKey);
      addrNotFound = sqlite4VdbeAddOp4(v,
          OP_NotFound, baseCur+i, 0, regKey, 0, P4_INT32
      );
      sqlite4VdbeAddOp1(v, OP_Delete, baseCur+i);

Changes to src/fts5.c.

256
257
258
259
260
261
262





263
264
265
266
267
268
269
...
592
593
594
595
596
597
598

599
600
601
602
603
604
605
....
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
....
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306


1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
....
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
....
1479
1480
1481
1482
1483
1484
1485

1486
1487
1488
1489
1490
1491
1492
....
1498
1499
1500
1501
1502
1503
1504

1505
1506
1507
1508
1509
1510
1511
....
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
....
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
....
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
....
1680
1681
1682
1683
1684
1685
1686

1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
....
2931
2932
2933
2934
2935
2936
2937
2938







2939
2940
2941
2942
2943
2944

2945



2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963


2964
2965
2966

2967
2968
2969
2970
2971
2972




2973


2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
....
2995
2996
2997
2998
2999
3000
3001

3002
3003
3004



3005
3006



3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
....
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
  KVCursor *pCsr;                 /* Cursor used to retrive values */
  Mem *aMem;                      /* Array of column values */
  int bMemValid;                  /* True if contents of aMem[] are valid */

  Fts5Size *pSz;                  /* Local size data */
  Fts5Size *pGlobal;              /* Global size data */
  i64 nGlobal;                    /* Total number of rows in table */





  int *anRow;

  Fts5MatchIter *pIter;           /* Used by mi_match_detail() */
};

/*
** A deserialized 'size record' (see above).
................................................................................

  pToken = &p->pStr->aToken[p->pStr->nToken];

  zSpace = &pParse->aSpace[pParse->iSpace];
  nUsed = putVarint32((u8 *)zSpace, pParse->iRoot);
  zSpace[nUsed++] = 0x24;
  pToken->bPrefix = 0;

  pToken->z = &zSpace[nUsed];
  pToken->n = n;
  memcpy(pToken->z, z, n);
  pToken->z[n] = '\0';

  nUsed += (n+1);
  pToken->aPrefix = (u8 *)zSpace;
................................................................................
  int nToken, 
  int iSrc, 
  int nSrc
){
  TokenizeCtx *p = (TokenizeCtx *)pCtx;
  sqlite4 *db = p->db;
  TokenizeTerm *pTerm = 0;
  TokenizeTerm *pOrig = 0;

  /* TODO: Error here if iStream is out of range */

  if( nToken>p->nMax ) p->nMax = nToken;

  if( iStream>=p->nStream ){
    int nOld = p->nStream;
................................................................................
  pTerm = (TokenizeTerm *)sqlite4HashFind(&p->hash, zToken, nToken);
  if( pTerm==0 ){
    /* Size the initial allocation so that it fits in the lookaside buffer */
    int nAlloc = sizeof(TokenizeTerm) + nToken + 32;

    pTerm = sqlite4DbMallocZero(p->db, nAlloc);
    if( pTerm ){
      void *pFree;
      pTerm->nAlloc = sqlite4DbMallocSize(p->db, pTerm);
      pTerm->nToken = nToken;
      memcpy(&pTerm[1], zToken, nToken);
      pFree = sqlite4HashInsert(&p->hash, (char *)&pTerm[1], nToken, pTerm);
      if( pFree ){
        sqlite4DbFree(p->db, pFree);
        pTerm = 0;
      }
      if( pTerm==0 ) goto tokenize_cb_out;


    }
  }
  pOrig = pTerm;

  if( iStream!=pTerm->iStream ){
    pTerm = fts5TokenizeAppendInt(p, pTerm, (iStream << 2) | 0x00000003);
    if( !pTerm ) goto tokenize_cb_out;
    pTerm->iStream = iStream;
  }

................................................................................
  }

  pTerm = fts5TokenizeAppendInt(p, pTerm, (iOff-pTerm->iOff) << 1);
  if( !pTerm ) goto tokenize_cb_out;
  pTerm->iOff = iOff;

tokenize_cb_out:
  if( pTerm!=pOrig ){
    sqlite4HashInsert(&p->hash, (char *)&pTerm[1], nToken, pTerm);
  }
  if( !pTerm ){
    p->rc = SQLITE4_NOMEM;
    return 1;
  }

  return 0;
}
................................................................................

/*
** Update an fts index.
*/
int sqlite4Fts5Update(
  sqlite4 *db,                    /* Database handle */
  Fts5Info *pInfo,                /* Description of fts index to update */

  Mem *pKey,                      /* Primary key blob */
  Mem *aArg,                      /* Array of arguments (see above) */
  int bDel,                       /* True for a delete, false for insert */
  char **pzErr                    /* OUT: Error message */
){
  int i;
  int rc = SQLITE4_OK;
................................................................................
  u8 *aSpace = 0;
  int nSpace = 0;

  const u8 *pPK;
  int nPK;
  HashElem *pElem;


  pStore = db->aDb[pInfo->iDb].pKV;

  memset(&sCtx, 0, sizeof(sCtx));
  sCtx.db = db;
  sCtx.nCol = pInfo->nCol;
  sqlite4HashInit(db->pEnv, &sCtx.hash, 1);

................................................................................
  **   * space for the size record and key for this document, and
  **   * space for the updated global size record for the document set.
  **
  ** To make it easier, the below allocates enough space to simultaneously
  ** store the largest index record key and the largest possible global
  ** size record.
  */
  nSpace = (sqlite4VarintLen(pInfo->iRoot) + 2 + sCtx.nMax + nPK) + 
           (9 * (2 + pInfo->nCol * sCtx.nStream));
  aSpace = sqlite4DbMallocRaw(db, nSpace);
  if( aSpace==0 ) rc = SQLITE4_NOMEM;

  for(pElem=sqliteHashFirst(&sCtx.hash); pElem; pElem=sqliteHashNext(pElem)){
    TokenizeTerm *pTerm = (TokenizeTerm *)sqliteHashData(pElem);
    if( rc==SQLITE4_OK ){
      int nToken = sqliteHashKeysize(pElem);
      char *zToken = (char *)sqliteHashKey(pElem);
      u8 *aKey = aSpace;
      int nKey;

      nKey = putVarint32(aKey, pInfo->iRoot);
      aKey[nKey++] = 0x24;
      memcpy(&aKey[nKey], zToken, nToken);
      nKey += nToken;
      aKey[nKey++] = 0x00;
      memcpy(&aKey[nKey], pPK, nPK);
      nKey += nPK;

................................................................................
  }

  /* Write the size record into the db */
  if( rc==SQLITE4_OK ){
    u8 *aKey = aSpace;
    int nKey;

    nKey = putVarint32(aKey, pInfo->iRoot);
    aKey[nKey++] = 0x00;
    memcpy(&aKey[nKey], pPK, nPK);
    nKey += nPK;

    if( bDel==0 ){
      Fts5Size sSz;
      sSz.nCol = pInfo->nCol;
................................................................................
  /* Update the global record */
  if( rc==SQLITE4_OK ){
    Fts5Size *pSz;                /* Deserialized global size record */
    i64 nRow;                     /* Number of rows in indexed table */
    u8 *aKey = aSpace;            /* Space to format the global record key */
    int nKey;                     /* Size of global record key in bytes */

    nKey = putVarint32(aKey, pInfo->iRoot);
    aKey[nKey++] = 0x00;
    rc = fts5LoadSizeRecord(db, aKey, nKey, sCtx.nStream, pInfo, &nRow, &pSz);
    assert( rc!=SQLITE4_OK || pSz->nStream>=sCtx.nStream );

    if( rc==SQLITE4_OK ){
      int iCol;
      for(iCol=0; iCol<pSz->nCol; iCol++){
................................................................................

  return pInfo;
}

void sqlite4Fts5CodeUpdate(
  Parse *pParse, 
  Index *pIdx, 

  int iRegPk, 
  int iRegData,
  int bDel
){
  Vdbe *v;
  Fts5Info *pInfo;                /* p4 argument for FtsUpdate opcode */

  if( 0==(pInfo = fts5InfoCreate(pParse, pIdx, 0)) ) return;

  v = sqlite4GetVdbe(pParse);
  sqlite4VdbeAddOp3(v, OP_FtsUpdate, iRegPk, 0, iRegData);
  sqlite4VdbeChangeP4(v, -1, (const char *)pInfo, P4_FTS5INFO);
  sqlite4VdbeChangeP5(v, (u8)bDel);
}

void sqlite4Fts5CodeQuery(
  Parse *pParse,
  Index *pIdx,
................................................................................
  int *pnMatch,
  int *pnDoc,
  int *pnRelevant
){
  return SQLITE4_OK;
}

static void fts5StrLoadRowcounts(Fts5Str *pStr, int nStream, int *anRow){







  u32 mask = 0;
  int iPrevCol = 0;
  InstanceList sList;

  fts5InstanceListInit(pStr->aList, pStr->nList, &sList);
  while( 0==fts5InstanceListNext(&sList) ){

    if( sList.iCol!=iPrevCol ) mask = 0;



    if( (mask & (1<<sList.iStream))==0 ){
      anRow[sList.iCol * nStream + sList.iStream]++;
      mask |= (1<<sList.iStream);
      iPrevCol = sList.iCol;
    }
  }
}

static int fts5ExprLoadRowcounts(
  sqlite4 *db, 
  Fts5Info *pInfo,
  int nStream,
  Fts5ExprNode *pNode, 
  int **panRow
){
  int rc = SQLITE4_OK;

  if( pNode ){


    if( pNode->eType==TOKEN_PRIMITIVE ){
      int *anRow = *panRow;
      Fts5Phrase *pPhrase = pNode->pPhrase;


      rc = fts5ExprAdvance(db, pNode, 1);
      while( rc==SQLITE4_OK ){
        int nIncr =  pInfo->nCol * nStream;      /* Values for each Fts5Str */
        int i;
        for(i=0; i<pPhrase->nStr; i++){




          fts5StrLoadRowcounts(&pPhrase->aStr[i], nStream, &anRow[i*nIncr]);


        }
        rc = fts5ExprAdvance(db, pNode, 0);
      }

      *panRow = &anRow[pInfo->nCol * nStream * pPhrase->nStr];
    }

    if( rc==SQLITE4_OK ){
      rc = fts5ExprLoadRowcounts(db, pInfo, nStream, pNode->pLeft, panRow);
    }
    if( rc==SQLITE4_OK ){
      rc = fts5ExprLoadRowcounts(db, pInfo, nStream, pNode->pRight, panRow);
    }
  }

  return rc;
}

static int fts5CsrLoadRowcounts(Fts5Cursor *pCsr){
................................................................................
  if( pCsr->anRow==0 ){
    int nStream = pCsr->pGlobal->nStream;
    sqlite4 *db = pCsr->db;
    Fts5Expr *pCopy;
    Fts5Expr *pExpr = pCsr->pExpr;
    Fts5Info *pInfo = pCsr->pInfo;
    int *anRow;


    pCsr->anRow = anRow = (int *)sqlite4DbMallocZero(db, 
        pExpr->nPhrase * pInfo->nCol * pCsr->pGlobal->nStream * sizeof(int)



    );
    if( !anRow ) return SQLITE4_NOMEM;




    rc = fts5ParseExpression(db, pInfo->pTokenizer, pInfo->p, 
        pInfo->iRoot, pInfo->azCol, pInfo->nCol, pCsr->zExpr, &pCopy, 0
    );
    if( rc==SQLITE4_OK ){
      rc = fts5OpenExprCursors(db, pInfo, pExpr->pRoot);
    }
    if( rc==SQLITE4_OK ){
      rc = fts5ExprLoadRowcounts(db, pInfo, nStream, pCopy->pRoot, &anRow);
    }

    fts5ExpressionFree(db, pCopy);
  }

  return rc;
}
................................................................................
      int i;
      int nRow = 0;
      int nStream = pCsr->pGlobal->nStream;
      int nCol = pCsr->pInfo->nCol;
      int *aRow = &pCsr->anRow[iP * nStream * nCol];

      if( iC<0 && iS<0 ){
        int nFin = nCol * nStream;
        for(i=0; i<nFin; i++) nRow += aRow[i];
      }else if( iC<0 ){
        for(i=0; i<nCol; i++) nRow += aRow[i*nStream + iS];
      }else if( iS<0 ){
        for(i=0; i<nStream; i++) nRow += aRow[nStream*iC + iS];
      }else if( iC<nCol && iS<nStream ){
        nRow = aRow[iC * nStream + iS];
      }

      *pn = nRow;
    }
  }







>
>
>
>
>







 







>







 







<







 







<



<
<
<
<
|
|
>
>
|
<
<







 







<
|
<







 







>







 







>







 







|












|







 







|







 







|







 







>










|







 







|
>
>
>
>
>
>
>

|




>
|
>
>
>










|


|




>
>

<

>


|



>
>
>
>
|
>
>




|



|


|







 







>

|
|
>
>
>
|

>
>
>





|


|







 







|
<



|







256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
...
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
....
1272
1273
1274
1275
1276
1277
1278

1279
1280
1281
1282
1283
1284
1285
....
1295
1296
1297
1298
1299
1300
1301

1302
1303
1304




1305
1306
1307
1308
1309


1310
1311
1312
1313
1314
1315
1316
....
1322
1323
1324
1325
1326
1327
1328

1329

1330
1331
1332
1333
1334
1335
1336
....
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
....
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
....
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
....
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
....
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
....
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
....
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978

2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
....
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
....
3067
3068
3069
3070
3071
3072
3073
3074

3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
  KVCursor *pCsr;                 /* Cursor used to retrive values */
  Mem *aMem;                      /* Array of column values */
  int bMemValid;                  /* True if contents of aMem[] are valid */

  Fts5Size *pSz;                  /* Local size data */
  Fts5Size *pGlobal;              /* Global size data */
  i64 nGlobal;                    /* Total number of rows in table */

  /* Arrays used by sqlite4_mi_row_count(). */
  int *anRowCS;
  int *anRowC;
  int *anRowS;
  int *anRow;

  Fts5MatchIter *pIter;           /* Used by mi_match_detail() */
};

/*
** A deserialized 'size record' (see above).
................................................................................

  pToken = &p->pStr->aToken[p->pStr->nToken];

  zSpace = &pParse->aSpace[pParse->iSpace];
  nUsed = putVarint32((u8 *)zSpace, pParse->iRoot);
  zSpace[nUsed++] = 0x24;
  pToken->bPrefix = 0;
  pToken->pPrefix = 0;
  pToken->z = &zSpace[nUsed];
  pToken->n = n;
  memcpy(pToken->z, z, n);
  pToken->z[n] = '\0';

  nUsed += (n+1);
  pToken->aPrefix = (u8 *)zSpace;
................................................................................
  int nToken, 
  int iSrc, 
  int nSrc
){
  TokenizeCtx *p = (TokenizeCtx *)pCtx;
  sqlite4 *db = p->db;
  TokenizeTerm *pTerm = 0;


  /* TODO: Error here if iStream is out of range */

  if( nToken>p->nMax ) p->nMax = nToken;

  if( iStream>=p->nStream ){
    int nOld = p->nStream;
................................................................................
  pTerm = (TokenizeTerm *)sqlite4HashFind(&p->hash, zToken, nToken);
  if( pTerm==0 ){
    /* Size the initial allocation so that it fits in the lookaside buffer */
    int nAlloc = sizeof(TokenizeTerm) + nToken + 32;

    pTerm = sqlite4DbMallocZero(p->db, nAlloc);
    if( pTerm ){

      pTerm->nAlloc = sqlite4DbMallocSize(p->db, pTerm);
      pTerm->nToken = nToken;
      memcpy(&pTerm[1], zToken, nToken);




    }
    if( pTerm==0 ) goto tokenize_cb_out;
  }else{
    sqlite4HashInsert(&p->hash, zToken, nToken, 0);
  }



  if( iStream!=pTerm->iStream ){
    pTerm = fts5TokenizeAppendInt(p, pTerm, (iStream << 2) | 0x00000003);
    if( !pTerm ) goto tokenize_cb_out;
    pTerm->iStream = iStream;
  }

................................................................................
  }

  pTerm = fts5TokenizeAppendInt(p, pTerm, (iOff-pTerm->iOff) << 1);
  if( !pTerm ) goto tokenize_cb_out;
  pTerm->iOff = iOff;

tokenize_cb_out:

  sqlite4HashInsert(&p->hash, (char *)&pTerm[1], nToken, pTerm);

  if( !pTerm ){
    p->rc = SQLITE4_NOMEM;
    return 1;
  }

  return 0;
}
................................................................................

/*
** Update an fts index.
*/
int sqlite4Fts5Update(
  sqlite4 *db,                    /* Database handle */
  Fts5Info *pInfo,                /* Description of fts index to update */
  int iRoot,
  Mem *pKey,                      /* Primary key blob */
  Mem *aArg,                      /* Array of arguments (see above) */
  int bDel,                       /* True for a delete, false for insert */
  char **pzErr                    /* OUT: Error message */
){
  int i;
  int rc = SQLITE4_OK;
................................................................................
  u8 *aSpace = 0;
  int nSpace = 0;

  const u8 *pPK;
  int nPK;
  HashElem *pElem;

  if( iRoot==0 ) iRoot = pInfo->iRoot;
  pStore = db->aDb[pInfo->iDb].pKV;

  memset(&sCtx, 0, sizeof(sCtx));
  sCtx.db = db;
  sCtx.nCol = pInfo->nCol;
  sqlite4HashInit(db->pEnv, &sCtx.hash, 1);

................................................................................
  **   * space for the size record and key for this document, and
  **   * space for the updated global size record for the document set.
  **
  ** To make it easier, the below allocates enough space to simultaneously
  ** store the largest index record key and the largest possible global
  ** size record.
  */
  nSpace = (sqlite4VarintLen(iRoot) + 2 + sCtx.nMax + nPK) + 
           (9 * (2 + pInfo->nCol * sCtx.nStream));
  aSpace = sqlite4DbMallocRaw(db, nSpace);
  if( aSpace==0 ) rc = SQLITE4_NOMEM;

  for(pElem=sqliteHashFirst(&sCtx.hash); pElem; pElem=sqliteHashNext(pElem)){
    TokenizeTerm *pTerm = (TokenizeTerm *)sqliteHashData(pElem);
    if( rc==SQLITE4_OK ){
      int nToken = sqliteHashKeysize(pElem);
      char *zToken = (char *)sqliteHashKey(pElem);
      u8 *aKey = aSpace;
      int nKey;

      nKey = putVarint32(aKey, iRoot);
      aKey[nKey++] = 0x24;
      memcpy(&aKey[nKey], zToken, nToken);
      nKey += nToken;
      aKey[nKey++] = 0x00;
      memcpy(&aKey[nKey], pPK, nPK);
      nKey += nPK;

................................................................................
  }

  /* Write the size record into the db */
  if( rc==SQLITE4_OK ){
    u8 *aKey = aSpace;
    int nKey;

    nKey = putVarint32(aKey, iRoot);
    aKey[nKey++] = 0x00;
    memcpy(&aKey[nKey], pPK, nPK);
    nKey += nPK;

    if( bDel==0 ){
      Fts5Size sSz;
      sSz.nCol = pInfo->nCol;
................................................................................
  /* Update the global record */
  if( rc==SQLITE4_OK ){
    Fts5Size *pSz;                /* Deserialized global size record */
    i64 nRow;                     /* Number of rows in indexed table */
    u8 *aKey = aSpace;            /* Space to format the global record key */
    int nKey;                     /* Size of global record key in bytes */

    nKey = putVarint32(aKey, iRoot);
    aKey[nKey++] = 0x00;
    rc = fts5LoadSizeRecord(db, aKey, nKey, sCtx.nStream, pInfo, &nRow, &pSz);
    assert( rc!=SQLITE4_OK || pSz->nStream>=sCtx.nStream );

    if( rc==SQLITE4_OK ){
      int iCol;
      for(iCol=0; iCol<pSz->nCol; iCol++){
................................................................................

  return pInfo;
}

void sqlite4Fts5CodeUpdate(
  Parse *pParse, 
  Index *pIdx, 
  int iRegRoot,
  int iRegPk, 
  int iRegData,
  int bDel
){
  Vdbe *v;
  Fts5Info *pInfo;                /* p4 argument for FtsUpdate opcode */

  if( 0==(pInfo = fts5InfoCreate(pParse, pIdx, 0)) ) return;

  v = sqlite4GetVdbe(pParse);
  sqlite4VdbeAddOp3(v, OP_FtsUpdate, iRegPk, iRegRoot, iRegData);
  sqlite4VdbeChangeP4(v, -1, (const char *)pInfo, P4_FTS5INFO);
  sqlite4VdbeChangeP5(v, (u8)bDel);
}

void sqlite4Fts5CodeQuery(
  Parse *pParse,
  Index *pIdx,
................................................................................
  int *pnMatch,
  int *pnDoc,
  int *pnRelevant
){
  return SQLITE4_OK;
}

static void fts5StrLoadRowcounts(
  Fts5Str *pStr, 
  int nStream, 
  int *anRow,
  int *anRowC,
  int *anRowS,
  int *pnRowCS
){
  u32 mask = 0;
  int iPrevCol = -1;
  InstanceList sList;

  fts5InstanceListInit(pStr->aList, pStr->nList, &sList);
  while( 0==fts5InstanceListNext(&sList) ){
    if( iPrevCol<0 ) (*pnRowCS)++;
    if( sList.iCol!=iPrevCol ){
      mask = 0;
      anRowC[sList.iCol]++;
    }
    if( (mask & (1<<sList.iStream))==0 ){
      anRow[sList.iCol * nStream + sList.iStream]++;
      mask |= (1<<sList.iStream);
      iPrevCol = sList.iCol;
    }
  }
}

static int fts5ExprLoadRowcounts(
  sqlite4 *db, 
  Fts5Cursor *pCsr,
  int nStream,
  Fts5ExprNode *pNode, 
  int *piStr
){
  int rc = SQLITE4_OK;

  if( pNode ){
    Fts5Info *pInfo = pCsr->pInfo;

    if( pNode->eType==TOKEN_PRIMITIVE ){

      Fts5Phrase *pPhrase = pNode->pPhrase;
      int iStr = *piStr;

      rc = fts5ExprAdvance(db, pNode, 1);
      while( rc==SQLITE4_OK && pNode->aPk ){
        int nIncr =  pInfo->nCol * nStream;      /* Values for each Fts5Str */
        int i;
        for(i=0; i<pPhrase->nStr; i++){
          int *anRow = &pCsr->anRow[(iStr+i) * pInfo->nCol * nStream];
          int *anRowC = &pCsr->anRowC[(iStr+i) * pInfo->nCol];
          int *anRowS = &pCsr->anRowS[(iStr+i) * nStream];
          int *pnRowCS = &pCsr->anRowCS[iStr+i];
          fts5StrLoadRowcounts(
              &pPhrase->aStr[i], nStream, anRow, anRowC, anRowS, pnRowCS
          );
        }
        rc = fts5ExprAdvance(db, pNode, 0);
      }

      *piStr = iStr + pPhrase->nStr;
    }

    if( rc==SQLITE4_OK ){
      rc = fts5ExprLoadRowcounts(db, pCsr, nStream, pNode->pLeft, piStr);
    }
    if( rc==SQLITE4_OK ){
      rc = fts5ExprLoadRowcounts(db, pCsr, nStream, pNode->pRight, piStr);
    }
  }

  return rc;
}

static int fts5CsrLoadRowcounts(Fts5Cursor *pCsr){
................................................................................
  if( pCsr->anRow==0 ){
    int nStream = pCsr->pGlobal->nStream;
    sqlite4 *db = pCsr->db;
    Fts5Expr *pCopy;
    Fts5Expr *pExpr = pCsr->pExpr;
    Fts5Info *pInfo = pCsr->pInfo;
    int *anRow;
    int iPhrase = 0;

    pCsr->anRow = anRow = (int *)sqlite4DbMallocZero(db, sizeof(int) * (
        pExpr->nPhrase * pInfo->nCol * pCsr->pGlobal->nStream
      + pExpr->nPhrase * pInfo->nCol
      + pExpr->nPhrase * pCsr->pGlobal->nStream
      + pExpr->nPhrase 
    ));
    if( !anRow ) return SQLITE4_NOMEM;
    pCsr->anRowC = &anRow[pExpr->nPhrase*pInfo->nCol*pCsr->pGlobal->nStream];
    pCsr->anRowS = &pCsr->anRowC[pExpr->nPhrase * pInfo->nCol];
    pCsr->anRowCS = &pCsr->anRowS[pExpr->nPhrase * pCsr->pGlobal->nStream];

    rc = fts5ParseExpression(db, pInfo->pTokenizer, pInfo->p, 
        pInfo->iRoot, pInfo->azCol, pInfo->nCol, pCsr->zExpr, &pCopy, 0
    );
    if( rc==SQLITE4_OK ){
      rc = fts5OpenExprCursors(db, pInfo, pCopy->pRoot);
    }
    if( rc==SQLITE4_OK ){
      rc = fts5ExprLoadRowcounts(db, pCsr, nStream, pCopy->pRoot, &iPhrase);
    }

    fts5ExpressionFree(db, pCopy);
  }

  return rc;
}
................................................................................
      int i;
      int nRow = 0;
      int nStream = pCsr->pGlobal->nStream;
      int nCol = pCsr->pInfo->nCol;
      int *aRow = &pCsr->anRow[iP * nStream * nCol];

      if( iC<0 && iS<0 ){
        nRow = pCsr->anRowCS[iP];

      }else if( iC<0 ){
        for(i=0; i<nCol; i++) nRow += aRow[i*nStream + iS];
      }else if( iS<0 ){
        nRow = pCsr->anRowC[iP*nCol + iC];
      }else if( iC<nCol && iS<nStream ){
        nRow = aRow[iC * nStream + iS];
      }

      *pn = nRow;
    }
  }

Changes to src/fts5func.c.

70
71
72
73
74
75
76

77
78
79
80
81





82
83
84
85
86
87
88
89
90
91
...
102
103
104
105
106
107
108

109
110
111
112
113
114
115
...
131
132
133
134
135
136
137

138
139
140
141
142










143
144
145









146

147
148
149
150
151
152
153
...
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
...
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351

352
353
354
355
356
357





358
359
360
361
362
363
364
...
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
...
506
507
508
509
510
511
512




513
514
515
516
517
518
519
520
** on the nature of both the documents and queries. The implementation
** below sets each parameter to the midpoint of the suggested range.
*/
static void fts5Rank(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){
  const double b = 0.65;
  const double k1 = 1.6;


  int rc = SQLITE4_OK;            /* Error code */
  Fts5RankCtx *p;                 /* Structure to store reusable values */
  int i;                          /* Used to iterate through phrases */
  double rank = 0.0;              /* UDF return value */






  p = sqlite4_get_auxdata(pCtx, 0);
  if( p==0 ){
    sqlite4 *db = sqlite4_context_db_handle(pCtx);
    int nPhrase;                  /* Number of phrases in query expression */
    int nByte;                    /* Number of bytes of data to allocate */

    sqlite4_mi_phrase_count(pCtx, &nPhrase);
    nByte = sizeof(Fts5RankCtx) + nPhrase * sizeof(double);
    p = (Fts5RankCtx *)sqlite4DbMallocZero(db, nByte);
    sqlite4_set_auxdata(pCtx, 0, (void *)p, fts5RankFreeCtx);
................................................................................
      p->aIdf = (double *)&p[1];

      /* Determine the IDF weight for each phrase in the query. */
      rc = sqlite4_mi_total_rows(pCtx, &N);
      for(i=0; rc==SQLITE4_OK && i<nPhrase; i++){
        rc = sqlite4_mi_row_count(pCtx, -1, -1, i, &ni);
        if( rc==SQLITE4_OK ){

          p->aIdf[i] = log((0.5 + N - ni) / (0.5 + ni));
        }
      }

      /* Determine the average document length */
      if( rc==SQLITE4_OK ){
        int nTotal;
................................................................................
    ** in this row (within any column). And dl to the number of tokens in
    ** the current row (again, in any column).  */
    rc = sqlite4_mi_match_count(pCtx, -1, -1, i, &tf); 
    if( rc==SQLITE4_OK ) rc = sqlite4_mi_size(pCtx, -1, -1, &dl); 

    /* Calculate the normalized document length */
    L = (double)dl / p->avgdl;


    /* Calculate the contribution to the rank made by this phrase. Then
    ** add it to variable rank.  */
    prank = (p->aIdf[i] * tf) / (k1 * ( (1.0 - b) + b * L) + tf);
    rank += prank;










  }

  if( rc==SQLITE4_OK ){









    sqlite4_result_double(pCtx, rank);

  }else{
    sqlite4_result_error_code(pCtx, rc);
  }
}

typedef struct Snippet Snippet;
typedef struct SnippetText SnippetText;
................................................................................
){
  SnippetCtx *p = (SnippetCtx *)pCtx;

  if( iOff<p->iOff ){
    return 0;
  }else if( iOff>=(p->iOff + p->nToken) ){
    fts5SnippetAppend(p, &p->zText[p->iFrom], p->iTo - p->iFrom);
    fts5SnippetAppend(p, "...", 3);
    p->iFrom = -1;
    return 1;
  }else{
    int bHighlight;               /* True to highlight term */

    bHighlight = (p->mask & (1 << (iOff-p->iOff)));

    if( p->iFrom==0 && p->iOff!=0 ){
      p->iFrom = iSrc;
      if( p->pOut->nOut==0 ) fts5SnippetAppend(p, p->zEllipses, -1);
    }

    if( bHighlight ){
................................................................................
      int nScore = 0;

      int nPTok;
      int iPTok;

      if( iColumn>=0 && iColumn!=iCol ) continue;

      allmask |= (1 << iPhrase);

      nShift = ((iPrevCol==iCol) ? (iOff-iPrev) : 100);

      for(iMask=0; iMask<nPhrase; iMask++){
        if( nShift<64){
          aMask[iMask] = aMask[iMask] >> nShift;
        }else{
          aMask[iMask] = 0;
        }
      }
      sqlite4_mi_phrase_token_count(pCtx, iPhrase, &nPTok);
      for(iPTok=0; iPTok<nPTok; iPTok++){
        aMask[iPhrase] = aMask[iPhrase] | (1<<(nToken-1+iPTok));
      }

      for(iMask=0; iMask<nPhrase; iMask++){

        if( aMask[iMask] ){
          nScore += (((1 << iMask) & mask) ? 100 : 1);
        }else{
          miss |= (1 << iMask);
        }
        tmask = tmask | aMask[iMask];





      }

      if( nScore>nBest ){
        hlmask = tmask;
        missmask = miss;
        nBest = nScore;
        iBestOff = iOff;
................................................................................
  int nLead = 0;
  int nShift = 0;

  u64 mask = pSnip->hlmask;
  int iOff = pSnip->iOff;

  if( mask==0 ) return;
  assert( mask & (1 << (nToken-1)) );

  for(i=0; (mask & (1<<i))==0; i++);
  nLead = i;

  nShift = (nLead/2);
  if( iOff+nShift > nSz-nToken ) nShift = (nSz-nToken) - iOff;
  if( iOff+nShift < 0 ) nShift = -1 * iOff;

  iOff += nShift;
................................................................................

  rc = sqlite4_create_tokenizer(db, "simple", (void *)pEnv, 
      fts5SimpleCreate, fts5SimpleTokenize, fts5SimpleDestroy
  );
  if( rc!=SQLITE4_OK ) return rc;

  rc = sqlite4_create_mi_function(db, "rank", 0, SQLITE4_UTF8, 0, fts5Rank, 0);




  if( rc!=SQLITE4_OK ) return rc;

  rc = sqlite4_create_mi_function(
      db, "snippet", -1, SQLITE4_UTF8, 0, fts5Snippet, 0
  );
  return rc;
}








>





>
>
>
>
>


<







 







>







 







>





>
>
>
>
>
>
>
>
>
>



>
>
>
>
>
>
>
>
>
|
>







 







|





|







 







|












|



>

|

|


>
>
>
>
>







 







|

|







 







>
>
>
>








70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89

90
91
92
93
94
95
96
...
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
...
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
...
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
...
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
...
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
...
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
** on the nature of both the documents and queries. The implementation
** below sets each parameter to the midpoint of the suggested range.
*/
static void fts5Rank(sqlite4_context *pCtx, int nArg, sqlite4_value **apArg){
  const double b = 0.65;
  const double k1 = 1.6;

  sqlite4 *db = sqlite4_context_db_handle(pCtx);
  int rc = SQLITE4_OK;            /* Error code */
  Fts5RankCtx *p;                 /* Structure to store reusable values */
  int i;                          /* Used to iterate through phrases */
  double rank = 0.0;              /* UDF return value */

  int bExplain = 0;
  char *zExplain = 0;

  if( sqlite4_user_data(pCtx) ) bExplain = 1;

  p = sqlite4_get_auxdata(pCtx, 0);
  if( p==0 ){

    int nPhrase;                  /* Number of phrases in query expression */
    int nByte;                    /* Number of bytes of data to allocate */

    sqlite4_mi_phrase_count(pCtx, &nPhrase);
    nByte = sizeof(Fts5RankCtx) + nPhrase * sizeof(double);
    p = (Fts5RankCtx *)sqlite4DbMallocZero(db, nByte);
    sqlite4_set_auxdata(pCtx, 0, (void *)p, fts5RankFreeCtx);
................................................................................
      p->aIdf = (double *)&p[1];

      /* Determine the IDF weight for each phrase in the query. */
      rc = sqlite4_mi_total_rows(pCtx, &N);
      for(i=0; rc==SQLITE4_OK && i<nPhrase; i++){
        rc = sqlite4_mi_row_count(pCtx, -1, -1, i, &ni);
        if( rc==SQLITE4_OK ){
          assert( ni<=N );
          p->aIdf[i] = log((0.5 + N - ni) / (0.5 + ni));
        }
      }

      /* Determine the average document length */
      if( rc==SQLITE4_OK ){
        int nTotal;
................................................................................
    ** in this row (within any column). And dl to the number of tokens in
    ** the current row (again, in any column).  */
    rc = sqlite4_mi_match_count(pCtx, -1, -1, i, &tf); 
    if( rc==SQLITE4_OK ) rc = sqlite4_mi_size(pCtx, -1, -1, &dl); 

    /* Calculate the normalized document length */
    L = (double)dl / p->avgdl;


    /* Calculate the contribution to the rank made by this phrase. Then
    ** add it to variable rank.  */
    prank = (p->aIdf[i] * tf) / (k1 * ( (1.0 - b) + b * L) + tf);
    rank += prank;

    if( bExplain ){
      zExplain = sqlite4MAppendf(
          db, zExplain, "%s(idf=%.2f L=%.2f tf=%d) rank=%.2f", zExplain,
          p->aIdf[i], L, tf, prank
      );
      if( (i+1)<p->nPhrase ){
        zExplain = sqlite4MAppendf(db, zExplain, "%s<br>", zExplain);
      }
    }
  }

  if( rc==SQLITE4_OK ){
    if( bExplain ){
      if( p->nPhrase>1 ){
        zExplain = sqlite4MAppendf(
            db, zExplain, "%s<br>total=%.2f", zExplain, rank
        );
      }
      sqlite4_result_text(pCtx, zExplain, -1, SQLITE4_TRANSIENT);
      sqlite4DbFree(db, zExplain);
    }else{
      sqlite4_result_double(pCtx, rank);
    }
  }else{
    sqlite4_result_error_code(pCtx, rc);
  }
}

typedef struct Snippet Snippet;
typedef struct SnippetText SnippetText;
................................................................................
){
  SnippetCtx *p = (SnippetCtx *)pCtx;

  if( iOff<p->iOff ){
    return 0;
  }else if( iOff>=(p->iOff + p->nToken) ){
    fts5SnippetAppend(p, &p->zText[p->iFrom], p->iTo - p->iFrom);
    fts5SnippetAppend(p, p->zEllipses, -1);
    p->iFrom = -1;
    return 1;
  }else{
    int bHighlight;               /* True to highlight term */

    bHighlight = (p->mask & ((u64)1 << (iOff-p->iOff))) ? 1 : 0;

    if( p->iFrom==0 && p->iOff!=0 ){
      p->iFrom = iSrc;
      if( p->pOut->nOut==0 ) fts5SnippetAppend(p, p->zEllipses, -1);
    }

    if( bHighlight ){
................................................................................
      int nScore = 0;

      int nPTok;
      int iPTok;

      if( iColumn>=0 && iColumn!=iCol ) continue;

      allmask |= ((u64)1 << iPhrase);

      nShift = ((iPrevCol==iCol) ? (iOff-iPrev) : 100);

      for(iMask=0; iMask<nPhrase; iMask++){
        if( nShift<64){
          aMask[iMask] = aMask[iMask] >> nShift;
        }else{
          aMask[iMask] = 0;
        }
      }
      sqlite4_mi_phrase_token_count(pCtx, iPhrase, &nPTok);
      for(iPTok=0; iPTok<nPTok; iPTok++){
        aMask[iPhrase] = aMask[iPhrase] | ((u64)1 << (nToken-1+iPTok));
      }

      for(iMask=0; iMask<nPhrase; iMask++){
        int iBit;
        if( aMask[iMask] ){
          nScore += ((((u64)1 << iMask) & mask) ? 100 : 1);
        }else{
          miss |= ((u64)1 << iMask);
        }
        tmask = tmask | aMask[iMask];
        /* TODO: This is the Hamming Weight. There are much more efficient
        ** ways to calculate it. */
        for(iBit=0; iBit<nToken; iBit++){
          if( tmask & ((u64)1 << iBit) ) nScore++;
        }
      }

      if( nScore>nBest ){
        hlmask = tmask;
        missmask = miss;
        nBest = nScore;
        iBestOff = iOff;
................................................................................
  int nLead = 0;
  int nShift = 0;

  u64 mask = pSnip->hlmask;
  int iOff = pSnip->iOff;

  if( mask==0 ) return;
  assert( mask & ((u64)1 << (nToken-1)) );

  for(i=0; (mask & ((u64)1 << i))==0; i++);
  nLead = i;

  nShift = (nLead/2);
  if( iOff+nShift > nSz-nToken ) nShift = (nSz-nToken) - iOff;
  if( iOff+nShift < 0 ) nShift = -1 * iOff;

  iOff += nShift;
................................................................................

  rc = sqlite4_create_tokenizer(db, "simple", (void *)pEnv, 
      fts5SimpleCreate, fts5SimpleTokenize, fts5SimpleDestroy
  );
  if( rc!=SQLITE4_OK ) return rc;

  rc = sqlite4_create_mi_function(db, "rank", 0, SQLITE4_UTF8, 0, fts5Rank, 0);
  if( rc!=SQLITE4_OK ) return rc;
  rc = sqlite4_create_mi_function(
      db, "erank", 0, SQLITE4_UTF8, (void *)1, fts5Rank, 0
  );
  if( rc!=SQLITE4_OK ) return rc;

  rc = sqlite4_create_mi_function(
      db, "snippet", -1, SQLITE4_UTF8, 0, fts5Snippet, 0
  );
  return rc;
}

Changes to src/insert.c.

1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432

  /* Write the entry to each index. */
  for(i=0, pIdx=pTab->pIndex; pIdx; i++, pIdx=pIdx->pNext){
    assert( pIdx->eIndexType!=SQLITE4_INDEX_PRIMARYKEY || aRegIdx[i] );
    if( pIdx->eIndexType==SQLITE4_INDEX_FTS5 ){
      int iPK;
      sqlite4FindPrimaryKey(pTab, &iPK);
      sqlite4Fts5CodeUpdate(pParse, pIdx, aRegIdx[iPK], regContent, 0);
    }
    else if( aRegIdx[i] ){
      int regData = 0;
      int flags = 0;
      if( pIdx->eIndexType==SQLITE4_INDEX_PRIMARYKEY ){
        regData = regRec;
        flags = pik_flags;







|







1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432

  /* Write the entry to each index. */
  for(i=0, pIdx=pTab->pIndex; pIdx; i++, pIdx=pIdx->pNext){
    assert( pIdx->eIndexType!=SQLITE4_INDEX_PRIMARYKEY || aRegIdx[i] );
    if( pIdx->eIndexType==SQLITE4_INDEX_FTS5 ){
      int iPK;
      sqlite4FindPrimaryKey(pTab, &iPK);
      sqlite4Fts5CodeUpdate(pParse, pIdx, 0, aRegIdx[iPK], regContent, 0);
    }
    else if( aRegIdx[i] ){
      int regData = 0;
      int flags = 0;
      if( pIdx->eIndexType==SQLITE4_INDEX_PRIMARYKEY ){
        regData = regRec;
        flags = pik_flags;

Changes to src/rowset.c.

154
155
156
157
158
159
160
161
162
163
164
165



166
167
168
169
170
171
172
  memset(p, 0, sizeof(RowSet));
  p->isSorted = 1;
}


static u8 *rowsetAllocateChunk(RowSet *p, int nByte){
  int rowChunkSize = ROUND8(sizeof(RowSetChunk));
  u8 *pNew;                       /* New RowSetChunk */
  int nAlloc;                     /* Bytes to request from malloc() */
  nAlloc =  rowChunkSize + nByte;
  pNew = (u8 *)sqlite4DbMallocRaw(p->db, nAlloc);
  return (pNew ? (pNew + rowChunkSize) : 0);



}

static int rowsetEntryKeyCmp(RowSetEntry *pLeft, const u8 *aKey, int nKey){
  int nCmp = SQLITE4_MIN(pLeft->nKey, nKey);
  int res;
  res = memcmp(pLeft->aKey, aKey, nCmp);
  return (res ? res : (pLeft->nKey - nKey));







|


|
|
>
>
>







154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
  memset(p, 0, sizeof(RowSet));
  p->isSorted = 1;
}


static u8 *rowsetAllocateChunk(RowSet *p, int nByte){
  int rowChunkSize = ROUND8(sizeof(RowSetChunk));
  RowSetChunk *pNew;              /* New RowSetChunk */
  int nAlloc;                     /* Bytes to request from malloc() */
  nAlloc =  rowChunkSize + nByte;
  pNew = (RowSetChunk *)sqlite4DbMallocRaw(p->db, nAlloc);
  if( !pNew ) return 0;
  pNew->pNextChunk = p->pChunk;
  p->pChunk = pNew;
  return (u8 *)(&pNew[1]);
}

static int rowsetEntryKeyCmp(RowSetEntry *pLeft, const u8 *aKey, int nKey){
  int nCmp = SQLITE4_MIN(pLeft->nKey, nKey);
  int res;
  res = memcmp(pLeft->aKey, aKey, nCmp);
  return (res ? res : (pLeft->nKey - nKey));

Changes to src/sqliteInt.h.

3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
void sqlite4ShutdownFts5(sqlite4 *db);
void sqlite4CreateUsingIndex(Parse*, CreateIndex*, ExprList*, Token*, Token*);

int sqlite4Fts5IndexSz(void);
void sqlite4Fts5IndexInit(Parse *, Index *, ExprList *);
void sqlite4Fts5IndexFree(sqlite4 *, Index *);

int sqlite4Fts5Update(sqlite4 *, Fts5Info *, Mem *pPk, Mem *aArg, int, char **);
void sqlite4Fts5FreeInfo(sqlite4 *db, Fts5Info *);
void sqlite4Fts5CodeUpdate(Parse *, Index *pIdx, int iRegPk, int iRegData, int);
void sqlite4Fts5CodeCksum(Parse *, Index *, int, int, int);
void sqlite4Fts5CodeQuery(Parse *, Index *, int, int, int);

int sqlite4Fts5Pk(Fts5Cursor *, int, KVByteArray **, KVSize *);
int sqlite4Fts5Next(Fts5Cursor *pCsr);

int sqlite4Fts5EntryCksum(sqlite4 *, Fts5Info *, Mem *, Mem *, i64 *);
int sqlite4Fts5RowCksum(sqlite4 *, Fts5Info *, Mem *, Mem *, i64 *);
int sqlite4Fts5Open(sqlite4*, Fts5Info*, const char*, int, Fts5Cursor**,char**);
int sqlite4Fts5Valid(Fts5Cursor *);
void sqlite4Fts5Close(Fts5Cursor *);

#endif /* _SQLITEINT_H_ */







|

|













3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
void sqlite4ShutdownFts5(sqlite4 *db);
void sqlite4CreateUsingIndex(Parse*, CreateIndex*, ExprList*, Token*, Token*);

int sqlite4Fts5IndexSz(void);
void sqlite4Fts5IndexInit(Parse *, Index *, ExprList *);
void sqlite4Fts5IndexFree(sqlite4 *, Index *);

int sqlite4Fts5Update(sqlite4 *, Fts5Info *, int, Mem *, Mem *, int, char **);
void sqlite4Fts5FreeInfo(sqlite4 *db, Fts5Info *);
void sqlite4Fts5CodeUpdate(Parse *, Index *pIdx, int, int, int, int);
void sqlite4Fts5CodeCksum(Parse *, Index *, int, int, int);
void sqlite4Fts5CodeQuery(Parse *, Index *, int, int, int);

int sqlite4Fts5Pk(Fts5Cursor *, int, KVByteArray **, KVSize *);
int sqlite4Fts5Next(Fts5Cursor *pCsr);

int sqlite4Fts5EntryCksum(sqlite4 *, Fts5Info *, Mem *, Mem *, i64 *);
int sqlite4Fts5RowCksum(sqlite4 *, Fts5Info *, Mem *, Mem *, i64 *);
int sqlite4Fts5Open(sqlite4*, Fts5Info*, const char*, int, Fts5Cursor**,char**);
int sqlite4Fts5Valid(Fts5Cursor *);
void sqlite4Fts5Close(Fts5Cursor *);

#endif /* _SQLITEINT_H_ */

Changes to src/vdbe.c.

4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
....
4863
4864
4865
4866
4867
4868
4869




4870
4871
4872
4873
4874


4875
4876
4877
4878
4879
4880






4881
4882
4883
4884
4885
4886
4887
4888
    sqlite4DebugPrintf("SQL-trace: %s\n", zTrace);
  }
#endif /* SQLITE4_DEBUG */
  break;
}
#endif

/* Opcode: FtsUpdate P1 * P3 P4 P5
**
** This opcode is used to write to an FTS index. P4 points to an Fts5Info 
** object describing the index.
**
** If argument P5 is non-zero, then entries are removed from the FTS index.
** If it is zero, then entries are inserted. In other words, when a row
** is deleted from a table with an FTS index, this opcode is invoked with
................................................................................
** row is updated, this opcode is invoked twice - once with P5==1 and then
** again with P5==0.
**
** Register P1 contains the PK (a blob in key format) of the affected row.
** P3 is the first in an array of N registers, where N is the number of
** columns in the indexed table. Each register contains the value for the
** corresponding table column.




*/
case OP_FtsUpdate: {
  Fts5Info *pInfo;                /* Description of fts5 index to update */
  Mem *pKey;                      /* Primary key of indexed row */
  Mem *aArg;                      /* Pointer to array of N arguments */



  assert( pOp->p4type==P4_FTS5INFO );
  pInfo = pOp->p4.pFtsInfo;
  aArg = &aMem[pOp->p3];
  pKey = &aMem[pOp->p1];







  rc = sqlite4Fts5Update(db, pInfo, pKey, aArg, pOp->p5, &p->zErrMsg);
  break;
}

/*
** Opcode: FtsCksum P1 * P3 P4 P5
**
** This opcode is used by the integrity-check procedure that verifies that







|







 







>
>
>
>





>
>






>
>
>
>
>
>
|







4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
....
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
    sqlite4DebugPrintf("SQL-trace: %s\n", zTrace);
  }
#endif /* SQLITE4_DEBUG */
  break;
}
#endif

/* Opcode: FtsUpdate P1 P2 P3 P4 P5
**
** This opcode is used to write to an FTS index. P4 points to an Fts5Info 
** object describing the index.
**
** If argument P5 is non-zero, then entries are removed from the FTS index.
** If it is zero, then entries are inserted. In other words, when a row
** is deleted from a table with an FTS index, this opcode is invoked with
................................................................................
** row is updated, this opcode is invoked twice - once with P5==1 and then
** again with P5==0.
**
** Register P1 contains the PK (a blob in key format) of the affected row.
** P3 is the first in an array of N registers, where N is the number of
** columns in the indexed table. Each register contains the value for the
** corresponding table column.
**
** If P2 is non-zero, then it is a register containing the root page number
** of the fts index to update. If it is zero, then the root page of the 
** index is available as part of the Fts5Info structure.
*/
case OP_FtsUpdate: {
  Fts5Info *pInfo;                /* Description of fts5 index to update */
  Mem *pKey;                      /* Primary key of indexed row */
  Mem *aArg;                      /* Pointer to array of N arguments */
  Mem *pRoot;                     /* Root page number */
  int iRoot;

  assert( pOp->p4type==P4_FTS5INFO );
  pInfo = pOp->p4.pFtsInfo;
  aArg = &aMem[pOp->p3];
  pKey = &aMem[pOp->p1];

  if( pOp->p2 ){
    iRoot = aMem[pOp->p2].u.i;
  }else{
    iRoot = 0;
  }

  rc = sqlite4Fts5Update(db, pInfo, iRoot, pKey, aArg, pOp->p5, &p->zErrMsg);
  break;
}

/*
** Opcode: FtsCksum P1 * P3 P4 P5
**
** This opcode is used by the integrity-check procedure that verifies that

Changes to test/fts5create.test.

70
71
72
73
74
75
76
77
78
79
80
















81
82
83
84
  CREATE INDEX ft ON t2 USING fts5(tukenizer=simple);
} {1 {unrecognized argument: "tukenizer"}}

do_catchsql_test 2.3 { 
  CREATE INDEX ft ON t2 USING fts5("a b c");
} {1 {unrecognized argument: "a b c"}}

breakpoint
do_catchsql_test 2.4 {
  CREATE INDEX ft ON t2 USING fts5(tokenizer="nosuch");
} {1 {no such tokenizer: "nosuch"}}

















finish_test









<



>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




70
71
72
73
74
75
76

77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
  CREATE INDEX ft ON t2 USING fts5(tukenizer=simple);
} {1 {unrecognized argument: "tukenizer"}}

do_catchsql_test 2.3 { 
  CREATE INDEX ft ON t2 USING fts5("a b c");
} {1 {unrecognized argument: "a b c"}}


do_catchsql_test 2.4 {
  CREATE INDEX ft ON t2 USING fts5(tokenizer="nosuch");
} {1 {no such tokenizer: "nosuch"}}

#-------------------------------------------------------------------------
#
reset_db

do_execsql_test 3.1 {
  CREATE TABLE t1(a, b, c, PRIMARY KEY(a));
  INSERT INTO t1 VALUES(1, 'a b c d', 'e f g h');
  INSERT INTO t1 VALUES(2, 'e f g h', 'a b c d');
}

do_execsql_test 3.2 {
  CREATE INDEX ft ON t1 USING fts5();
  PRAGMA fts_check(ft);
} {ok}


finish_test


Changes to www/lsmusr.wiki.

146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
based system. Additionally, avoiding random writes in favour of largely
contiguous updates (as LSM does) can significantly reduce the wear on SSD or
flash memory devices.

<p>Although it has quite different features to LSM in other respects, 
LevelDB makes similar performance tradeoffs.

<p>Benchmark test results for LSM are <a href=#>available here</a>. <i>Todo:
Fix this link to point to a page with performance graphs.</i>


<h1 id=using_lsm_in_applications>2. Using LSM in Applications </h1>

<p>LSM is not currently built or distributed independently. Instead, it
is part of the SQLite4 library. To use LSM in an application, the application
links against libsqlite4 and includes the header file "lsm.h" in any files
that access the LSM API.







|
<
<







146
147
148
149
150
151
152
153


154
155
156
157
158
159
160
based system. Additionally, avoiding random writes in favour of largely
contiguous updates (as LSM does) can significantly reduce the wear on SSD or
flash memory devices.

<p>Although it has quite different features to LSM in other respects, 
LevelDB makes similar performance tradeoffs.

<p>Benchmark test results for LSM are <a href=lsmperf.wiki>available here</a>.



<h1 id=using_lsm_in_applications>2. Using LSM in Applications </h1>

<p>LSM is not currently built or distributed independently. Instead, it
is part of the SQLite4 library. To use LSM in an application, the application
links against libsqlite4 and includes the header file "lsm.h" in any files
that access the LSM API.