Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Explicitly limit the size of fts5 tokens to 32768 bytes. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: | 70fc69eed9b09159899d7cbd1416a59d |
User & Date: | dan 2016-03-23 15:04:00.239 |
Context
2016-03-23
| ||
15:53 | Remove an unused local variable from FTS5. (check-in: 0ed693c29f user: drh tags: trunk) | |
15:04 | Explicitly limit the size of fts5 tokens to 32768 bytes. (check-in: 70fc69eed9 user: dan tags: trunk) | |
13:46 | Update a requirement mark. No changes to code. (check-in: 412984642a user: drh tags: trunk) | |
Changes
Changes to ext/fts5/fts5Int.h.
︙ | ︙ | |||
43 44 45 46 47 48 49 50 51 52 53 54 55 56 | ** Constants for the largest and smallest possible 64-bit signed integers. */ # define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) # define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) #endif /* ** Maximum number of prefix indexes on single FTS5 table. This must be ** less than 32. If it is set to anything large than that, an #error ** directive in fts5_index.c will cause the build to fail. */ #define FTS5_MAX_PREFIX_INDEXES 31 | > > > > | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | ** Constants for the largest and smallest possible 64-bit signed integers. */ # define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) # define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) #endif /* Truncate very long tokens to this many bytes. Hard limit is ** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset ** field that occurs at the start of each leaf page (see fts5_index.c). */ #define FTS5_MAX_TOKEN_SIZE 32768 /* ** Maximum number of prefix indexes on single FTS5 table. This must be ** less than 32. If it is set to anything large than that, an #error ** directive in fts5_index.c will cause the build to fail. */ #define FTS5_MAX_PREFIX_INDEXES 31 |
︙ | ︙ |
Changes to ext/fts5/fts5_expr.c.
︙ | ︙ | |||
1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 | TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; UNUSED_PARAM2(iUnused1, iUnused2); /* If an error has already occurred, this is a no-op */ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){ Fts5ExprTerm *pSyn; int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); if( pSyn==0 ){ rc = SQLITE_NOMEM; | > | 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 | TokenCtx *pCtx = (TokenCtx*)pContext; Fts5ExprPhrase *pPhrase = pCtx->pPhrase; UNUSED_PARAM2(iUnused1, iUnused2); /* If an error has already occurred, this is a no-op */ if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){ Fts5ExprTerm *pSyn; int nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; pSyn = (Fts5ExprTerm*)sqlite3_malloc(nByte); if( pSyn==0 ){ rc = SQLITE_NOMEM; |
︙ | ︙ | |||
2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 | ){ Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx; Fts5Expr *pExpr = p->pExpr; int i; UNUSED_PARAM2(iUnused1, iUnused2); if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; for(i=0; i<pExpr->nPhrase; i++){ Fts5ExprTerm *pTerm; if( p->aPopulator[i].bOk==0 ) continue; for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ int nTerm = (int)strlen(pTerm->zTerm); if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix)) | > | 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 | ){ Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx; Fts5Expr *pExpr = p->pExpr; int i; UNUSED_PARAM2(iUnused1, iUnused2); if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; for(i=0; i<pExpr->nPhrase; i++){ Fts5ExprTerm *pTerm; if( p->aPopulator[i].bOk==0 ) continue; for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ int nTerm = (int)strlen(pTerm->zTerm); if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix)) |
︙ | ︙ |
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 | pIter->iEndofDoclist = iTermOff + nExtra; } pIter->iPgidxOff = iPgidx; fts5SegIterLoadRowid(p, pIter); fts5SegIterLoadNPos(p, pIter); } /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg. If there is no such term in the index, the iterator is set to EOF. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If ** an error has already occurred when this function is called, it is a no-op. */ static void fts5SegIterSeekInit( Fts5Index *p, /* FTS5 backend */ const u8 *pTerm, int nTerm, /* Term to seek to */ int flags, /* Mask of FTS5INDEX_XXX flags */ Fts5StructureSegment *pSeg, /* Description of segment */ Fts5SegIter *pIter /* Object to populate */ ){ int iPg = 1; int bGe = (flags & FTS5INDEX_QUERY_SCAN); int bDlidx = 0; /* True if there is a doclist-index */ assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); assert( pTerm && nTerm ); memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; /* This block sets stack variable iPg to the leaf page number that may ** contain term (pTerm/nTerm), if it is present in the segment. */ | > > > > > > > > > > > > > | < < < < < < < | | | | | | 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 | pIter->iEndofDoclist = iTermOff + nExtra; } pIter->iPgidxOff = iPgidx; fts5SegIterLoadRowid(p, pIter); fts5SegIterLoadNPos(p, pIter); } static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ if( p->pIdxSelect==0 ){ Fts5Config *pConfig = p->pConfig; fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf( "SELECT pgno FROM '%q'.'%q_idx' WHERE " "segid=? 
AND term<=? ORDER BY term DESC LIMIT 1", pConfig->zDb, pConfig->zName )); } return p->pIdxSelect; } /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg. If there is no such term in the index, the iterator is set to EOF. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If ** an error has already occurred when this function is called, it is a no-op. */ static void fts5SegIterSeekInit( Fts5Index *p, /* FTS5 backend */ const u8 *pTerm, int nTerm, /* Term to seek to */ int flags, /* Mask of FTS5INDEX_XXX flags */ Fts5StructureSegment *pSeg, /* Description of segment */ Fts5SegIter *pIter /* Object to populate */ ){ int iPg = 1; int bGe = (flags & FTS5INDEX_QUERY_SCAN); int bDlidx = 0; /* True if there is a doclist-index */ sqlite3_stmt *pIdxSelect = 0; assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); assert( pTerm && nTerm ); memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; /* This block sets stack variable iPg to the leaf page number that may ** contain term (pTerm/nTerm), if it is present in the segment. */ pIdxSelect = fts5IdxSelectStmt(p); if( p->rc ) return; sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid); sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){ i64 val = sqlite3_column_int(pIdxSelect, 0); iPg = (int)(val>>1); bDlidx = (val & 0x0001); } p->rc = sqlite3_reset(pIdxSelect); if( iPg<pSeg->pgnoFirst ){ iPg = pSeg->pgnoFirst; bDlidx = 0; } pIter->iLeafPgno = iPg - 1; |
︙ | ︙ | |||
3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 | #ifdef SQLITE_DEBUG for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ); } } assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT ); #endif } } return iSegid; } | > > > > > > > > > > > > | 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 | #ifdef SQLITE_DEBUG for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ); } } assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT ); { sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p); if( p->rc==SQLITE_OK ){ int rc; u8 aBlob[2] = {0xff, 0xff}; sqlite3_bind_int(pIdxSelect, 1, iSegid); sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC); assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW ); p->rc = sqlite3_reset(pIdxSelect); } } #endif } } return iSegid; } |
︙ | ︙ | |||
3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 | } } static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; Fts5PageWriter *pPage = &pWriter->writer; i64 iRowid; assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); /* Set the szLeaf header field. */ assert( 0==fts5GetU16(&pPage->buf.p[2]) ); fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n); | > > > | 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 | } } static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; Fts5PageWriter *pPage = &pWriter->writer; i64 iRowid; static int nCall = 0; nCall++; assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); /* Set the szLeaf header field. */ assert( 0==fts5GetU16(&pPage->buf.p[2]) ); fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n); |
︙ | ︙ |
Changes to ext/fts5/fts5_storage.c.
︙ | ︙ | |||
365 366 367 368 369 370 371 372 373 374 375 376 377 378 | int nToken, /* Size of token in bytes */ int iUnused1, /* Start offset of token */ int iUnused2 /* End offset of token */ ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; UNUSED_PARAM2(iUnused1, iUnused2); if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); } /* | > | 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 | int nToken, /* Size of token in bytes */ int iUnused1, /* Start offset of token */ int iUnused2 /* End offset of token */ ){ Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; Fts5Index *pIdx = pCtx->pStorage->pIndex; UNUSED_PARAM2(iUnused1, iUnused2); if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); } /* |
︙ | ︙ | |||
811 812 813 814 815 816 817 818 819 820 821 822 823 824 | int bPresent; int ii; int rc = SQLITE_OK; int iPos; int iCol; UNUSED_PARAM2(iUnused1, iUnused2); if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } switch( pCtx->pConfig->eDetail ){ case FTS5_DETAIL_FULL: | > | 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 | int bPresent; int ii; int rc = SQLITE_OK; int iPos; int iCol; UNUSED_PARAM2(iUnused1, iUnused2); if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ pCtx->szCol++; } switch( pCtx->pConfig->eDetail ){ case FTS5_DETAIL_FULL: |
︙ | ︙ |
Changes to ext/fts5/test/fts5simple.test.
︙ | ︙ | |||
443 444 445 446 447 448 449 450 451 | execsql { INSERT INTO x1(x1) VALUES('optimize'); } execsql { DELETE FROM x1 WHERE rowid = 4; } } {} do_execsql_test 20.2 { INSERT INTO x1(x1) VALUES('optimize'); INSERT INTO x1(x1) VALUES('integrity-check'); } {} finish_test | > > > > > > > > > > > > > > > > > > > > | 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 | execsql { INSERT INTO x1(x1) VALUES('optimize'); } execsql { DELETE FROM x1 WHERE rowid = 4; } } {} do_execsql_test 20.2 { INSERT INTO x1(x1) VALUES('optimize'); INSERT INTO x1(x1) VALUES('integrity-check'); } {} #------------------------------------------------------------------------- reset_db set doc "a b [string repeat x 100000]" do_execsql_test 21.0 { CREATE VIRTUAL TABLE x1 USING fts5(x); INSERT INTO x1(rowid, x) VALUES(11111, $doc); INSERT INTO x1(rowid, x) VALUES(11112, $doc); } do_execsql_test 21.1 { INSERT INTO x1(x1) VALUES('integrity-check'); } do_execsql_test 21.2 { SELECT rowid FROM x1($doc); } {11111 11112} do_execsql_test 21.3 { DELETE FROM x1 WHERE rowid=11111; INSERT INTO x1(x1) VALUES('integrity-check'); SELECT rowid FROM x1($doc); } {11112} finish_test |