Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix problems with doclist-indexes involving very large rowids. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
1e5994097e4c740c5173ea9718c39357 |
User & Date: | dan 2015-04-22 20:14:46.893 |
Context
2015-04-22
| ||
20:58 | Add extra OOM tests for fts5. (check-in: 2dd59b5762 user: dan tags: fts5) | |
20:14 | Fix problems with doclist-indexes involving very large rowids. (check-in: 1e5994097e user: dan tags: fts5) | |
09:40 | Update this branch with latest trunk changes. (check-in: 9797482ded user: dan tags: fts5) | |
Changes
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
2989 2990 2991 2992 2993 2994 2995 | fts5BufferZero(&pPage->buf); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); pPage->pgno++; /* Increase the leaves written counter */ pWriter->nLeafWritten++; | | > | 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 | fts5BufferZero(&pPage->buf); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); pPage->pgno++; /* Increase the leaves written counter */ pWriter->nLeafWritten++; /* The new leaf holds no terms or rowids */ pWriter->bFirstTermInPage = 1; pWriter->bFirstRowidInPage = 1; } /* ** Append term pTerm/nTerm to the segment being written by the writer passed ** as the second argument. ** ** If an error occurs, set the Fts5Index.rc error code. If an error has |
︙ | ︙ | |||
3063 3064 3065 3066 3067 3068 3069 | pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; /* If the current leaf page is full, flush it to disk. */ if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); | < | 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 | pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; /* If the current leaf page is full, flush it to disk. */ if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); } } /* ** Append a docid and position-list size field to the writers output. */ static void fts5WriteAppendRowid( |
︙ | ︙ | |||
3102 3103 3104 3105 3106 3107 3108 | pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); | < < | 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 | pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); } } } static void fts5WriteAppendPoslistInt( Fts5Index *p, Fts5SegWriter *pWriter, int iVal ){ if( p->rc==SQLITE_OK ){ Fts5PageWriter *pPage = &pWriter->aWriter[0]; fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal); if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); } } } static void fts5WriteAppendPoslistData( Fts5Index *p, Fts5SegWriter *pWriter, |
︙ | ︙ | |||
3144 3145 3146 3147 3148 3149 3150 | i64 dummy; nCopy += getVarint(&a[nCopy], (u64*)&dummy); } fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a); a += nCopy; n -= nCopy; fts5WriteFlushLeaf(p, pWriter); | < | 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 | i64 dummy; nCopy += getVarint(&a[nCopy], (u64*)&dummy); } fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a); a += nCopy; n -= nCopy; fts5WriteFlushLeaf(p, pWriter); } if( n>0 ){ fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); } } static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){ |
︙ | ︙ | |||
3663 3664 3665 3666 3667 3668 3669 | if( pgsz>=(pBuf->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); }else{ i64 iRowid = 0; i64 iDelta = 0; int iOff = 0; | | > | | | 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 | if( pgsz>=(pBuf->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); }else{ i64 iRowid = 0; i64 iDelta = 0; int iOff = 0; writer.bFirstRowidInPage = 0; /* The entire doclist will not fit on this leaf. The following ** loop iterates through the poslists that make up the current ** doclist. */ while( iOff<nDoclist ){ int nPos; int nCopy; int bDummy; iOff += getVarint(&pDoclist[iOff], (u64*)&iDelta); nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy); nCopy += nPos; iRowid += iDelta; if( writer.bFirstRowidInPage ){ fts5PutU16(&pBuf->p[0], pBuf->n); /* first docid on page */ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid); writer.bFirstRowidInPage = 0; fts5WriteDlidxAppend(p, &writer, iRowid); }else{ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta); } assert( pBuf->n<=pBuf->nSpace ); if( (pBuf->n + nCopy) <= pgsz ){ |
︙ | ︙ | |||
3704 3705 3706 3707 3708 3709 3710 | while( 1 ){ int nSpace = pgsz - pBuf->n; int n = 0; if( (nCopy - iPos)<=nSpace ){ n = nCopy - iPos; }else{ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); | < < | 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 | while( 1 ){ int nSpace = pgsz - pBuf->n; int n = 0; if( (nCopy - iPos)<=nSpace ){ n = nCopy - iPos; }else{ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); } assert( n>0 ); fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); iPos += n; if( pBuf->n>=pgsz ){ fts5WriteFlushLeaf(p, &writer); pBuf = &writer.aWriter[0].buf; } if( iPos>=nCopy ) break; } } iOff += nCopy; } } pBuf->p[pBuf->n++] = '\0'; assert( pBuf->n<=pBuf->nSpace ); |
︙ | ︙ |
Changes to ext/fts5/test/fts5dlidx.test.
︙ | ︙ | |||
27 28 29 30 31 32 33 | } do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } | | | > | > > > > > | < < | | | > | | | | | | | | | | | | | | | | > > < < | | | | | | | | > > > > > > | 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 | } do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x); INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } # This test populates the FTS5 table containing $nEntry entries. Rows are # numbered from 0 to ($nEntry-1). The rowid for row $i is: # # ($iFirst + $i*$nStep) # # Each document is of the form "a b c a b c a b c...". If the row number ($i) # is an integer multiple of $spc1, then an "x" token is appended to the # document. If it is *also* a multiple of $spc2, a "y" token is also appended. # proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} { do_execsql_test $tn.0 { DELETE FROM t1 } set xdoc [list] set ydoc [list] execsql BEGIN for {set i 0} {$i < $nEntry} {incr i} { set rowid [expr $i * $nStep] set doc [string trim [string repeat "a b c " 100]] if {($i % $spc1)==0} { lappend xdoc $rowid append doc " x" if {($i % $spc2)==0} { lappend ydoc $rowid append doc " y" } } execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) } } execsql COMMIT do_test $tn.1 { execsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {} do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc do_fb_test $tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc do_fb_test $tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc do_fb_test $tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc do_fb_test $tn.5.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc do_fb_test $tn.5.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc } do_dlidx_test1 1.1 10 100 10000 0 1000 do_dlidx_test1 1.2 10 10 10000 0 128 do_dlidx_test1 1.3 10 10 100 0 36028797018963970 do_dlidx_test1 1.3 10 10 50 0 150000000000000000 finish_test |