Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Fix some fts5 problems with very large position lists. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts5 |
Files: | files | file ages | folders |
SHA1: |
2ea8f9cbe67dac60c1a0a661c95a03ec |
User & Date: | dan 2015-04-20 18:48:57.780 |
Context
2015-04-21
| ||
19:07 | Fix an fts5 problem with large deletes. (check-in: e50e8031d6 user: dan tags: fts5) | |
2015-04-20
| ||
18:48 | Fix some fts5 problems with very large position lists. (check-in: 2ea8f9cbe6 user: dan tags: fts5) | |
2015-04-15
| ||
18:49 | Logically store updates as (insert+delete) within the FTS tree. This allows keys to be annihilated more quickly under some circumstances. (check-in: 50fae1f000 user: dan tags: fts5) | |
Changes
Changes to ext/fts5/fts5_index.c.
︙ | ︙ | |||
1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 | /* ** Iterator pIter currently points to the first rowid in a doclist. This ** function sets the iterator up so that iterates in reverse order through ** the doclist. */ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ Fts5Data *pLast = 0; int pgnoLast = 0; | > | > > > | | | > > > | | 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 | /* ** Iterator pIter currently points to the first rowid in a doclist. This ** function sets the iterator up so that iterates in reverse order through ** the doclist. */ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){ Fts5DlidxIter *pDlidx = pIter->pDlidx; Fts5Data *pLast = 0; int pgnoLast = 0; if( pDlidx ){ /* If the doclist-iterator is already at EOF, then the current doclist ** contains no entries except those on the current page. */ if( fts5DlidxIterEof(p, pDlidx)==0 ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = pDlidx->iLeafPgno; pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast)); }else{ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); } }else{ int iOff; /* Byte offset within pLeaf */ Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ /* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first ** byte of position-list content for the current rowid. Back it up ** so that it points to the start of the position-list size field. */ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); iOff = pIter->iLeafOffset; assert( iOff>=4 ); /* Search for a new term within the current leaf. If one can be found, ** then this page contains the largest rowid for the current term. */ while( iOff<pLeaf->n ){ int nPos; |
︙ | ︙ | |||
3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 | Fts5Buffer buf; memset(&buf, 0, sizeof(Fts5Buffer)); for(i=0; i<pIter->nSeg; i++){ Fts5SegIter *pSeg = &pIter->aSeg[i]; if( pSeg->pSeg==0 ){ /* no-op */ }else if( pSeg->pLeaf==0 ){ pSeg->pSeg->pgnoLast = 0; pSeg->pSeg->pgnoFirst = 0; }else{ int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ i64 iLeafRowid; Fts5Data *pData; int iId = pSeg->pSeg->iSegid; | > > > | 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 | Fts5Buffer buf; memset(&buf, 0, sizeof(Fts5Buffer)); for(i=0; i<pIter->nSeg; i++){ Fts5SegIter *pSeg = &pIter->aSeg[i]; if( pSeg->pSeg==0 ){ /* no-op */ }else if( pSeg->pLeaf==0 ){ /* All keys from this input segment have been transfered to the output. ** Set both the first and last page-numbers to 0 to indicate that the ** segment is now empty. */ pSeg->pSeg->pgnoLast = 0; pSeg->pSeg->pgnoFirst = 0; }else{ int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ i64 iLeafRowid; Fts5Data *pData; int iId = pSeg->pSeg->iSegid; |
︙ | ︙ | |||
4088 4089 4090 4091 4092 4093 4094 | } fts5DlidxIterFree(pDlidx); fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf); } } | > > > | > > > | 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 | } fts5DlidxIterFree(pDlidx); fts5DlidxIterTestReverse(p, iIdx, iSegid, iter.iLeaf); } } /* Either iter.iLeaf must be the rightmost leaf-page in the segment, or ** else the segment has been completely emptied by an ongoing merge ** operation. */ if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast && (pSeg->pgnoFirst || pSeg->pgnoLast) ){ p->rc = FTS5_CORRUPT; } fts5BtreeIterFree(&iter); } /* |
︙ | ︙ |
Added ext/fts5/test/fts5bigpl.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | # 2015 April 21 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # # This test is focused on really large position lists. Those that require # 4 or 5 byte position-list size varints. Because of the amount of memory # required, these tests only run on 64-bit platforms. # source [file join [file dirname [info script]] fts5_common.tcl] set testprefix fts5bigpl if { $tcl_platform(wordSize)<8 } { finish_test return } do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x) } do_test 1.1 { foreach t {a b c d e f g h i j} { set doc [string repeat "$t " 1200000] execsql { INSERT INTO t1 VALUES($doc) } } execsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {} do_test 1.2 { execsql { DELETE FROM t1 } foreach t {"a b" "b a" "c d" "d c"} { set doc [string repeat "$t " 600000] execsql { INSERT INTO t1 VALUES($doc) } } execsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {} # 5-byte varint. This test takes 30 seconds or so on a 2014 workstation. # The generated database is roughly 635MiB. # do_test 2.1...slow { execsql { DELETE FROM t1 } foreach t {a} { set doc [string repeat "$t " 150000000] execsql { INSERT INTO t1 VALUES($doc) } } execsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {} finish_test |