Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Modify incremental merge code to merge nMin segments at a time. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts4-incr-merge |
Files: | files | file ages | folders |
SHA1: |
cd34bc1af4ba608ea3b52bab55bcfe00 |
User & Date: | dan 2012-03-15 17:45:50.857 |
Context
2012-03-16
| ||
14:54 | Add a comment to the FTS getAbsoluteLevel() function. No actual code changes. (check-in: 7e0f861bed user: dan tags: fts4-incr-merge) | |
2012-03-15
| ||
17:45 | Modify incremental merge code to merge nMin segments at a time. (check-in: cd34bc1af4 user: dan tags: fts4-incr-merge) | |
2012-03-14
| ||
20:01 | Add tests for incremental merge code. (check-in: 570473729d user: dan tags: fts4-incr-merge) | |
Changes
Changes to ext/fts3/fts3_write.c.
︙ | ︙ | |||
302 303 304 305 306 307 308 | ** of the oldest level in the db that contains at least ? segments. Or, ** if no level in the FTS index contains more than ? segments, the statement ** returns zero rows. */ /* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>?" " ORDER BY (level %% 1024) DESC LIMIT 1", /* Estimate the upper limit on the number of leaf nodes in a new segment | | | | | 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 | ** of the oldest level in the db that contains at least ? segments. Or, ** if no level in the FTS index contains more than ? segments, the statement ** returns zero rows. */ /* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>?" " ORDER BY (level %% 1024) DESC LIMIT 1", /* Estimate the upper limit on the number of leaf nodes in a new segment ** created by merging the oldest :2 segments from absolute level :1. See ** function fts3Incrmerge() for details. */ /* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) " " FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?", /* SQL_DELETE_SEGDIR_ENTRY ** Delete the %_segdir entry on absolute level :1 with index :2. */ /* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", /* SQL_SHIFT_SEGDIR_ENTRIES ** Reduce by one the idx values of all segments on absolute level :1 with |
︙ | ︙ | |||
1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 | if( rc==SQLITE_OK ){ /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already ** full, merge all segments in level iLevel into a single iLevel+1 ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. */ if( iNext>=FTS3_MERGE_COUNT ){ rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel); *piIdx = 0; }else{ *piIdx = iNext; } } | > > > | 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 | if( rc==SQLITE_OK ){ /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already ** full, merge all segments in level iLevel into a single iLevel+1 ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. */ if( iNext>=FTS3_MERGE_COUNT ){ sqlite3_log(SQLITE_OK, "16-way merge at level=%d langid=%d index=%d", iLevel, iLangid, iIndex ); rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel); *piIdx = 0; }else{ *piIdx = iNext; } } |
︙ | ︙ | |||
3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 | ** This function opens a cursor used to read the input data for an ** incremental merge operation. Specifically, it opens a cursor to scan ** the oldest two segments (idx=0 and idx=1) in absolute level iAbsLevel. */ static int fts3IncrmergeCsr( Fts3Table *p, /* FTS3 table handle */ sqlite3_int64 iAbsLevel, /* Absolute level to open */ Fts3MultiSegReader *pCsr /* Cursor object to populate */ ){ int rc; /* Return Code */ | > | > > > > | | | | | 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 | ** This function opens a cursor used to read the input data for an ** incremental merge operation. Specifically, it opens a cursor to scan ** the oldest two segments (idx=0 and idx=1) in absolute level iAbsLevel. */ static int fts3IncrmergeCsr( Fts3Table *p, /* FTS3 table handle */ sqlite3_int64 iAbsLevel, /* Absolute level to open */ int nSeg, /* Number of segments to merge */ Fts3MultiSegReader *pCsr /* Cursor object to populate */ ){ int rc; /* Return Code */ sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */ int nByte; /* Bytes allocated at pCsr->apSegment[] */ assert( nSeg>=2 ); memset(pCsr, 0, sizeof(*pCsr)); nByte = sizeof(Fts3SegReader *) * nSeg; pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); if( pCsr->apSegment==0 ){ rc = SQLITE_NOMEM; }else{ memset(pCsr->apSegment, 0, nByte); pCsr->nSegment = nSeg; rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); } if( rc==SQLITE_OK ){ int i; int rc2; sqlite3_bind_int64(pStmt, 1, iAbsLevel); for(i=0; rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && i<nSeg; i++){ rc = sqlite3Fts3SegReaderNew(i, 0, sqlite3_column_int64(pStmt, 1), /* segdir.start_block */ sqlite3_column_int64(pStmt, 2), /* segdir.leaves_end_block */ sqlite3_column_int64(pStmt, 3), /* segdir.end_block */ sqlite3_column_blob(pStmt, 4), /* segdir.root */ sqlite3_column_bytes(pStmt, 4), /* segdir.root */ &pCsr->apSegment[i] |
︙ | ︙ | |||
3814 3815 3816 3817 3818 3819 3820 | ** ** In the actual code below, the value "10" is replaced with the ** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT. */ static int fts3IncrmergeWriter( Fts3Table *p, /* Fts3 table handle */ sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ | | < > > > | 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 | ** ** In the actual code below, the value "10" is replaced with the ** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT. */ static int fts3IncrmergeWriter( Fts3Table *p, /* Fts3 table handle */ sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ Fts3MultiSegReader *pCsr, /* Cursor that data will be read from */ IncrmergeWriter *pWriter /* Populate this object */ ){ int rc; /* Return Code */ int i; /* Iterator variable */ int nLeafEst; /* Blocks allocated for leaf nodes */ int iIdx; /* Index of output segment */ sqlite3_stmt *pLeafEst = 0; /* SQL used to determine nLeafEst */ sqlite3_stmt *pFirstBlock = 0; /* SQL used to determine first block */ sqlite3_stmt *pOutputIdx = 0; /* SQL used to find output index */ const char *zKey = pCsr->zTerm; int nKey = pCsr->nTerm; rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0); if( rc==SQLITE_OK ){ sqlite3_bind_int(pOutputIdx, 1, iAbsLevel+1); sqlite3_step(pOutputIdx); iIdx = sqlite3_column_int(pOutputIdx, 0) - 1; rc = sqlite3_reset(pOutputIdx); } if( rc!=SQLITE_OK ) return rc; assert( zKey ); assert( pWriter->nLeafEst==0 ); rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx, zKey, nKey, pWriter); if( rc!=SQLITE_OK || pWriter->nLeafEst ) return rc; iIdx++; /* Calculate nLeafEst. */ rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0); if( rc==SQLITE_OK ){ sqlite3_bind_int64(pLeafEst, 1, iAbsLevel); sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment); if( SQLITE_ROW==sqlite3_step(pLeafEst) ){ nLeafEst = sqlite3_column_int(pLeafEst, 0); } rc = sqlite3_reset(pLeafEst); } if( rc!=SQLITE_OK ) return rc; |
︙ | ︙ | |||
4116 4117 4118 4119 4120 4121 4122 | Fts3Table *p, sqlite3_int64 iAbsLevel, Fts3MultiSegReader *pCsr ){ int i; int rc = SQLITE_OK; | | | | < < > | > | | > > | 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 | Fts3Table *p, sqlite3_int64 iAbsLevel, Fts3MultiSegReader *pCsr ){ int i; int rc = SQLITE_OK; for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){ Fts3SegReader *pSeg = 0; int j; /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding ** somewhere in the pCsr->apSegment[] array. */ for(j=0; ALWAYS(j<pCsr->nSegment); j++){ pSeg = pCsr->apSegment[j]; if( pSeg->iIdx==i ) break; } assert( j<pCsr->nSegment && pSeg->iIdx==i ); if( pSeg->aNode==0 ){ /* Seg-reader is at EOF. Remove the entire input segment. */ rc = fts3DeleteSegment(p, pSeg); if( rc==SQLITE_OK ){ rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx); } |
︙ | ︙ | |||
4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 | /* Determine which level to merge segments from. Any level, from any ** prefix or language index may be selected. Stack variable iAbsLevel ** is set to the absolute level number of the level to merge from. */ rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0); sqlite3_bind_int(pFindLevel, 1, nMin); if( sqlite3_step(pFindLevel)!=SQLITE_ROW ){ return sqlite3_reset(pFindLevel); } iAbsLevel = sqlite3_column_int64(pFindLevel, 0); sqlite3_reset(pFindLevel); /* Allocate space for the cursor, filter and writer objects */ pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc); if( !pWriter ) return SQLITE_NOMEM; memset(pWriter, 0, nAlloc); pFilter = (Fts3SegFilter *)&pWriter[1]; pCsr = (Fts3MultiSegReader *)&pFilter[1]; /* Open a cursor to iterate through the contents of indexes 0 and 1 of ** the selected absolute level. */ pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; | > > | | > > | | 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 | /* Determine which level to merge segments from. Any level, from any ** prefix or language index may be selected. Stack variable iAbsLevel ** is set to the absolute level number of the level to merge from. */ rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0); sqlite3_bind_int(pFindLevel, 1, nMin); if( sqlite3_step(pFindLevel)!=SQLITE_ROW ){ /* There are no levels with nMin or more segments. Or an error has ** occurred. Either way, exit early. */ return sqlite3_reset(pFindLevel); } iAbsLevel = sqlite3_column_int64(pFindLevel, 0); sqlite3_reset(pFindLevel); /* Allocate space for the cursor, filter and writer objects */ pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc); if( !pWriter ) return SQLITE_NOMEM; memset(pWriter, 0, nAlloc); pFilter = (Fts3SegFilter *)&pWriter[1]; pCsr = (Fts3MultiSegReader *)&pFilter[1]; /* Open a cursor to iterate through the contents of indexes 0 and 1 of ** the selected absolute level. */ pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; rc = fts3IncrmergeCsr(p, iAbsLevel, nMin, pCsr); sqlite3_log(SQLITE_OK, "%d-way merge from level=%d to level=%d", nMin, (int)iAbsLevel, (int)iAbsLevel+1 ); if( rc==SQLITE_OK ){ rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter); } if( rc==SQLITE_OK ){ if( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) ){ rc = fts3IncrmergeWriter(p, iAbsLevel, pCsr, pWriter); if( rc==SQLITE_OK ){ do { rc = fts3IncrmergeAppend(p, pWriter, pCsr); if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr); if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK; }while( rc==SQLITE_ROW ); } |
︙ | ︙ |
Changes to test/fts3_common.tcl.
︙ | ︙ | |||
62 63 64 65 66 67 68 | db eval { CREATE VIRTUAL TABLE t2 USING fts4 } set chars [list a b c d e f g h i j k l m n o p q r s t u v w x y z ""] for {set i 0} {$i < $n} {incr i} { set word "" | | | | | | 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | db eval { CREATE VIRTUAL TABLE t2 USING fts4 } set chars [list a b c d e f g h i j k l m n o p q r s t u v w x y z ""] for {set i 0} {$i < $n} {incr i} { set word "" set nChar [llength $chars] append word [lindex $chars [expr {($i / 1) % $nChar}]] append word [lindex $chars [expr {($i / $nChar) % $nChar}]] append word [lindex $chars [expr {($i / ($nChar*$nChar)) % $nChar}]] db eval { INSERT INTO t2(docid, content) VALUES($i, $word) } } } #------------------------------------------------------------------------- # USAGE: fts3_integrity_check TBL |
︙ | ︙ |
Changes to test/fts4merge.test.
︙ | ︙ | |||
43 44 45 46 47 48 49 | SELECT docid FROM t1 WHERE t1 MATCH 'zero one two three' } {123 132 213 231 312 321} } do_execsql_test 1.3 { SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level } { | | | | | | | | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | SELECT docid FROM t1 WHERE t1 MATCH 'zero one two three' } {123 132 213 231 312 321} } do_execsql_test 1.3 { SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level } { 0 {0 1 2 3} 1 {0 1 2 3 4 5 6} 2 {0 1 2 3} } for {set i 0} {$i<100} {incr i} { do_execsql_test 1.4.$i { INSERT INTO t1(t1) VALUES('merge=1,4') } do_test 1.4.$i.2 { fts3_integrity_check t1 } ok do_execsql_test 1.4.$i.3 { SELECT docid FROM t1 WHERE t1 MATCH 'zero one two three' } {123 132 213 231 312 321} } do_execsql_test 1.5 { SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level } { 0 {0 1 2 3} 1 {0 1 2} 2 0 3 0 } #------------------------------------------------------------------------- # Test cases 2.* test that errors in the xxx part of the 'merge=xxx' are # handled correctly. # do_execsql_test 2.0 { CREATE VIRTUAL TABLE t2 USING fts4 } |
︙ | ︙ | |||
99 100 101 102 103 104 105 | fts3_build_db_2 30040 } {} do_test 3.1 { fts3_integrity_check t2 } {ok} do_execsql_test 3.2 { SELECT level, group_concat(idx, ' ') FROM t2_segdir GROUP BY level } { | | | | | 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | fts3_build_db_2 30040 } {} do_test 3.1 { fts3_integrity_check t2 } {ok} do_execsql_test 3.2 { SELECT level, group_concat(idx, ' ') FROM t2_segdir GROUP BY level } { 0 {0 1 2 3 4 5 6} 1 {0 1 2 3 4} 2 {0 1 2 3 4} 3 {0 1 2 3 4 5 6} } do_execsql_test 3.3 { INSERT INTO t2(t2) VALUES('merge=1000000,2'); SELECT level, group_concat(idx, ' ') FROM t2_segdir GROUP BY level } { 0 0 1 {0 1} 2 0 3 {0 1} 4 {0 1} 5 0 } finish_test |