Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | Fix problems with combining content= and languageid= in a single fts4 table. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts4-languageid |
Files: | files | file ages | folders |
SHA1: | 22491e7bc38aee43819b888e04241cb6 |
User & Date: | dan 2012-03-02 19:53:02.350 |
Context
2012-03-03
| ||
18:46 | Add the xLanguageid method to sqlite3_fts3_tokenizer versions 1 and greater. (Closed-Leaf check-in: f8e9c445dd user: dan tags: fts4-languageid) | |
2012-03-02
| ||
19:53 | Fix problems with combining content= and languageid= in a single fts4 table. (check-in: 22491e7bc3 user: dan tags: fts4-languageid) | |
16:18 | Add test for FTS 'rebuild' command. (check-in: 181bc35731 user: dan tags: fts4-languageid) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | ︙ | |||
796 797 798 799 800 801 802 | }else{ zFree = zFunction = fts3QuoteId(zFunc); } fts3Appendf(pRc, &zRet, "docid"); for(i=0; i<p->nColumn; i++){ fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); } | | > > > > > | 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 | }else{ zFree = zFunction = fts3QuoteId(zFunc); } fts3Appendf(pRc, &zRet, "docid"); for(i=0; i<p->nColumn; i++){ fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); } if( p->zLanguageid ){ fts3Appendf(pRc, &zRet, ", x.%Q", "langid"); } sqlite3_free(zFree); }else{ fts3Appendf(pRc, &zRet, "rowid"); for(i=0; i<p->nColumn; i++){ fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]); } if( p->zLanguageid ){ fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid); } } fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", p->zDb, (p->zContentTbl ? p->zContentTbl : p->zName), (p->zContentTbl ? "" : "_content") ); return zRet; |
︙ | ︙ | |||
1211 1212 1213 1214 1215 1216 1217 | sqlite3_free(zUncompress); zCompress = 0; zUncompress = 0; if( nCol==0 ){ sqlite3_free((void*)aCol); aCol = 0; rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString); | | > > | > > > > > > > > > > | 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 | sqlite3_free(zUncompress); zCompress = 0; zUncompress = 0; if( nCol==0 ){ sqlite3_free((void*)aCol); aCol = 0; rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString); /* If a languageid= option was specified, remove the language id ** column from the aCol[] array. */ if( rc==SQLITE_OK && zLanguageid ){ int j; for(j=0; j<nCol; j++){ if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){ memmove(&aCol[j], &aCol[j+1], (nCol-j) * sizeof(aCol[0])); nCol--; break; } } } } } if( rc!=SQLITE_OK ) goto fts3_init_out; if( nCol==0 ){ assert( nString==0 ); aCol[0] = "content"; nString = 8; |
︙ | ︙ | |||
3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 | ** alias for "rowid", use the xRowid() method to obtain the value. */ sqlite3_result_int64(pCtx, pCsr->iPrevId); }else if( iCol==p->nColumn ){ /* The extra column whose name is the same as the table. ** Return a blob which is a pointer to the cursor. */ sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); }else{ /* The requested column is either a user column (one that contains ** indexed data), or the language-id column. */ rc = fts3CursorSeek(0, pCsr); if( rc==SQLITE_OK ){ if( iCol==p->nColumn+2 ){ | > > | 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 | ** alias for "rowid", use the xRowid() method to obtain the value. */ sqlite3_result_int64(pCtx, pCsr->iPrevId); }else if( iCol==p->nColumn ){ /* The extra column whose name is the same as the table. ** Return a blob which is a pointer to the cursor. */ sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); }else if( iCol==p->nColumn+2 && pCsr->pExpr ){ sqlite3_result_int64(pCtx, pCsr->iLangid); }else{ /* The requested column is either a user column (one that contains ** indexed data), or the language-id column. */ rc = fts3CursorSeek(0, pCsr); if( rc==SQLITE_OK ){ if( iCol==p->nColumn+2 ){ |
︙ | ︙ |
Changes to test/fts4langid.test.
︙ | ︙ | |||
33 34 35 36 37 38 39 | # 2.1.* - Test that FTS queries only ever return rows associated with # the requested language. # # 2.2.* - Same as 2.1.*, after an 'optimize' command. # # 2.3.* - Same as 2.1.*, after a 'rebuild' command. # | > > > > | | < < < < | 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | # 2.1.* - Test that FTS queries only ever return rows associated with # the requested language. # # 2.2.* - Same as 2.1.*, after an 'optimize' command. # # 2.3.* - Same as 2.1.*, after a 'rebuild' command. # # 3.* - Tests with content= tables. Both where there is a real # underlying content table and where there is not. # # # 4.* - Test that if one is provided, the tokenizer xLanguage method # is called to configure the tokenizer before tokenizing query # or document text. # # 5.* - Test the fts4aux table when the associated FTS4 table contains # multiple languages. # do_execsql_test 1.1 { CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id); } do_execsql_test 1.2 { SELECT sql FROM sqlite_master WHERE name = 't1_content'; |
︙ | ︙ | |||
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 | lappend y [lindex $ywords [expr ($i / 1000) % 10]] lappend y [lindex $ywords [expr ($i / 100) % 10]] lappend y [lindex $ywords [expr ($i / 10) % 10]] lappend y [lindex $ywords [expr ($i / 1) % 10]] $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) } } } proc rowid_list_set_langid {langid} { set ::rowid_list_langid $langid } proc rowid_list {pattern} { set langid $::rowid_list_langid set res [list] | > > > > > | | | 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 | lappend y [lindex $ywords [expr ($i / 1000) % 10]] lappend y [lindex $ywords [expr ($i / 100) % 10]] lappend y [lindex $ywords [expr ($i / 10) % 10]] lappend y [lindex $ywords [expr ($i / 1) % 10]] $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) } } $db eval { CREATE TABLE data(x, y, l); INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2; } } proc rowid_list_set_langid {langid} { set ::rowid_list_langid $langid } proc rowid_list {pattern} { set langid $::rowid_list_langid set res [list] db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} { if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} { lappend res $rowid } } return $res } proc or_merge_list {list1 list2} { set res [list] |
︙ | ︙ | |||
231 232 233 234 235 236 237 | } do_test 2.0 { reset_db build_multilingual_db_1 db } {} | | | | | | | | | | | | | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 | } do_test 2.0 { reset_db build_multilingual_db_1 db } {} proc do_test_query1 {tn query res_script} { for {set langid 0} {$langid < 10} {incr langid} { rowid_list_set_langid $langid set res [eval $res_script] set actual [ execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid} ] do_test $tn.$langid [list set {} $actual] $res } } # Run some queries. do_test_query1 2.1.1 {delta} { rowid_list delta } do_test_query1 2.1.2 {"zero one two"} { rowid_list "zero one two" } do_test_query1 2.1.3 {zero one two} { and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] } do_test_query1 2.1.4 {"zero one" OR "one two"} { or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] } # Now try the same tests as above, but after running the 'optimize' # command on the FTS table. # do_execsql_test 2.2 { INSERT INTO t2(t2) VALUES('optimize'); SELECT count(*) FROM t2_segdir; } {9} do_test_query1 2.2.1 {delta} { rowid_list delta } do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" } do_test_query1 2.2.3 {zero one two} { and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] } do_test_query1 2.2.4 {"zero one" OR "one two"} { or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] } # And rebuild. 
# do_test 2.3 { reset_db build_multilingual_db_1 db execsql { INSERT INTO t2(t2) VALUES('rebuild') } } {} do_test_query1 2.3.1 {delta} { rowid_list delta } do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" } do_test_query1 2.3.3 {zero one two} { and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] } do_test_query1 2.3.4 {"zero one" OR "one two"} { or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] } #------------------------------------------------------------------------- # Test cases 3.* # do_test 3.0 { reset_db build_multilingual_db_1 db execsql { CREATE TABLE t3_data(l, x, y); INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2; DROP TABLE t2; } } {} do_execsql_test 3.1 { CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l); INSERT INTO t2(t2) VALUES('rebuild'); } do_test_query1 3.1.1 {delta} { rowid_list delta } do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" } do_test_query1 3.1.3 {zero one two} { and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] } do_test_query1 3.1.4 {"zero one" OR "one two"} { or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] } do_execsql_test 3.2.1 { DROP TABLE t2; CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable); } do_execsql_test 3.2.2 { INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data; } do_execsql_test 3.2.3 { DROP TABLE t3_data; } do_test_query1 3.3.1 {delta} { rowid_list delta } do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" } do_test_query1 3.3.3 {zero one two} { and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] } do_test_query1 3.3.4 {"zero one" OR "one two"} { or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] } finish_test |