Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Have NEAR queries use incremental merging. Fix issues surrounding the deferred token optimization. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | fts3-prefix-search |
Files: | files | file ages | folders |
SHA1: |
9d10a6846b12a9cc8fd4fdc3affd931a |
User & Date: | dan 2011-06-07 18:35:45.780 |
Context
2011-06-08
| ||
18:39 | Fix various issues to do with deferred tokens, NEAR expressions and matchinfo(). (check-in: 3972a787df user: dan tags: fts3-prefix-search) | |
2011-06-07
| ||
18:35 | Have NEAR queries use incremental merging. Fix issues surrounding the deferred token optimization. (check-in: 9d10a6846b user: dan tags: fts3-prefix-search) | |
2011-06-06
| ||
18:14 | Merge the latest trunk changes into the fts3-prefix-search branch. (check-in: 567dd84359 user: drh tags: fts3-prefix-search) | |
Changes
Changes to ext/fts3/fts3.c.
︙ | |||
1065 1066 1067 1068 1069 1070 1071 | 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 | - | } memset(p, 0, nByte); p->db = db; p->nColumn = nCol; p->nPendingData = 0; p->azColumn = (char **)&p[1]; p->pTokenizer = pTokenizer; |
︙ | |||
1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 | 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 | + | } /* Figure out the page-size for the database. This is required in order to ** estimate the cost of loading large doclists from the database (see ** function sqlite3Fts3SegReaderCost() for details). */ fts3DatabasePageSize(&rc, p); p->nNodeSize = p->nPgsz-35; /* Declare the table schema to SQLite. */ fts3DeclareVtab(&rc, p); fts3_init_out: sqlite3_free(zPrefix); sqlite3_free(aFree); |
︙ | |||
1858 1859 1860 1861 1862 1863 1864 | 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 | - + + + + + + + + + + + + + - - - - - - - - - - + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - + - | } *p++ = 0x00; *pp = p; return 1; } /* |
︙ | |||
3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 | 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 | + + - - + + | ** and merged incrementally. Otherwise, it has to be merged into an in-memory ** doclist and then traversed. */ static void fts3EvalAllocateReaders( Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pnToken, /* OUT: Total number of tokens in phrase. */ int *pnOr, /* OUT: Total number of OR nodes in expr. */ int *pRc ){ if( pExpr && SQLITE_OK==*pRc ){ if( pExpr->eType==FTSQUERY_PHRASE ){ int i; int nToken = pExpr->pPhrase->nToken; *pnToken += nToken; for(i=0; i<nToken; i++){ Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; int rc = sqlite3Fts3TermSegReaderCursor(pCsr, pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr ); if( rc!=SQLITE_OK ){ *pRc = rc; return; } } }else{ *pnOr += (pExpr->eType==FTSQUERY_OR); |
︙ | |||
3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 | 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 | + | ){ int rc = SQLITE_OK; Fts3Doclist *pDL = &p->doclist; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; if( p->bIncr ){ assert( p->nToken==1 ); assert( pDL->pNextDocid==0 ); rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, &pDL->iDocid, &pDL->pList, &pDL->nList ); if( rc==SQLITE_OK && !pDL->pList ){ *pbEof = 1; } }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ |
︙ | |||
3598 3599 3600 3601 3602 3603 3604 | 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 | - - - | int nToken = pExpr->pPhrase->nToken; for(i=0; i<nToken; i++){ if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break; } pExpr->bDeferred = (i==nToken); *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase); }else{ |
︙ | |||
3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 | 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 | + + + + - - - + + + + + + + + | } } } typedef struct Fts3TokenAndCost Fts3TokenAndCost; struct Fts3TokenAndCost { Fts3PhraseToken *pToken; Fts3Expr *pRoot; int nOvfl; int iCol; }; static void fts3EvalTokenCosts( Fts3Cursor *pCsr, Fts3Expr *pRoot, Fts3Expr *pExpr, Fts3TokenAndCost **ppTC, Fts3Expr ***ppOr, int *pRc ){ if( *pRc==SQLITE_OK && pExpr ){ if( pExpr->eType==FTSQUERY_PHRASE ){ Fts3Phrase *pPhrase = pExpr->pPhrase; int i; for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){ Fts3TokenAndCost *pTC = (*ppTC)++; pTC->pRoot = pRoot; pTC->pToken = &pPhrase->aToken[i]; pTC->iCol = pPhrase->iColumn; *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); } |
︙ | |||
3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 | 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - + + + - + + + + + + - + - - + + + + + + + + + | rc = sqlite3_reset(pStmt); if( rc!=SQLITE_OK ) return rc; } *pnPage = pCsr->nRowAvg; return SQLITE_OK; } static int fts3EvalSelectDeferred( Fts3Cursor *pCsr, Fts3Expr *pRoot, Fts3TokenAndCost *aTC, int nTC ){ int nDocSize = 0; int nDocEst = 0; int rc = SQLITE_OK; Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int ii; int nOvfl = 0; int nTerm = 0; for(ii=0; ii<nTC; ii++){ if( aTC[ii].pRoot==pRoot ){ nOvfl += aTC[ii].nOvfl; nTerm++; } } if( nOvfl==0 || nTerm<2 ) return SQLITE_OK; rc = fts3EvalAverageDocsize(pCsr, &nDocSize); for(ii=0; ii<nTerm && rc==SQLITE_OK; ii++){ int jj; Fts3TokenAndCost *pTC = 0; for(jj=0; jj<nTC; jj++){ if( aTC[jj].pToken && aTC[jj].pRoot==pRoot && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) ){ pTC = &aTC[jj]; } } assert( pTC ); /* At this point pTC points to the cheapest remaining token. */ if( ii==0 ){ if( pTC->nOvfl ){ nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10; }else{ /* TODO: Fix this so that the doclist need not be read twice. */ Fts3PhraseToken *pToken = pTC->pToken; int nList = 0; char *pList = 0; rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList); if( rc==SQLITE_OK ){ nDocEst = fts3DoclistCountDocids(1, pList, nList); } sqlite3_free(pList); if( rc==SQLITE_OK ){ rc = sqlite3Fts3TermSegReaderCursor(pCsr, pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr ); } } }else{ if( pTC->nOvfl>=(nDocEst*nDocSize) ){ Fts3PhraseToken *pToken = pTC->pToken; rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); fts3SegReaderCursorFree(pToken->pSegcsr); pToken->pSegcsr = 0; } nDocEst = 1 + (nDocEst/4); } pTC->pToken = 0; } return rc; } int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc = SQLITE_OK; int nToken = 0; int nOr = 0; /* Allocate a MultiSegReader for each token in the expression. */ |
︙ | |||
3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 | 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 | + + - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + | fts3SegReaderCursorFree(pToken->pSegcsr); pToken->pSegcsr = 0; } nDocEst = 1 + (nDocEst/4); } pTC->pToken = 0; } #endif sqlite3_free(aTC); } } fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc); |
︙ | |||
3897 3898 3899 3900 3901 3902 3903 | 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 | - | pExpr->bStart = 1; switch( pExpr->eType ){ case FTSQUERY_NEAR: case FTSQUERY_AND: { Fts3Expr *pLeft = pExpr->pLeft; Fts3Expr *pRight = pExpr->pRight; |
︙ | |||
3920 3921 3922 3923 3924 3925 3926 | 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 | - + | if( iDiff==0 ) break; if( iDiff<0 ){ fts3EvalNext(pCsr, pLeft, pRc); }else{ fts3EvalNext(pCsr, pRight, pRc); } } |
︙ | |||
3983 3984 3985 3986 3987 3988 3989 | 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 | - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - - + + - + + + - - - - - + + + + + + + + | } default: { Fts3Phrase *pPhrase = pExpr->pPhrase; fts3EvalFreeDeferredDoclist(pPhrase); *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); pExpr->iDocid = pPhrase->doclist.iDocid; |
︙ | |||
4048 4049 4050 4051 4052 4053 4054 | 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 | - - - + + + + - - + + | ** ** Or, if no error occurs and it seems the current row does match the FTS ** query, return 0. */ static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){ int rc = *pRc; int bMiss = 0; |
︙ | |||
4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 | 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 | + + + + + + + | if( pExpr->bStart && !pExpr->bEof ){ pExpr->bStart = 0; while( rc==SQLITE_OK && (pExpr->bStart==0 || pExpr->iDocid!=iDocid) ){ fts3EvalNext(pCsr, pExpr, &rc); assert( !pExpr->bEof ); } } } if( rc==SQLITE_OK && pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){ } *pnList = pPhrase->doclist.nAll; *ppList = pPhrase->doclist.aAll; return rc; } |
︙ |
Changes to test/fts3defer2.test.
︙ | |||
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | + + + + + - + - + | INSERT INTO t1(t1) VALUES('optimize'); } do_execsql_test 1.1.4 { SELECT count(*) FROM t1_segments WHERE length(block)>10000; UPDATE t1_segments SET block = zeroblob(length(block)) WHERE length(block)>10000; } {2} do_execsql_test 1.2.0 { SELECT content FROM t1 WHERE t1 MATCH 'f (e a)'; } {{a b c d e f a x y}} do_execsql_test 1.2.1 { SELECT content FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } {{a b c d e f a x y}} breakpoint do_execsql_test 1.2.2 { SELECT snippet(t1, '[', ']'), offsets(t1), mit(matchinfo(t1, 'pcxnal')) FROM t1 WHERE t1 MATCH 'f (e NEAR/2 a)'; } [list \ {a b c d [e] [f] [a] x y} \ {0 1 8 1 0 0 10 1 0 2 12 1} \ |
︙ | |||
95 96 97 98 99 100 101 102 | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | + + + + + + - + | do_execsql_test 2.2.$tn.1 { SELECT mit(matchinfo(t2, 'pcxnal')) FROM t2 WHERE t2 MATCH 'a b'; } [list \ [list 2 1 1 54 54 1 3 3 54 372 8] \ [list 2 1 1 54 54 1 3 3 54 372 7] \ ] do_execsql_test 2.2.$tn.2 { SELECT mit(matchinfo(t2, 'x')) FROM t2 WHERE t2 MATCH 'g z'; } [list \ [list 1 2 2 1 54 54] \ ] set sqlite_fts3_enable_parentheses 1 |
︙ |
Changes to test/fts3matchinfo.test.
︙ | |||
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 | + + + + | CREATE VIRTUAL TABLE t3 USING fts3(mtchinfo=fts3); INSERT INTO t3(mtchinfo) VALUES('Beside the lake, beneath the trees'); SELECT mtchinfo FROM t3; } {{Beside the lake, beneath the trees}} do_execsql_test 3.2 { CREATE VIRTUAL TABLE xx USING FTS4; } do_execsql_test 3.3 { SELECT * FROM xx WHERE xx MATCH 'abc'; } do_execsql_test 3.4 { SELECT * FROM xx WHERE xx MATCH 'a b c'; } #-------------------------------------------------------------------------- # Proc [do_matchinfo_test] is used to test the FTSX matchinfo() function. # |
︙ |