Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.
Overview
Comment: | Add the "matchlen" column to the spellfix virtual table. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | spellfix-matchlen |
Files: | files | file ages | folders |
SHA1: | 4a582c4d30c4d42caf007f9f3ae76041 |
User & Date: | dan 2012-07-12 19:43:54.794 |
Context
2012-07-13
| ||
11:09 | Fix bug in spellfix1 xUpdate() method introduced by the previous commit. (Closed-Leaf check-in: b31aafa5a3 user: dan tags: spellfix-matchlen) | |
2012-07-12
| ||
19:43 | Add the "matchlen" column to the spellfix virtual table. (check-in: 4a582c4d30 user: dan tags: spellfix-matchlen) | |
2012-06-30
| ||
22:22 | Setup the necessary library paths for cross-compilation with MSVC. (check-in: 7fac56ed9f user: mistachkin tags: trunk) | |
Changes
Changes to src/test8.c.
︙ | ︙ | |||
1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 | rc = sqlite3_declare_vtab(db, Tcl_GetString(objv[2])); if( rc!=SQLITE_OK ){ Tcl_SetResult(interp, (char *)sqlite3_errmsg(db), TCL_VOLATILE); return TCL_ERROR; } return TCL_OK; } #endif /* ifndef SQLITE_OMIT_VIRTUALTABLE */ /* ** Register commands with the TCL interpreter. */ int Sqlitetest8_Init(Tcl_Interp *interp){ #ifndef SQLITE_OMIT_VIRTUALTABLE static struct { char *zName; Tcl_ObjCmdProc *xProc; void *clientData; } aObjCmd[] = { { "register_echo_module", register_echo_module, 0 }, { "sqlite3_declare_vtab", declare_vtab, 0 }, }; int i; for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){ Tcl_CreateObjCommand(interp, aObjCmd[i].zName, aObjCmd[i].xProc, aObjCmd[i].clientData, 0); } #endif return TCL_OK; } | > > > > > > > > > > > > > > > > > > > > > > > > > > | 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 | rc = sqlite3_declare_vtab(db, Tcl_GetString(objv[2])); if( rc!=SQLITE_OK ){ Tcl_SetResult(interp, (char *)sqlite3_errmsg(db), TCL_VOLATILE); return TCL_ERROR; } return TCL_OK; } #include "test_spellfix.c" /* ** Register the spellfix virtual table module. */ static int register_spellfix_module( ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[] ){ static sqlite3_module aMod[3]; int iMod; sqlite3 *db; if( objc!=2 ){ Tcl_WrongNumArgs(interp, 1, objv, "DB"); return TCL_ERROR; } if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR; sqlite3Spellfix1Register(db); return TCL_OK; } #endif /* ifndef SQLITE_OMIT_VIRTUALTABLE */ /* ** Register commands with the TCL interpreter. 
*/ int Sqlitetest8_Init(Tcl_Interp *interp){ #ifndef SQLITE_OMIT_VIRTUALTABLE static struct { char *zName; Tcl_ObjCmdProc *xProc; void *clientData; } aObjCmd[] = { { "register_echo_module", register_echo_module, 0 }, { "register_spellfix_module", register_spellfix_module, 0 }, { "sqlite3_declare_vtab", declare_vtab, 0 }, }; int i; for(i=0; i<sizeof(aObjCmd)/sizeof(aObjCmd[0]); i++){ Tcl_CreateObjCommand(interp, aObjCmd[i].zName, aObjCmd[i].xProc, aObjCmd[i].clientData, 0); } #endif return TCL_OK; } |
Changes to src/test_spellfix.c.
︙ | ︙ | |||
496 497 498 499 500 501 502 | ** Smaller numbers mean a closer match. ** ** Negative values indicate an error: ** -1 One of the inputs is NULL ** -2 Non-ASCII characters on input ** -3 Unable to allocate memory */ | | > | > | 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 | ** Smaller numbers mean a closer match. ** ** Negative values indicate an error: ** -1 One of the inputs is NULL ** -2 Non-ASCII characters on input ** -3 Unable to allocate memory */ static int editdist(const char *zA, const char *zB, int *pnMatch){ int nA, nB; /* Number of characters in zA[] and zB[] */ int xA, xB; /* Loop counters for zA[] and zB[] */ char cA, cB; /* Current character of zA and zB */ char cAprev, cBprev; /* Previous character of zA and zB */ int d; /* North-west cost value */ int dc = 0; /* North-west character value */ int res; /* Final result */ int *m; /* The cost matrix */ char *cx; /* Corresponding character values */ int *toFree = 0; /* Malloced space */ int mStack[60+15]; /* Stack space to use if not too much is needed */ int nMatch = 0; /* Early out if either input is NULL */ if( zA==0 || zB==0 ) return -1; /* Skip any common prefix */ while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; } if( pnMatch ) *pnMatch = nMatch; if( zA[0]==0 && zB[0]==0 ) return 0; #if 0 printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' '); #endif /* Verify input strings and measure their lengths */ |
︙ | ︙ | |||
620 621 622 623 624 625 626 | cx[xB] = ncx; cBprev = cB; } cAprev = cA; } /* Free the wagner matrix and return the result */ | | | | | > > > > | | | 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 | cx[xB] = ncx; cBprev = cB; } cAprev = cA; } /* Free the wagner matrix and return the result */ if( cA=='*' && nB>=nA ){ res = m[0]; for(xB=1; xB<=nB; xB++){ if( m[xB]<res ){ res = m[xB]; if( pnMatch ) *pnMatch = nMatch + xB; } } }else{ res = m[nB]; if( pnMatch ) *pnMatch = -1; } sqlite3_free(toFree); return res; } /* ** Function: editdist(A,B) ** ** Return the cost of transforming string A into string B. Both strings ** must be pure ASCII text. If A ends with '*' then it is assumed to be ** a prefix of B and extra characters on the end of B have minimal additional ** cost. */ static void editdistSqlFunc( sqlite3_context *context, int argc, sqlite3_value **argv ){ int res = editdist((const char*)sqlite3_value_text(argv[0]), (const char*)sqlite3_value_text(argv[1]), 0); if( res<0 ){ if( res==(-3) ){ sqlite3_result_error_nomem(context); }else if( res==(-2) ){ sqlite3_result_error(context, "non-ASCII input to editdist()", -1); }else{ sqlite3_result_error(context, "NULL input to editdist()", -1); } }else{ sqlite3_result_int(context, res); } } #if !SQLITE_AMALGAMATION /* ** This lookup table is used to help decode the first byte of ** a multi-byte UTF8 character. */ static const unsigned char sqlite3Utf8Trans1[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
︙ | ︙ | |||
1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 | } if( c ) zOut[nOut++] = '?'; } } zOut[nOut] = 0; return zOut; } /* ** spellfix1_translit(X) ** ** Convert a string that contains non-ASCII Roman characters into ** pure ASCII. */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 | } if( c ) zOut[nOut++] = '?'; } } zOut[nOut] = 0; return zOut; } /* ** Return the number of characters in the shortest prefix of the input ** string that transliterates to an ASCII string nTrans bytes or longer. ** Or, if the transliteration of the input string is less than nTrans ** bytes in size, return the number of characters in the input string. */ static int translen_to_charlen(const char *zIn, int nIn, int nTrans){ int i, c, sz, nOut; int nChar; i = nOut = 0; for(nChar=0; i<nIn && nOut<nTrans; nChar++){ c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz); i += sz; nOut++; if( c>=128 ){ int xTop, xBtm, x; xTop = sizeof(translit)/sizeof(translit[0]) - 1; xBtm = 0; while( xTop>=xBtm ){ x = (xTop + xBtm)/2; if( translit[x].cFrom==c ){ if( translit[x].cTo1 ) nOut++; if( c==0x0429 || c== 0x0449 ) nOut += 2; break; }else if( translit[x].cFrom>c ){ xTop = x-1; }else{ xBtm = x+1; } } } } return nChar; } /* ** Return the number of characters in the utf-8 string in the nIn byte ** buffer pointed to by zIn. 
*/ static int utf8_charlen(const char *zIn, int nIn){ int i; int nChar = 0; for(i=0; i<nIn; nChar++){ int sz; utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz); i += sz; } return nChar; } /* ** spellfix1_translit(X) ** ** Convert a string that contains non-ASCII Roman characters into ** pure ASCII. */ |
︙ | ︙ | |||
1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 | int nSearch; /* Number of vocabulary items checked */ struct spellfix1_row { /* For each row of content */ sqlite3_int64 iRowid; /* Rowid for this row */ char *zWord; /* Text for this row */ int iRank; /* Rank for this row */ int iDistance; /* Distance from pattern for this row */ int iScore; /* Score for sorting */ } *a; }; /* ** Construct one or more SQL statements from the format string given ** and then evaluate those statements. The success code is written ** into *pRc. | > | 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 | int nSearch; /* Number of vocabulary items checked */ struct spellfix1_row { /* For each row of content */ sqlite3_int64 iRowid; /* Rowid for this row */ char *zWord; /* Text for this row */ int iRank; /* Rank for this row */ int iDistance; /* Distance from pattern for this row */ int iScore; /* Score for sorting */ int iMatchlen; /* Length of prefix match */ } *a; }; /* ** Construct one or more SQL statements from the format string given ** and then evaluate those statements. The success code is written ** into *pRc. |
︙ | ︙ | |||
1348 1349 1350 1351 1352 1353 1354 | pNew->zTableName = sqlite3_mprintf("%s", zTableName); pNew->db = db; if( pNew->zTableName==0 ){ rc = SQLITE_NOMEM; }else{ rc = sqlite3_declare_vtab(db, "CREATE TABLE x(word,rank,distance,langid," | | | 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 | pNew->zTableName = sqlite3_mprintf("%s", zTableName); pNew->db = db; if( pNew->zTableName==0 ){ rc = SQLITE_NOMEM; }else{ rc = sqlite3_declare_vtab(db, "CREATE TABLE x(word,rank,distance,langid," "score,matchlen,top HIDDEN,scope HIDDEN,srchcnt HIDDEN," "soundslike HIDDEN)" ); } if( rc==SQLITE_OK && isCreate ){ sqlite3_uint64 r; spellfix1DbExec(&rc, db, "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n" |
︙ | ︙ | |||
1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 | " FROM \"%w\".\"%w_vocab\"" " WHERE langid=%d AND k2 GLOB '%q*'", p->zDbName, p->zTableName, iLang, zClass ); rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); sqlite3_free(zSql); if( rc==SQLITE_OK ){ const char *zK1; int iDist; int iRank; int iScore; int iWorst = 999999999; int idx; int idxWorst; int i; while( sqlite3_step(pStmt)==SQLITE_ROW ){ zK1 = (const char*)sqlite3_column_text(pStmt, 3); if( zK1==0 ) continue; pCur->nSearch++; iRank = sqlite3_column_int(pStmt, 2); | > > | > > > > > > > | 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 | " FROM \"%w\".\"%w_vocab\"" " WHERE langid=%d AND k2 GLOB '%q*'", p->zDbName, p->zTableName, iLang, zClass ); rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); sqlite3_free(zSql); if( rc==SQLITE_OK ){ int nK1; const char *zK1; int iDist; int iRank; int iScore; int iMatchlen = 0; int iWorst = 999999999; int idx; int idxWorst; int i; while( sqlite3_step(pStmt)==SQLITE_ROW ){ zK1 = (const char*)sqlite3_column_text(pStmt, 3); if( zK1==0 ) continue; pCur->nSearch++; iRank = sqlite3_column_int(pStmt, 2); iDist = editdist(zPattern, zK1, &iMatchlen); iScore = spellfix1Score(iDist,iRank); nK1 = sqlite3_column_bytes(pStmt, 3); if( iMatchlen>0 ){ iMatchlen = translen_to_charlen(zK1, nK1, iMatchlen); }else if( iMatchlen<0 ){ iMatchlen = utf8_charlen(zK1, nK1); } if( pCur->nRow<pCur->nAlloc ){ idx = pCur->nRow; }else if( iScore<iWorst ){ idx = idxWorst; sqlite3_free(pCur->a[idx].zWord); }else{ continue; } pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1)); pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0); pCur->a[idx].iRank = iRank; pCur->a[idx].iDistance = iDist; pCur->a[idx].iScore = iScore; 
pCur->a[idx].iMatchlen = iMatchlen; if( pCur->nRow<pCur->nAlloc ) pCur->nRow++; if( pCur->nRow==pCur->nAlloc ){ iWorst = pCur->a[0].iScore; idxWorst = 0; for(i=1; i<pCur->nRow; i++){ iScore = pCur->a[i].iScore; if( iWorst<iScore ){ |
︙ | ︙ | |||
1746 1747 1748 1749 1750 1751 1752 | break; } case 4: { sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore); break; } case 5: { | | | > > > > | 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 | break; } case 4: { sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore); break; } case 5: { sqlite3_result_int(ctx, pCur->a[pCur->iRow].iMatchlen); break; } case 6: { sqlite3_result_int(ctx, pCur->iTop); break; } case 7: { sqlite3_result_int(ctx, pCur->iScope); break; } case 8: { sqlite3_result_int(ctx, pCur->nSearch); break; } default: { sqlite3_result_null(ctx); break; } |
︙ | ︙ |
Added test/spellfix.test.
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | # 2012 July 12 # # The author disclaims copyright to this source code. In place of # a legal notice, here is a blessing: # # May you do good and not evil. # May you find forgiveness for yourself and forgive others. # May you share freely, never taking more than you give. # #*********************************************************************** # set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix spellfix register_spellfix_module db set vocab { rabbi rabbit rabbits rabble rabid rabies raccoon raccoons race raced racer racers races racetrack racial racially racing rack racked racket racketeer racketeering racketeers rackets racking racks radar radars radial radially radian radiance radiant radiantly radiate radiated radiates radiating radiation radiations radiator radiators radical radically radicals radices radii radio radioactive radioastronomy radioed radiography radioing radiology radios radish radishes radium radius radix radon raft rafter rafters rafts rag rage raged rages ragged raggedly raggedness raging rags ragweed raid raided raider raiders raiding raids rail railed railer railers railing railroad railroaded railroader railroaders railroading railroads rails railway railways raiment rain rainbow raincoat raincoats raindrop raindrops rained rainfall rainier rainiest raining rains rainstorm rainy raise raised raiser raisers raises raisin raising rake raked rakes raking rallied rallies rally rallying ram ramble rambler rambles rambling ramblings ramification ramifications ramp rampage rampant rampart ramps 
ramrod rams ran ranch ranched rancher ranchers ranches ranching rancid random randomization randomize randomized randomizes randomly randomness randy rang range ranged rangeland ranger rangers ranges ranging rangy rank ranked ranker rankers rankest ranking rankings rankle rankly rankness ranks ransack ransacked ransacking ransacks ransom ransomer ransoming ransoms rant ranted ranter ranters ranting rants rap rapacious rape raped raper rapes rapid rapidity rapidly rapids rapier raping rapport rapprochement raps rapt raptly rapture raptures rapturous rare rarely rareness rarer rarest rarity rascal rascally rascals rash rasher rashly rashness rasp raspberry rasped rasping rasps raster rat rate rated rater raters rates rather ratification ratified ratifies ratify ratifying rating ratings ratio ration rational rationale rationales rationalities rationality rationalization rationalizations rationalize rationalized rationalizes rationalizing rationally rationals rationing rations ratios rats rattle rattled rattler rattlers rattles rattlesnake rattlesnakes rattling raucous ravage ravaged ravager ravagers ravages ravaging rave raved raven ravening ravenous ravenously ravens raves ravine ravines raving ravings raw rawer rawest rawly rawness ray rays raze razor razors re reabbreviate reabbreviated reabbreviates reabbreviating reach reachability reachable reachably reached reacher reaches reaching reacquired react reacted reacting reaction reactionaries reactionary reactions reactivate reactivated reactivates reactivating reactivation reactive reactively reactivity reactor reactors reacts read readability readable reader readers readied readier readies readiest readily readiness reading readings readjusted readout readouts reads ready readying real realest realign realigned realigning realigns realism realist realistic realistically realists realities reality } do_test 1.1 { execsql { CREATE VIRTUAL TABLE t1 USING spellfix1 } foreach word $vocab { execsql { INSERT INTO 
t1(word) VALUES($word) } } } {} foreach {tn word res} { 1 laxpi* {rasping 5 rasped 5 raspberry 6 rasp 4 rasps 5} 2 ril* {rally 3 rallies 3 rallied 3 rallying 3 rawly 4} 3 rilis* {realist 6 realistic 6 realistically 6 realists 6 realism 6} 4 reail* {reality 4 real 4 realities 4 realest 4 realist 4} 5 ras* {rashness 3 rascal 3 rasher 3 rash 3 rascally 3} 6 realistss* {realistically 7 realists 8 realigns 8 realistic 9 realest 7} 7 realistss {realists 8 realist 7 realigns 8 realistic 9 realest 7} 8 lllation* { rationale 6 ration 6 rationally 6 rationalizing 6 rationality 6 } 9 renstom* {rainstorm 8 ransoming 6 ransomer 6 ransom 6 ransacks 6} } { do_execsql_test 1.2.$tn { SELECT word, matchlen FROM t1 WHERE word MATCH $word LIMIT 5 } $res } finish_test |