SQLite

Changes On Branch spellfix-matchlen
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch spellfix-matchlen Excluding Merge-Ins

This is equivalent to a diff from 4353e40b to f96d4e7b

2012-07-16
23:13
Merge the spellfix1 changes for supporting matchlen into trunk. (check-in: 6f167adf user: drh tags: trunk)
22:16
Fix compiler warnings about unused code in spellfix. Fix the editDist3Core() routine to return the matchlen in characters instead of bytes. (Closed-Leaf check-in: f96d4e7b user: drh tags: spellfix-matchlen)
14:52
Fix a bug in the phonetic-hash routine in spellfix1: Even if the first character of a word is deemed to be "silent", do not apply the special handling intended for the first character of each word to the second. (check-in: 6333b42d user: dan tags: spellfix-matchlen)
10:25
Merge trunk changes. (check-in: 90df64ab user: dan tags: spellfix-matchlen)
10:06
If a specific database is nominated as part of a "PRAGMA integrity_check" or "PRAGMA quick_check" command, search for problems in the nominated database only. i.e. "PRAGMA main.quick_check" now only scans the main database, not all attached databases as before. (check-in: 4353e40b user: dan tags: trunk)
2012-07-13
16:15
Update test_spellfix.c with latest changes. (check-in: cba2a658 user: dan tags: trunk)

Changes to src/test8.c.

1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
*/
static int register_spellfix_module(
  ClientData clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  static sqlite3_module aMod[3];
  int iMod;
  sqlite3 *db;

  if( objc!=2 ){
    Tcl_WrongNumArgs(interp, 1, objv, "DB");
    return TCL_ERROR;
  }
  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;







<
<







1377
1378
1379
1380
1381
1382
1383


1384
1385
1386
1387
1388
1389
1390
*/
static int register_spellfix_module(
  ClientData clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){


  sqlite3 *db;

  if( objc!=2 ){
    Tcl_WrongNumArgs(interp, 1, objv, "DB");
    return TCL_ERROR;
  }
  if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR;

Changes to src/test_spellfix.c.

96
97
98
99
100
101
102





103
104
105
106
107
108
109
**                  For any given query this value is the same on all rows.
**
**    score         The score is a combination of rank and distance.  The
**                  idea is that a lower score is better.  The virtual table
**                  attempts to find words with the lowest score and 
**                  by default (unless overridden by ORDER BY) returns
**                  results in order of increasing score.





**
**    top           (HIDDEN)  For any query, this value is the same on all
**                  rows.  It is an integer which is the maximum number of
**                  rows that will be output.  The actually number of rows
**                  output might be less than this number, but it will never
**                  be greater.  The default value for top is 20, but that
**                  can be changed for each query by including a term of







>
>
>
>
>







96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
**                  For any given query this value is the same on all rows.
**
**    score         The score is a combination of rank and distance.  The
**                  idea is that a lower score is better.  The virtual table
**                  attempts to find words with the lowest score and 
**                  by default (unless overridden by ORDER BY) returns
**                  results in order of increasing score.
**
**    matchlen      For prefix queries, the number of characters in the prefix
**                  of the returned value (word) that matched the query term.
**                  For non-prefix queries, the number of characters in the 
**                  returned value.
**
**    top           (HIDDEN)  For any query, this value is the same on all
**                  rows.  It is an integer which is the maximum number of
**                  rows that will be output.  The actually number of rows
**                  output might be less than this number, but it will never
**                  be greater.  The default value for top is 20, but that
**                  can be changed for each query by including a term of
470
471
472
473
474
475
476

477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
      if( i+2<nIn ){
        if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
      }
    }
    c = aClass[c&0x7f];
    if( c==CCLASS_SPACE ) continue;
    if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;

    if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
       continue; /* No vowels beside L or R */ 
    }
    if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
       nOut--;   /* No vowels beside L or R */
    }
    cPrev = c;
    if( c==CCLASS_SILENT ) continue;
    cPrevX = c;
    if( c==CCLASS_SPACE ) continue;
    aClass = midClass;
    c = className[c];
    if( c!=zOut[nOut-1] ) zOut[nOut++] = c;
  }
  zOut[nOut] = 0;
  return zOut;
}








>









<
<







475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491


492
493
494
495
496
497
498
      if( i+2<nIn ){
        if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
      }
    }
    c = aClass[c&0x7f];
    if( c==CCLASS_SPACE ) continue;
    if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
    aClass = midClass;
    if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
       continue; /* No vowels beside L or R */ 
    }
    if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
       nOut--;   /* No vowels beside L or R */
    }
    cPrev = c;
    if( c==CCLASS_SILENT ) continue;
    cPrevX = c;


    c = className[c];
    if( c!=zOut[nOut-1] ) zOut[nOut++] = c;
  }
  zOut[nOut] = 0;
  return zOut;
}

601
602
603
604
605
606
607






608
609
610
611
612
613
614
615
616
617
618
619
620
621

622
623
624
625
626
627

628
629
630
631
632
633
634
**
** Smaller numbers mean a closer match.
**
** Negative values indicate an error:
**    -1  One of the inputs is NULL
**    -2  Non-ASCII characters on input
**    -3  Unable to allocate memory 






*/
static int editdist1(const char *zA, const char *zB, int iLangId){
  int nA, nB;            /* Number of characters in zA[] and zB[] */
  int xA, xB;            /* Loop counters for zA[] and zB[] */
  char cA, cB;           /* Current character of zA and zB */
  char cAprev, cBprev;   /* Previous character of zA and zB */
  char cAnext, cBnext;   /* Next character in zA and zB */
  int d;                 /* North-west cost value */
  int dc = 0;            /* North-west character value */
  int res;               /* Final result */
  int *m;                /* The cost matrix */
  char *cx;              /* Corresponding character values */
  int *toFree = 0;       /* Malloced space */
  int mStack[60+15];     /* Stack space to use if not too much is needed */


  /* Early out if either input is NULL */
  if( zA==0 || zB==0 ) return -1;

  /* Skip any common prefix */
  while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; }

  if( zA[0]==0 && zB[0]==0 ) return 0;

#if 0
  printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
#endif

  /* Verify input strings and measure their lengths */







>
>
>
>
>
>

|












>





|
>







605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
**
** Smaller numbers mean a closer match.
**
** Negative values indicate an error:
**    -1  One of the inputs is NULL
**    -2  Non-ASCII characters on input
**    -3  Unable to allocate memory 
**
** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
** of zB that matched the pattern in zA. If zA does not end with a '*',
** then this value is always the number of bytes in zB (i.e. strlen(zB)).
** If zA does end in a '*', then it is the number of bytes in the prefix
** of zB that was deemed to match zA.
*/
static int editdist1(const char *zA, const char *zB, int iLangId, int *pnMatch){
  int nA, nB;            /* Number of characters in zA[] and zB[] */
  int xA, xB;            /* Loop counters for zA[] and zB[] */
  char cA, cB;           /* Current character of zA and zB */
  char cAprev, cBprev;   /* Previous character of zA and zB */
  char cAnext, cBnext;   /* Next character in zA and zB */
  int d;                 /* North-west cost value */
  int dc = 0;            /* North-west character value */
  int res;               /* Final result */
  int *m;                /* The cost matrix */
  char *cx;              /* Corresponding character values */
  int *toFree = 0;       /* Malloced space */
  int mStack[60+15];     /* Stack space to use if not too much is needed */
  int nMatch = 0;

  /* Early out if either input is NULL */
  if( zA==0 || zB==0 ) return -1;

  /* Skip any common prefix */
  while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
  if( pnMatch ) *pnMatch = nMatch;
  if( zA[0]==0 && zB[0]==0 ) return 0;

#if 0
  printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
#endif

  /* Verify input strings and measure their lengths */
733
734
735
736
737
738
739
740



741
742
743

744
745
746
747
748
749
750
    cAprev = cA;
  }

  /* Free the wagner matrix and return the result */
  if( cA=='*' ){
    res = m[1];
    for(xB=1; xB<=nB; xB++){
      if( m[xB]<res ) res = m[xB];



    }
  }else{
    res = m[nB];

  }
  sqlite3_free(toFree);
  return res;
}

/*
** Function:    editdist(A,B)







|
>
>
>



>







745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
    cAprev = cA;
  }

  /* Free the wagner matrix and return the result */
  if( cA=='*' ){
    res = m[1];
    for(xB=1; xB<=nB; xB++){
      if( m[xB]<res ){
        res = m[xB];
        if( pnMatch ) *pnMatch = xB+nMatch;
      }
    }
  }else{
    res = m[nB];
    if( pnMatch ) *pnMatch = -1;
  }
  sqlite3_free(toFree);
  return res;
}

/*
** Function:    editdist(A,B)
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
  int argc,
  sqlite3_value **argv
){
  int langid = argc==2 ? 0 : sqlite3_value_int(argv[2]);
  int res = editdist1(
                    (const char*)sqlite3_value_text(argv[0]),
                    (const char*)sqlite3_value_text(argv[1]),
                    langid);
  if( res<0 ){
    if( res==(-3) ){
      sqlite3_result_error_nomem(context);
    }else if( res==(-2) ){
      sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
    }else{
      sqlite3_result_error(context, "NULL input to editdist()", -1);







|







776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
  int argc,
  sqlite3_value **argv
){
  int langid = argc==2 ? 0 : sqlite3_value_int(argv[2]);
  int res = editdist1(
                    (const char*)sqlite3_value_text(argv[0]),
                    (const char*)sqlite3_value_text(argv[1]),
                    langid, 0);
  if( res<0 ){
    if( res==(-3) ){
      sqlite3_result_error_nomem(context);
    }else if( res==(-2) ){
      sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
    }else{
      sqlite3_result_error(context, "NULL input to editdist()", -1);
1093
1094
1095
1096
1097
1098
1099

1100
1101
1102
1103
1104
1105
1106
      pStr = 0;
      break;
    }
  }
  return pStr;
}


/*
** Return the number of bytes in the common prefix of two UTF8 strings.
** Only complete characters are considered.
*/
static int editDist3PrefixLen(const char *z1, const char *z2){
  int n = 0;
  while( z1[n] && z1[n]==z2[n] ){ n++; }







>







1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
      pStr = 0;
      break;
    }
  }
  return pStr;
}

#if 0 /* No longer used */
/*
** Return the number of bytes in the common prefix of two UTF8 strings.
** Only complete characters are considered.
*/
static int editDist3PrefixLen(const char *z1, const char *z2){
  int n = 0;
  while( z1[n] && z1[n]==z2[n] ){ n++; }
1114
1115
1116
1117
1118
1119
1120

1121
1122
1123
1124
1125
1126
1127
*/
static int editDist3SuffixLen(const char *z1, int n1, const char *z2, int n2){
  int origN1 = n1;
  while( n1>0 && n2>0 && z1[n1-1]==z2[n2-1] ){ n1--; n2--; }
  while( n1<origN1 && (z1[n1]&0xc0)==0x80 ){ n1++; n2++; }
  return origN1 - n1;
}


/*
** Update entry m[i] such that it is the minimum of its current value
** and m[j]+iCost.
**
** If the iCost is 1,000,000 or greater, then consider the cost to be
** infinite and skip the update.







>







1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
*/
static int editDist3SuffixLen(const char *z1, int n1, const char *z2, int n2){
  int origN1 = n1;
  while( n1>0 && n2>0 && z1[n1-1]==z2[n2-1] ){ n1--; n2--; }
  while( n1<origN1 && (z1[n1]&0xc0)==0x80 ){ n1++; n2++; }
  return origN1 - n1;
}
#endif /* 0 */

/*
** Update entry m[i] such that it is the minimum of its current value
** and m[j]+iCost.
**
** If the iCost is 1,000,000 or greater, then consider the cost to be
** infinite and skip the update.
1138
1139
1140
1141
1142
1143
1144







1145
1146
1147
1148
1149
1150

1151
1152
1153
1154
1155
1156
1157
    if( b<m[i] ) m[i] = b;
  }
}

/* Compute the edit distance between two strings.
**
** If an error occurs, return a negative number which is the error code.







*/
static int editDist3Core(
  EditDist3FromString *pFrom,  /* The FROM string */
  const char *z2,              /* The TO string */
  int n2,                      /* Length of the TO string */
  const EditDist3Lang *pLang   /* Edit weights for a particular language ID */

){
  int k, n;
  int i1, b1;
  int i2, b2;
  EditDist3FromString f = *pFrom;
  EditDist3To *a2;
  unsigned int *m;







>
>
>
>
>
>
>





|
>







1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
    if( b<m[i] ) m[i] = b;
  }
}

/* Compute the edit distance between two strings.
**
** If an error occurs, return a negative number which is the error code.
**
** If pnMatch is not NULL, then *pnMatch is set to the number of characters
** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
** not contain the pattern for a prefix-search, then this is always the number
** of characters in z2. If pFrom does contain a prefix search pattern, then
** it is the number of characters in the prefix of z2 that was deemed to 
** match pFrom.
*/
static int editDist3Core(
  EditDist3FromString *pFrom,  /* The FROM string */
  const char *z2,              /* The TO string */
  int n2,                      /* Length of the TO string */
  const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
  int *pnMatch                 /* OUT: Characters in matched prefix */
){
  int k, n;
  int i1, b1;
  int i2, b2;
  EditDist3FromString f = *pFrom;
  EditDist3To *a2;
  unsigned int *m;
1277
1278
1279
1280
1281
1282
1283

1284
1285
1286

1287

1288








1289
1290
1291
1292
1293
1294
1295
    }
    printf("\n");
  }
#endif

  /* Free memory allocations and return the result */
  res = (int)m[szRow*(n2+1)-1];

  if( f.isPrefix ){
    for(i2=f.n; i2<n2; i2++){
      int b = m[szRow*i2-1];

      if( b<res ) res = b;

    }








  }

editDist3Abort:
  for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
  sqlite3_free(m);
  return res;
}







>

|

>
|
>
|
>
>
>
>
>
>
>
>







1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
    }
    printf("\n");
  }
#endif

  /* Free memory allocations and return the result */
  res = (int)m[szRow*(n2+1)-1];
  n = n2;
  if( f.isPrefix ){
    for(i2=1; i2<=n2; i2++){
      int b = m[szRow*i2-1];
      if( b<=res ){ 
        res = b;
        n = i2 - 1;
      }
    }
  }
  if( pnMatch ){
    int nExtra = 0;
    for(k=0; k<n; k++){
      if( (z2[k] & 0xc0)==0x80 ) nExtra++;
    }
    *pnMatch = n - nExtra;
  }

editDist3Abort:
  for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
  sqlite3_free(m);
  return res;
}
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
    int dist;

    pFrom = editDist3FromStringNew(pLang, zA, nA);
    if( pFrom==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }
    dist = editDist3Core(pFrom, zB, nB, pLang);
    editDist3FromStringDelete(pFrom);
    sqlite3_result_int(context, dist);
  } 
}

/*
** Register the editDist3 function with SQLite







|







1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
    int dist;

    pFrom = editDist3FromStringNew(pLang, zA, nA);
    if( pFrom==0 ){
      sqlite3_result_error_nomem(context);
      return;
    }
    dist = editDist3Core(pFrom, zB, nB, pLang, 0);
    editDist3FromStringDelete(pFrom);
    sqlite3_result_int(context, dist);
  } 
}

/*
** Register the editDist3 function with SQLite
1413
1414
1415
1416
1417
1418
1419















1420
1421
1422
1423
1424
1425
1426
        c = (c<<6) + (0x3f & z[i++]);
      }
    }
  }
  *pSize = i;
  return c;
}
















/*
** Table of translations from unicode characters into ASCII.
*/
static const struct {
 unsigned short int cFrom;
 unsigned char cTo0, cTo1;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
        c = (c<<6) + (0x3f & z[i++]);
      }
    }
  }
  *pSize = i;
  return c;
}

/*
** Return the number of characters in the utf-8 string in the nIn byte
** buffer pointed to by zIn.
*/
static int utf8Charlen(const char *zIn, int nIn){
  int i;
  int nChar = 0;
  for(i=0; i<nIn; nChar++){
    int sz;
    utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
    i += sz;
  }
  return nChar;
}

/*
** Table of translations from unicode characters into ASCII.
*/
static const struct {
 unsigned short int cFrom;
 unsigned char cTo0, cTo1;
1863
1864
1865
1866
1867
1868
1869







































1870
1871
1872
1873
1874
1875
1876
      }
      if( c ) zOut[nOut++] = '?';
    }
  }
  zOut[nOut] = 0;
  return zOut;
}








































/*
**    spellfix1_translit(X)
**
** Convert a string that contains non-ASCII Roman characters into 
** pure ASCII.
*/







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
      }
      if( c ) zOut[nOut++] = '?';
    }
  }
  zOut[nOut] = 0;
  return zOut;
}

/*
** Return the number of characters in the shortest prefix of the input
** string that transliterates to an ASCII string nTrans bytes or longer.
** Or, if the transliteration of the input string is less than nTrans
** bytes in size, return the number of characters in the input string.
*/
static int translen_to_charlen(const char *zIn, int nIn, int nTrans){
  int i, c, sz, nOut;
  int nChar;

  i = nOut = 0;
  for(nChar=0; i<nIn && nOut<nTrans; nChar++){
    c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
    i += sz;

    nOut++;
    if( c>=128 ){
      int xTop, xBtm, x;
      xTop = sizeof(translit)/sizeof(translit[0]) - 1;
      xBtm = 0;
      while( xTop>=xBtm ){
        x = (xTop + xBtm)/2;
        if( translit[x].cFrom==c ){
          if( translit[x].cTo1 ) nOut++;
          if( c==0x0429 || c== 0x0449 ) nOut += 2;
          break;
        }else if( translit[x].cFrom>c ){
          xTop = x-1;
        }else{
          xBtm = x+1;
        }
      }
    }
  }

  return nChar;
}


/*
**    spellfix1_translit(X)
**
** Convert a string that contains non-ASCII Roman characters into 
** pure ASCII.
*/
2088
2089
2090
2091
2092
2093
2094

2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107

2108
2109
2110
2111
2112
2113
2114
  EditDist3Config *pConfig3; /* Parsed edit distance costs */
};

/* Fuzzy-search cursor object */
struct spellfix1_cursor {
  sqlite3_vtab_cursor base;    /* Base class - must be first */
  spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */

  int nRow;                    /* Number of rows of content */
  int nAlloc;                  /* Number of allocated rows */
  int iRow;                    /* Current row of content */
  int iLang;                   /* Value of the lang= constraint */
  int iTop;                    /* Value of the top= constraint */
  int iScope;                  /* Value of the scope= constraint */
  int nSearch;                 /* Number of vocabulary items checked */
  struct spellfix1_row {       /* For each row of content */
    sqlite3_int64 iRowid;         /* Rowid for this row */
    char *zWord;                  /* Text for this row */
    int iRank;                    /* Rank for this row */
    int iDistance;                /* Distance from pattern for this row */
    int iScore;                   /* Score for sorting */

    char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
  } *a; 
};

/*
** Construct one or more SQL statements from the format string given
** and then evaluate those statements. The success code is written







>













>







2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
  EditDist3Config *pConfig3; /* Parsed edit distance costs */
};

/* Fuzzy-search cursor object */
struct spellfix1_cursor {
  sqlite3_vtab_cursor base;    /* Base class - must be first */
  spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */
  char *zPattern;              /* rhs of MATCH clause */
  int nRow;                    /* Number of rows of content */
  int nAlloc;                  /* Number of allocated rows */
  int iRow;                    /* Current row of content */
  int iLang;                   /* Value of the lang= constraint */
  int iTop;                    /* Value of the top= constraint */
  int iScope;                  /* Value of the scope= constraint */
  int nSearch;                 /* Number of vocabulary items checked */
  struct spellfix1_row {       /* For each row of content */
    sqlite3_int64 iRowid;         /* Rowid for this row */
    char *zWord;                  /* Text for this row */
    int iRank;                    /* Rank for this row */
    int iDistance;                /* Distance from pattern for this row */
    int iScore;                   /* Score for sorting */
    int iMatchlen;                /* Value of matchlen column (or -1) */
    char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
  } *a; 
};

/*
** Construct one or more SQL statements from the format string given
** and then evaluate those statements. The success code is written
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210

/*
** xConnect/xCreate method for the spellfix1 module. Arguments are:
**
**   argv[0]   -> module name  ("spellfix1")
**   argv[1]   -> database name
**   argv[2]   -> table name
**   argv[3].. -> optional arguments (currently ignored)
*/
static int spellfix1Init(
  int isCreate,
  sqlite3 *db,
  void *pAux,
  int argc, const char *const*argv,
  sqlite3_vtab **ppVTab,







|







2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303

/*
** xConnect/xCreate method for the spellfix1 module. Arguments are:
**
**   argv[0]   -> module name  ("spellfix1")
**   argv[1]   -> database name
**   argv[2]   -> table name
**   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
*/
static int spellfix1Init(
  int isCreate,
  sqlite3 *db,
  void *pAux,
  int argc, const char *const*argv,
  sqlite3_vtab **ppVTab,
2234
2235
2236
2237
2238
2239
2240
2241

2242
2243
2244
2245
2246
2247
2248
2249

2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
      memcpy(pNew->zDbName, zDbName, nDbName+1);
      pNew->zTableName = sqlite3_mprintf("%s", zTableName);
      pNew->db = db;
      if( pNew->zTableName==0 ){
        rc = SQLITE_NOMEM;
      }else{
        rc = sqlite3_declare_vtab(db, 
             "CREATE TABLE x(word,rank,distance,langid,"

             "score, phonehash,top HIDDEN,scope HIDDEN,srchcnt HIDDEN,"
             "soundslike HIDDEN,command HIDDEN)"
        );
#define SPELLFIX_COL_WORD            0
#define SPELLFIX_COL_RANK            1
#define SPELLFIX_COL_DISTANCE        2
#define SPELLFIX_COL_LANGID          3
#define SPELLFIX_COL_SCORE           4

#define SPELLFIX_COL_PHONEHASH       5
#define SPELLFIX_COL_TOP             6
#define SPELLFIX_COL_SCOPE           7
#define SPELLFIX_COL_SRCHCNT         8
#define SPELLFIX_COL_SOUNDSLIKE      9
#define SPELLFIX_COL_COMMAND        10
      }
      if( rc==SQLITE_OK && isCreate ){
        sqlite3_uint64 r;
        spellfix1DbExec(&rc, db,
           "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
           "  id INTEGER PRIMARY KEY,\n"
           "  rank INT,\n"







|
>
|
|






>
|
|
|
|
|
|







2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
      memcpy(pNew->zDbName, zDbName, nDbName+1);
      pNew->zTableName = sqlite3_mprintf("%s", zTableName);
      pNew->db = db;
      if( pNew->zTableName==0 ){
        rc = SQLITE_NOMEM;
      }else{
        rc = sqlite3_declare_vtab(db, 
             "CREATE TABLE x(word,rank,distance,langid, "
             "score, matchlen, phonehash, "
             "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
             "soundslike HIDDEN, command HIDDEN)"
        );
#define SPELLFIX_COL_WORD            0
#define SPELLFIX_COL_RANK            1
#define SPELLFIX_COL_DISTANCE        2
#define SPELLFIX_COL_LANGID          3
#define SPELLFIX_COL_SCORE           4
#define SPELLFIX_COL_MATCHLEN        5
#define SPELLFIX_COL_PHONEHASH       6
#define SPELLFIX_COL_TOP             7
#define SPELLFIX_COL_SCOPE           8
#define SPELLFIX_COL_SRCHCNT         9
#define SPELLFIX_COL_SOUNDSLIKE     10
#define SPELLFIX_COL_COMMAND        11
      }
      if( rc==SQLITE_OK && isCreate ){
        sqlite3_uint64 r;
        spellfix1DbExec(&rc, db,
           "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
           "  id INTEGER PRIMARY KEY,\n"
           "  rank INT,\n"
2346
2347
2348
2349
2350
2351
2352

2353
2354
2355
2356
2357
2358
2359
/*
** Close a fuzzy-search cursor.
*/
static int spellfix1Close(sqlite3_vtab_cursor *cur){
  spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
  spellfix1ResetCursor(pCur);
  spellfix1ResizeCursor(pCur, 0);

  sqlite3_free(pCur);
  return SQLITE_OK;
}

/*
** Search for terms of these forms:
**







>







2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
/*
** Close a fuzzy-search cursor.
*/
static int spellfix1Close(sqlite3_vtab_cursor *cur){
  spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
  spellfix1ResetCursor(pCur);
  spellfix1ResizeCursor(pCur, 0);
  sqlite3_free(pCur->zPattern);
  sqlite3_free(pCur);
  return SQLITE_OK;
}

/*
** Search for terms of these forms:
**
2579
2580
2581
2582
2583
2584
2585

2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
  for(i=0; i<pCur->nRow; i++){
    if( pCur->a[i].iScore>iWorst ){
      iWorst = pCur->a[i].iScore;
      idxWorst = i;
    }
  }
  while( sqlite3_step(pStmt)==SQLITE_ROW ){

    iRank = sqlite3_column_int(pStmt, 2);
    if( p->pMatchStr3 ){
      int nWord = sqlite3_column_bytes(pStmt, 1);
      zWord = (const char*)sqlite3_column_text(pStmt, 1);
      iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang);
    }else{
      zK1 = (const char*)sqlite3_column_text(pStmt, 3);
      if( zK1==0 ) continue;
      iDist = editdist1(p->zPattern, zK1, pCur->iLang);
    }
    pCur->nSearch++;
    iScore = spellfix1Score(iDist,iRank);
    if( p->iMaxDist>=0 ){
      if( iDist>p->iMaxDist ) continue;
      if( pCur->nRow>=pCur->nAlloc-1 ){
        spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);







>




|



|







2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
  for(i=0; i<pCur->nRow; i++){
    if( pCur->a[i].iScore>iWorst ){
      iWorst = pCur->a[i].iScore;
      idxWorst = i;
    }
  }
  while( sqlite3_step(pStmt)==SQLITE_ROW ){
    int iMatchlen = -1;
    iRank = sqlite3_column_int(pStmt, 2);
    if( p->pMatchStr3 ){
      int nWord = sqlite3_column_bytes(pStmt, 1);
      zWord = (const char*)sqlite3_column_text(pStmt, 1);
      iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
    }else{
      zK1 = (const char*)sqlite3_column_text(pStmt, 3);
      if( zK1==0 ) continue;
      iDist = editdist1(p->zPattern, zK1, pCur->iLang, 0);
    }
    pCur->nSearch++;
    iScore = spellfix1Score(iDist,iRank);
    if( p->iMaxDist>=0 ){
      if( iDist>p->iMaxDist ) continue;
      if( pCur->nRow>=pCur->nAlloc-1 ){
        spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
2611
2612
2613
2614
2615
2616
2617

2618
2619
2620
2621
2622
2623
2624
      continue;
    }
    pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
    pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
    pCur->a[idx].iRank = iRank;
    pCur->a[idx].iDistance = iDist;
    pCur->a[idx].iScore = iScore;

    memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
    if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
    if( pCur->nRow==pCur->nAlloc ){
      iWorst = pCur->a[0].iScore;
      idxWorst = 0;
      for(i=1; i<pCur->nRow; i++){
        iScore = pCur->a[i].iScore;







>







2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
      continue;
    }
    pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
    pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
    pCur->a[idx].iRank = iRank;
    pCur->a[idx].iDistance = iDist;
    pCur->a[idx].iScore = iScore;
    pCur->a[idx].iMatchlen = iMatchlen;
    memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
    if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
    if( pCur->nRow==pCur->nAlloc ){
      iWorst = pCur->a[0].iScore;
      idxWorst = 0;
      for(i=1; i<pCur->nRow; i++){
        iScore = pCur->a[i].iScore;
2692
2693
2694
2695
2696
2697
2698


2699
2700
2701
2702
2703
2704
2705
  if( p->pConfig3 ){
    x.pLang = editDist3FindLang(p->pConfig3, iLang);
    pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
  }else{
    x.pLang = 0;
  }
  zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));


  if( zPattern==0 ) return SQLITE_NOMEM;
  nPattern = strlen(zPattern);
  if( zPattern[nPattern-1]=='*' ) nPattern--;
  zSql = sqlite3_mprintf(
     "SELECT id, word, rank, k1"
     "  FROM \"%w\".\"%w_vocab\""
     " WHERE langid=%d AND k2>=?1 AND k2<?2",







>
>







2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
  if( p->pConfig3 ){
    x.pLang = editDist3FindLang(p->pConfig3, iLang);
    pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
  }else{
    x.pLang = 0;
  }
  zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
  sqlite3_free(pCur->zPattern);
  pCur->zPattern = zPattern;
  if( zPattern==0 ) return SQLITE_NOMEM;
  nPattern = strlen(zPattern);
  if( zPattern[nPattern-1]=='*' ) nPattern--;
  zSql = sqlite3_mprintf(
     "SELECT id, word, rank, k1"
     "  FROM \"%w\".\"%w_vocab\""
     " WHERE langid=%d AND k2>=?1 AND k2<?2",
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756

  if( pCur->a ){
    qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
    pCur->iTop = iLimit;
    pCur->iScope = iScope;
  }
  sqlite3_finalize(pStmt);
  sqlite3_free(zPattern);
  editDist3FromStringDelete(pMatchStr3);
  return pCur->a ? x.rc : SQLITE_NOMEM;
}

/*
** This version of xFilter handles a full-table scan case
*/







<







2842
2843
2844
2845
2846
2847
2848

2849
2850
2851
2852
2853
2854
2855

  if( pCur->a ){
    qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
    pCur->iTop = iLimit;
    pCur->iScope = iScope;
  }
  sqlite3_finalize(pStmt);

  editDist3FromStringDelete(pMatchStr3);
  return pCur->a ? x.rc : SQLITE_NOMEM;
}

/*
** This version of xFilter handles a full-table scan case
*/
2825
2826
2827
2828
2829
2830
2831
























2832
2833
2834
2835
2836
2837
2838
    case SPELLFIX_COL_LANGID: {
      sqlite3_result_int(ctx, pCur->iLang);
      break;
    }
    case SPELLFIX_COL_SCORE: {
      sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
      break;
























    }
    case SPELLFIX_COL_PHONEHASH: {
      sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
      break;
    }
    case SPELLFIX_COL_TOP: {
      sqlite3_result_int(ctx, pCur->iTop);







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
    case SPELLFIX_COL_LANGID: {
      sqlite3_result_int(ctx, pCur->iLang);
      break;
    }
    case SPELLFIX_COL_SCORE: {
      sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
      break;
    }
    case SPELLFIX_COL_MATCHLEN: {
      int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
      if( iMatchlen<0 ){
        int nPattern = strlen(pCur->zPattern);
        char *zWord = pCur->a[pCur->iRow].zWord;
        int nWord = strlen(zWord);

        if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
          char *zTranslit;
          int res;
          zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
          if( !zTranslit ) return SQLITE_NOMEM;
          res = editdist1(pCur->zPattern, zTranslit, pCur->iLang, &iMatchlen);
          sqlite3_free(zTranslit);
          if( res<0 ) return SQLITE_NOMEM;
          iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
        }else{
          iMatchlen = utf8Charlen(zWord, nWord);
        }
      }

      sqlite3_result_int(ctx, iMatchlen);
      break;
    }
    case SPELLFIX_COL_PHONEHASH: {
      sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
      break;
    }
    case SPELLFIX_COL_TOP: {
      sqlite3_result_int(ctx, pCur->iTop);

Added test/spellfix.test.







































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# 2012 July 12
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix spellfix

register_spellfix_module db

set vocab {
rabbi rabbit rabbits rabble rabid rabies raccoon raccoons race raced racer
racers races racetrack racial racially racing rack racked racket racketeer
racketeering racketeers rackets racking racks radar radars radial radially
radian radiance radiant radiantly radiate radiated radiates radiating radiation
radiations radiator radiators radical radically radicals radices radii radio
radioactive radioastronomy radioed radiography radioing radiology radios radish
radishes radium radius radix radon raft rafter rafters rafts rag rage raged
rages ragged raggedly raggedness raging rags ragweed raid raided raider raiders
raiding raids rail railed railer railers railing railroad railroaded railroader
railroaders railroading railroads rails railway railways raiment rain rainbow
raincoat raincoats raindrop raindrops rained rainfall rainier rainiest raining
rains rainstorm rainy raise raised raiser raisers raises raisin raising rake
raked rakes raking rallied rallies rally rallying ram ramble rambler rambles
rambling ramblings ramification ramifications ramp rampage rampant rampart
ramps ramrod rams ran ranch ranched rancher ranchers ranches ranching rancid
random randomization randomize randomized randomizes randomly randomness randy
rang range ranged rangeland ranger rangers ranges ranging rangy rank ranked
ranker rankers rankest ranking rankings rankle rankly rankness ranks ransack
ransacked ransacking ransacks ransom ransomer ransoming ransoms rant ranted
ranter ranters ranting rants rap rapacious rape raped raper rapes rapid
rapidity rapidly rapids rapier raping rapport rapprochement raps rapt raptly
rapture raptures rapturous rare rarely rareness rarer rarest rarity rascal
rascally rascals rash rasher rashly rashness rasp raspberry rasped rasping
rasps raster rat rate rated rater raters rates rather ratification ratified
ratifies ratify ratifying rating ratings ratio ration rational rationale
rationales rationalities rationality rationalization rationalizations
rationalize rationalized rationalizes rationalizing rationally rationals
rationing rations ratios rats rattle rattled rattler rattlers rattles
rattlesnake rattlesnakes rattling raucous ravage ravaged ravager ravagers
ravages ravaging rave raved raven ravening ravenous ravenously ravens raves
ravine ravines raving ravings raw rawer rawest rawly rawness ray rays raze
razor razors re reabbreviate reabbreviated reabbreviates reabbreviating reach
reachability reachable reachably reached reacher reaches reaching reacquired
react reacted reacting reaction reactionaries reactionary reactions reactivate
reactivated reactivates reactivating reactivation reactive reactively
reactivity reactor reactors reacts read readability readable reader readers
readied readier readies readiest readily readiness reading readings readjusted
readout readouts reads ready readying real realest realign realigned realigning
realigns realism realist realistic realistically realists realities reality
}

do_test 1.1 {
  execsql { CREATE VIRTUAL TABLE t1 USING spellfix1 }
  foreach word $vocab {
    execsql { INSERT INTO t1(word) VALUES($word) }
  }
} {}

foreach {tn word res} {
  1   raxpi*     {rasping 5 rasped 5 raspberry 6 rasp 4 rasps 4}
  2   ril*       {rail 4 railway 4 railing 4 rails 4 railways 4}
  3   rilis*     {realist 6 realistic 6 realistically 6 realists 6 realism 6}
  4   reail*     {realities 3 reality 3 real 3 realest 3 realist 3}
  5   ras*       {rasp 3 rash 3 rasped 3 rasping 3 rasps 3}
  6   realistss* {realists 8 realigns 8 realistic 9 realistically 9 realest 7}
  7   realistss  {realists 8 realist 7 realigns 8 realistic 9 realest 7}
  8   rllation*  {realities 9 reality 7 rallied 7 railed 4}
  9   renstom*   {rainstorm 8 ransomer 6 ransom 6 ransoming 6 ransoms 6}
} {
  do_execsql_test 1.2.$tn {
    SELECT word, matchlen FROM t1 WHERE word MATCH $word LIMIT 5
  } $res
}


do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE t2 USING spellfix1;
  INSERT INTO t2 (word, soundslike) VALUES('school', 'skuul');
  INSERT INTO t2 (word, soundslike) VALUES('psalm', 'sarm');
  SELECT word, matchlen FROM t2 WHERE word MATCH 'sar*' LIMIT 5;
} {psalm 4}

do_execsql_test 2.2 {
  SELECT word, matchlen FROM t2 WHERE word MATCH 'skol*' LIMIT 5;
} {school 6}

set vocab {
kangaroo kanji kappa karate keel keeled keeling keels keen keener keenest
keenly keenness keep keeper keepers keeping keeps ken kennel kennels kept
kerchief kerchiefs kern kernel kernels kerosene ketchup kettle
kettles key keyboard keyboards keyed keyhole keying keynote keypad keypads keys
keystroke keystrokes keyword keywords kick kicked kicker kickers kicking
kickoff kicks kid kidded kiddie kidding kidnap kidnapper kidnappers kidnapping
kidnappings kidnaps kidney kidneys kids kill killed killer killers killing
killingly killings killjoy kills kilobit kilobits kiloblock kilobyte kilobytes
kilogram kilograms kilohertz kilohm kilojoule kilometer kilometers kiloton
kilovolt kilowatt kiloword kimono kin kind kinder kindergarten kindest
kindhearted kindle kindled kindles kindling kindly kindness kindred kinds
kinetic king kingdom kingdoms kingly kingpin kings kink kinky kinship kinsman
kiosk kiss kissed kisser kissers kisses kissing kit kitchen kitchenette
kitchens kite kited kites kiting kits kitten kittenish kittens kitty klaxon
kludge kludges klystron knack knapsack knapsacks knave knaves knead kneads knee
kneecap kneed kneeing kneel kneeled kneeling kneels knees knell knells knelt
knew knife knifed knifes knifing knight knighted knighthood knighting knightly
knights knit knits knives knob knobs knock knockdown knocked knocker knockers
knocking knockout knocks knoll knolls knot knots knotted knotting know knowable
knower knowhow knowing knowingly knowledge knowledgeable known knows knuckle
knuckled knuckles koala kosher kudo
}

do_execsql_test 3.1 {
  CREATE TABLE costs(iLang, cFrom, cTo, iCost);
  INSERT INTO costs VALUES(0, 'a', 'e', 1);
  INSERT INTO costs VALUES(0, 'e', 'i', 1);
  INSERT INTO costs VALUES(0, 'i', 'o', 1);
  INSERT INTO costs VALUES(0, 'o', 'u', 1);
  INSERT INTO costs VALUES(0, 'u', 'a', 1);
  CREATE VIRTUAL TABLE t3 USING spellfix1(edit_cost_table=costs);
}

do_test 3.2 {
  foreach w $vocab {
    execsql { INSERT INTO t3(word) VALUES($w) }
  }
} {}

breakpoint
foreach {tn word res} {
  1   kos*     {kosher 3 kiosk 4 kudo 2 kappa 1 keypad 1}
  2   kellj*   {killjoy 5 killed 4 killingly 4 kill 4 killer 4}
  3   kellj    {kill 4 kills 5 killjoy 7 keel 4 killed 6}
} {
  do_execsql_test 1.2.$tn {
    SELECT word, matchlen FROM t3 WHERE word MATCH $word LIMIT 5
  } $res
} 

finish_test