/ Check-in [dc734c5b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improve performance of editdist3() by keeping the costs in sorted order. Also add a new regression test to editdist3().
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256:dc734c5b61464dfd6bfa7963f2ecce32e405a0c2ba1ef6f453ec9389da080256
User & Date: drh 2018-02-15 03:56:33
Context
2018-02-15
21:00
Do not allow parameters or schema references inside of WITH clause of triggers and views. This fixes a bug discovered by OSSFuzz and present since common-table-expressions were first added in 2014-02-03. check-in: b918d4b4 user: drh tags: trunk
03:56
Improve performance of editdist3() by keeping the costs in sorted order. Also add a new regression test to editdist3(). check-in: dc734c5b user: drh tags: trunk
03:05
Reduce the number of calls to strncmp() required to run editDist3Core(). check-in: afd6fbc0 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/misc/spellfix.c.

653
654
655
656
657
658
659









































































660
661
662
663
664
665
666
...
725
726
727
728
729
730
731






732
733
734
735
736
737
738
...
939
940
941
942
943
944
945
946
947

948
949
950
951
952
953
954
  memset(p, 0, sizeof(*p));
}
static void editDist3ConfigDelete(void *pIn){
  EditDist3Config *p = (EditDist3Config*)pIn;
  editDist3ConfigClear(p);
  sqlite3_free(p);
}










































































/*
** Load all edit-distance weights from a table.
*/
static int editDist3ConfigLoad(
  EditDist3Config *p,      /* The edit distance configuration to load */
  sqlite3 *db,            /* Load from this database */
................................................................................
      memcpy(pCost->a + nFrom, zTo, nTo);
      pCost->pNext = pLang->pCost;
      pLang->pCost = pCost; 
    }
  }
  rc2 = sqlite3_finalize(pStmt);
  if( rc==SQLITE_OK ) rc = rc2;






  return rc;
}

/*
** Return the length (in bytes) of a utf-8 character.  Or return a maximum
** of N.
*/
................................................................................
  memset(a2, 0, sizeof(a2[0])*n2);

  /* Fill in the a1[] matrix for all characters of the TO string */
  for(i2=0; i2<n2; i2++){
    a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
    for(p=pLang->pCost; p; p=p->pNext){
      EditDist3Cost **apNew;
      if( p->nFrom>0 ) continue;
      if( i2+p->nTo>n2 ) continue;

      if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
      a2[i2].nIns++;
      apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
      if( apNew==0 ){
        res = -1;  /* Out of memory */
        goto editDist3Abort;
      }







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>
>
>
>







 







|

>







653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
...
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
....
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
  memset(p, 0, sizeof(*p));
}
static void editDist3ConfigDelete(void *pIn){
  EditDist3Config *p = (EditDist3Config*)pIn;
  editDist3ConfigClear(p);
  sqlite3_free(p);
}

/* Compare the FROM values of two EditDist3Cost objects, for sorting.
** Return negative, zero, or positive if the A is less than, equal to,
** or greater than B.
*/
static int editDist3CostCompare(EditDist3Cost *pA, EditDist3Cost *pB){
  int n = pA->nFrom;
  int rc;
  if( n>pB->nFrom ) n = pB->nFrom;
  rc = strncmp(pA->a, pB->a, n);
  if( rc==0 ) rc = pA->nFrom - pB->nFrom;
  return rc;
}

/*
** Merge together two sorted lists of EditDist3Cost objects, in order
** of increasing FROM.
*/
static EditDist3Cost *editDist3CostMerge(
  EditDist3Cost *pA,
  EditDist3Cost *pB
){
  EditDist3Cost *pHead = 0;
  EditDist3Cost **ppTail = &pHead;
  EditDist3Cost *p;
  while( pA && pB ){
    if( editDist3CostCompare(pA,pB)<=0 ){
      p = pA;
      pA = pA->pNext;
    }else{
      p = pB;
      pB = pB->pNext;
    }
    *ppTail = p;
    ppTail =  &p->pNext;
  }
  if( pA ){
    *ppTail = pA;
  }else{
    *ppTail = pB;
  }
  return pHead;
}

/*
** Sort a list of EditDist3Cost objects into order of increasing FROM
*/
static EditDist3Cost *editDist3CostSort(EditDist3Cost *pList){
  EditDist3Cost *ap[60], *p;
  int i;
  int mx = 0;
  ap[0] = 0;
  ap[1] = 0;
  while( pList ){
    p = pList;
    pList = p->pNext;
    p->pNext = 0;
    for(i=0; ap[i]; i++){
      p = editDist3CostMerge(ap[i],p);
      ap[i] = 0;
    }
    ap[i] = p;
    if( i>mx ){
      mx = i;
      ap[i+1] = 0;
    }
  }
  p = 0;
  for(i=0; i<=mx; i++){
    if( ap[i] ) p = editDist3CostMerge(p,ap[i]);
  }
  return p;
}

/*
** Load all edit-distance weights from a table.
*/
static int editDist3ConfigLoad(
  EditDist3Config *p,      /* The edit distance configuration to load */
  sqlite3 *db,            /* Load from this database */
................................................................................
      memcpy(pCost->a + nFrom, zTo, nTo);
      pCost->pNext = pLang->pCost;
      pLang->pCost = pCost; 
    }
  }
  rc2 = sqlite3_finalize(pStmt);
  if( rc==SQLITE_OK ) rc = rc2;
  if( rc==SQLITE_OK ){
    int iLang;
    for(iLang=0; iLang<p->nLang; iLang++){
      p->a[iLang].pCost = editDist3CostSort(p->a[iLang].pCost);
    }
  }
  return rc;
}

/*
** Return the length (in bytes) of a utf-8 character.  Or return a maximum
** of N.
*/
................................................................................
  memset(a2, 0, sizeof(a2[0])*n2);

  /* Fill in the a1[] matrix for all characters of the TO string */
  for(i2=0; i2<n2; i2++){
    a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
    for(p=pLang->pCost; p; p=p->pNext){
      EditDist3Cost **apNew;
      if( p->nFrom>0 ) break;
      if( i2+p->nTo>n2 ) continue;
      if( p->a[0]>z2[i2] ) break;
      if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
      a2[i2].nIns++;
      apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
      if( apNew==0 ){
        res = -1;  /* Out of memory */
        goto editDist3Abort;
      }

Changes to test/spellfix4.test.

93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
...
336
337
338
339
340
341
342
343






344

345
346
  INSERT INTO cost1 VALUES
    (0, '', '?',  97),
    (0, '?', '',  98),
    (0, '?', '?', 99),
    (0, 'a', 'e', 50),
    (0, 'a', 'i', 70),
    (0, 'a', 'o', 75),
    (0, 'a', 'u', 85),
    (0, 'e', 'a', 50),
    (0, 'e', 'i', 50),
    (0, 'e', 'o', 75),
    (0, 'e', 'u', 85),
    (0, 'i', 'a', 70),
    (0, 'i', 'e', 50),
    (0, 'i', 'o', 75),
    (0, 'i', 'u', 85),
    (0, 'o', 'a', 75),
    (0, 'o', 'e', 75),
    (0, 'o', 'i', 75),
    (0, 'o', 'u', 40),
    (0, 'u', 'a', 85),
    (0, 'u', 'e', 85),
    (0, 'u', 'i', 85),
    (0, 'u', 'o', 40),
    (0, 'm', 'n', 45),
    (0, 'n', 'm', 45)
  ;
  CREATE TABLE words(x TEXT);
  INSERT INTO words VALUES
   ('abraham'),
................................................................................
} {{}}
do_execsql_test 310 {
  SELECT editdist3(a.x,b.x), a.x, b.x
    FROM words a, words b
   WHERE a.x<b.x
   ORDER BY 1, 2
   LIMIT 20
} {139 bucket pocket 149 manual mental 150 meter motor 169 crack trick 173 sinatra sonata 174 edition emotion 174 major motor 174 risk rose 174 state stone 194 deal detail 196 alert talent 196 analog catalog 196 deal legal 196 ford forum 196 risk trick 196 stone strong 197 china tina 197 congo logo 197 diana tina 197 florida gloria}









finish_test







|

|
|
|

|

|

|


|
|
|







 







|
>
>
>
>
>
>
|
>


93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
...
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
  INSERT INTO cost1 VALUES
    (0, '', '?',  97),
    (0, '?', '',  98),
    (0, '?', '?', 99),
    (0, 'a', 'e', 50),
    (0, 'a', 'i', 70),
    (0, 'a', 'o', 75),
    (0, 'a', 'u', 81),
    (0, 'e', 'a', 50),
    (0, 'e', 'i', 52),
    (0, 'e', 'o', 72),
    (0, 'e', 'u', 82),
    (0, 'i', 'a', 70),
    (0, 'i', 'e', 52),
    (0, 'i', 'o', 75),
    (0, 'i', 'u', 83),
    (0, 'o', 'a', 75),
    (0, 'o', 'e', 72),
    (0, 'o', 'i', 75),
    (0, 'o', 'u', 40),
    (0, 'u', 'a', 81),
    (0, 'u', 'e', 82),
    (0, 'u', 'i', 83),
    (0, 'u', 'o', 40),
    (0, 'm', 'n', 45),
    (0, 'n', 'm', 45)
  ;
  CREATE TABLE words(x TEXT);
  INSERT INTO words VALUES
   ('abraham'),
................................................................................
} {{}}
do_execsql_test 310 {
  SELECT editdist3(a.x,b.x), a.x, b.x
    FROM words a, words b
   WHERE a.x<b.x
   ORDER BY 1, 2
   LIMIT 20
} {139 bucket pocket 144 meter motor 149 manual mental 169 crack trick 173 sinatra sonata 174 edition emotion 174 major motor 174 risk rose 174 state stone 194 deal detail 196 alert talent 196 analog catalog 196 deal legal 196 ford forum 196 risk trick 196 stone strong 197 china tina 197 congo logo 197 diana tina 197 florida gloria}
do_execsql_test 320 {
  SELECT md5sum(ed||'/'||sx||'/'||sy||',') FROM (
      SELECT editdist3(a.x,b.x) AS ed, a.x AS sx, b.x AS sy
        FROM words a, words b
       WHERE a.x<b.x
       ORDER BY 1, 2
  )
} {69d0a31872203a775e19325ea98cd053}

finish_test