/ Check-in [2df0ebf9]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Performance improvement for GLOB and LIKE matching for patterns with two or more multi-character wildcards ("*" or "%").
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 2df0ebf95f6a25c77777c33685303e81550fd739
User & Date: drh 2016-12-01 18:57:58
Context
2016-12-01
19:58
Avoid clearing the EP_FromJoin flag from terms in ON clauses when flattening sub-selects. Possible fix for [2df0107b]. check-in: a427c405 user: dan tags: trunk
18:57
Performance improvement for GLOB and LIKE matching for patterns with two or more multi-character wildcards ("*" or "%"). check-in: 2df0ebf9 user: drh tags: trunk
18:49
Faster version of patternCompare() that uses new return values rather than an extra parameter to communicate wildcard information back up to parent searches. Closed-Leaf check-in: a1e2b6ce user: drh tags: pattern-compare-optimization
2016-11-30
16:54
Add the remember(V,PTR) extension function which copies an SQL value into an application variable. check-in: d2d30914 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/func.c.

   594    594   ** case.  Thus  'a' LIKE 'A' would be true. */
   595    595   static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
   596    596   /* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
   597    597   ** is case sensitive causing 'a' LIKE 'A' to be false */
   598    598   static const struct compareInfo likeInfoAlt = { '%', '_',   0, 0 };
   599    599   
   600    600   /*
   601         -** Compare two UTF-8 strings for equality where the first string can
   602         -** potentially be a "glob" or "like" expression.  Return true (1) if they
   603         -** are the same and false (0) if they are different.
          601  +** Possible error returns from patternMatch()
          602  +*/
          603  +#define SQLITE_MATCH             0
          604  +#define SQLITE_NOMATCH           1
          605  +#define SQLITE_NOWILDCARDMATCH   2
          606  +
          607  +/*
          608  +** Compare two UTF-8 strings for equality where the first string is
          609  +** a GLOB or LIKE expression.  Return values:
          610  +**
          611  +**    SQLITE_MATCH:            Match
          612  +**    SQLITE_NOMATCH:          No match
          613  +**    SQLITE_NOWILDCARDMATCH:  No match in spite of having * or % wildcards.
   604    614   **
   605    615   ** Globbing rules:
   606    616   **
   607    617   **      '*'       Matches any sequence of zero or more characters.
   608    618   **
   609    619   **      '?'       Matches exactly one character.
   610    620   **
................................................................................
   647    657     while( (c = Utf8Read(zPattern))!=0 ){
   648    658       if( c==matchAll ){  /* Match "*" */
   649    659         /* Skip over multiple "*" characters in the pattern.  If there
   650    660         ** are also "?" characters, skip those as well, but consume a
   651    661         ** single character of the input string for each "?" skipped */
   652    662         while( (c=Utf8Read(zPattern)) == matchAll || c == matchOne ){
   653    663           if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){
   654         -          return 0;
          664  +          return SQLITE_NOWILDCARDMATCH;
   655    665           }
   656    666         }
   657    667         if( c==0 ){
   658         -        return 1;   /* "*" at the end of the pattern matches */
          668  +        return SQLITE_MATCH;   /* "*" at the end of the pattern matches */
   659    669         }else if( c==matchOther ){
   660    670           if( pInfo->matchSet==0 ){
   661    671             c = sqlite3Utf8Read(&zPattern);
   662         -          if( c==0 ) return 0;
          672  +          if( c==0 ) return SQLITE_NOWILDCARDMATCH;
   663    673           }else{
   664    674             /* "[...]" immediately follows the "*".  We have to do a slow
   665    675             ** recursive search in this case, but it is an unusual case. */
   666    676             assert( matchOther<0x80 );  /* '[' is a single-byte character */
   667         -          while( *zString
   668         -                 && patternCompare(&zPattern[-1],zString,pInfo,matchOther)==0 ){
          677  +          while( *zString ){
          678  +            int bMatch = patternCompare(&zPattern[-1],zString,pInfo,matchOther);
          679  +            if( bMatch!=SQLITE_NOMATCH ) return bMatch;
   669    680               SQLITE_SKIP_UTF8(zString);
   670    681             }
   671         -          return *zString!=0;
          682  +          return SQLITE_NOWILDCARDMATCH;
   672    683           }
   673    684         }
   674    685   
   675    686         /* At this point variable c contains the first character of the
   676    687         ** pattern string past the "*".  Search in the input string for the
   677         -      ** first matching character and recursively contine the match from
          688  +      ** first matching character and recursively continue the match from
   678    689         ** that point.
   679    690         **
   680    691         ** For a case-insensitive search, set variable cx to be the same as
   681    692         ** c but in the other case and search the input string for either
   682    693         ** c or cx.
   683    694         */
   684    695         if( c<=0x80 ){
   685    696           u32 cx;
          697  +        int bMatch;
   686    698           if( noCase ){
   687    699             cx = sqlite3Toupper(c);
   688    700             c = sqlite3Tolower(c);
   689    701           }else{
   690    702             cx = c;
   691    703           }
   692    704           while( (c2 = *(zString++))!=0 ){
   693    705             if( c2!=c && c2!=cx ) continue;
   694         -          if( patternCompare(zPattern,zString,pInfo,matchOther) ) return 1;
          706  +          bMatch = patternCompare(zPattern,zString,pInfo,matchOther);
          707  +          if( bMatch!=SQLITE_NOMATCH ) return bMatch;
   695    708           }
   696    709         }else{
          710  +        int bMatch;
   697    711           while( (c2 = Utf8Read(zString))!=0 ){
   698    712             if( c2!=c ) continue;
   699         -          if( patternCompare(zPattern,zString,pInfo,matchOther) ) return 1;
          713  +          bMatch = patternCompare(zPattern,zString,pInfo,matchOther);
          714  +          if( bMatch!=SQLITE_NOMATCH ) return bMatch;
   700    715           }
   701    716         }
   702         -      return 0;
          717  +      return SQLITE_NOWILDCARDMATCH;
   703    718       }
   704    719       if( c==matchOther ){
   705    720         if( pInfo->matchSet==0 ){
   706    721           c = sqlite3Utf8Read(&zPattern);
   707         -        if( c==0 ) return 0;
          722  +        if( c==0 ) return SQLITE_NOMATCH;
   708    723           zEscaped = zPattern;
   709    724         }else{
   710    725           u32 prior_c = 0;
   711    726           int seen = 0;
   712    727           int invert = 0;
   713    728           c = sqlite3Utf8Read(&zString);
   714         -        if( c==0 ) return 0;
          729  +        if( c==0 ) return SQLITE_NOMATCH;
   715    730           c2 = sqlite3Utf8Read(&zPattern);
   716    731           if( c2=='^' ){
   717    732             invert = 1;
   718    733             c2 = sqlite3Utf8Read(&zPattern);
   719    734           }
   720    735           if( c2==']' ){
   721    736             if( c==']' ) seen = 1;
................................................................................
   731    746                 seen = 1;
   732    747               }
   733    748               prior_c = c2;
   734    749             }
   735    750             c2 = sqlite3Utf8Read(&zPattern);
   736    751           }
   737    752           if( c2==0 || (seen ^ invert)==0 ){
   738         -          return 0;
          753  +          return SQLITE_NOMATCH;
   739    754           }
   740    755           continue;
   741    756         }
   742    757       }
   743    758       c2 = Utf8Read(zString);
   744    759       if( c==c2 ) continue;
   745    760       if( noCase  && sqlite3Tolower(c)==sqlite3Tolower(c2) && c<0x80 && c2<0x80 ){
   746    761         continue;
   747    762       }
   748    763       if( c==matchOne && zPattern!=zEscaped && c2!=0 ) continue;
   749         -    return 0;
          764  +    return SQLITE_NOMATCH;
   750    765     }
   751         -  return *zString==0;
          766  +  return *zString==0 ? SQLITE_MATCH : SQLITE_NOMATCH;
   752    767   }
   753    768   
   754    769   /*
   755         -** The sqlite3_strglob() interface.
          770  +** The sqlite3_strglob() interface.  Return 0 on a match (like strcmp()) and
          771  +** non-zero if there is no match.
   756    772   */
   757    773   int sqlite3_strglob(const char *zGlobPattern, const char *zString){
   758         -  return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, '[')==0;
          774  +  return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, '[');
   759    775   }
   760    776   
   761    777   /*
   762         -** The sqlite3_strlike() interface.
          778  +** The sqlite3_strlike() interface.  Return 0 on a match and non-zero for
          779  +** a miss - like strcmp().
   763    780   */
   764    781   int sqlite3_strlike(const char *zPattern, const char *zStr, unsigned int esc){
   765         -  return patternCompare((u8*)zPattern, (u8*)zStr, &likeInfoNorm, esc)==0;
          782  +  return patternCompare((u8*)zPattern, (u8*)zStr, &likeInfoNorm, esc);
   766    783   }
   767    784   
   768    785   /*
   769    786   ** Count the number of times that the LIKE operator (or GLOB which is
   770    787   ** just a variation of LIKE) gets called.  This is used for testing
   771    788   ** only.
   772    789   */
................................................................................
   839    856     }else{
   840    857       escape = pInfo->matchSet;
   841    858     }
   842    859     if( zA && zB ){
   843    860   #ifdef SQLITE_TEST
   844    861       sqlite3_like_count++;
   845    862   #endif
   846         -    sqlite3_result_int(context, patternCompare(zB, zA, pInfo, escape));
          863  +    sqlite3_result_int(context, patternCompare(zB, zA, pInfo, escape)==SQLITE_MATCH);
   847    864     }
   848    865   }
   849    866   
   850    867   /*
   851    868   ** Implementation of the NULLIF(x,y) function.  The result is the first
   852    869   ** argument if the arguments are different.  The result is NULL if the
   853    870   ** arguments are equal to each other.