/ Check-in [a1e2b6ce]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Faster version of patternCompare() that uses new return values rather than an extra parameter to communicate wildcard information back up to parent searches.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | pattern-compare-optimization
Files: files | file ages | folders
SHA1: a1e2b6ce3af690ae91bda3d056357205c4018da7
User & Date: drh 2016-12-01 18:49:40
Context
2016-12-01
18:57
Performance improvement for GLOB and LIKE matching for patterns with two or more multi-character wildcards ("*" or "%"). check-in: 2df0ebf9 user: drh tags: trunk
18:49
Faster version of patternCompare() that uses new return values rather than an extra parameter to communicate wildcard information back up to parent searches. Closed-Leaf check-in: a1e2b6ce user: drh tags: pattern-compare-optimization
17:34
Modify the patternCompare() function (used for GLOB, LIKE) to better handle patterns containing multiple wildcard characters ("*", "%"). check-in: c5e5614d user: dan tags: pattern-compare-optimization
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/func.c.

   594    594   ** case.  Thus  'a' LIKE 'A' would be true. */
   595    595   static const struct compareInfo likeInfoNorm = { '%', '_',   0, 1 };
   596    596   /* If SQLITE_CASE_SENSITIVE_LIKE is defined, then the LIKE operator
   597    597   ** is case sensitive causing 'a' LIKE 'A' to be false */
   598    598   static const struct compareInfo likeInfoAlt = { '%', '_',   0, 0 };
   599    599   
   600    600   /*
   601         -** Compare two UTF-8 strings for equality where the first string can
   602         -** potentially be a "glob" or "like" expression.  Return true (1) if they
   603         -** are the same and false (0) if they are different.
          601  +** Possible error returns from patternMatch()
          602  +*/
          603  +#define SQLITE_MATCH             0
          604  +#define SQLITE_NOMATCH           1
          605  +#define SQLITE_NOWILDCARDMATCH   2
          606  +
          607  +/*
          608  +** Compare two UTF-8 strings for equality where the first string is
          609  +** a GLOB or LIKE expression.  Return values:
          610  +**
          611  +**    SQLITE_MATCH:            Match
          612  +**    SQLITE_NOMATCH:          No match
          613  +**    SQLITE_NOWILDCARDMATCH:  No match in spite of having * or % wildcards.
   604    614   **
   605    615   ** Globbing rules:
   606    616   **
   607    617   **      '*'       Matches any sequence of zero or more characters.
   608    618   **
   609    619   **      '?'       Matches exactly one character.
   610    620   **
................................................................................
   632    642   **
   633    643   ** This routine is usually quick, but can be N**2 in the worst case.
   634    644   */
   635    645   static int patternCompare(
   636    646     const u8 *zPattern,              /* The glob pattern */
   637    647     const u8 *zString,               /* The string to compare against the glob */
   638    648     const struct compareInfo *pInfo, /* Information about how to do the compare */
   639         -  u32 matchOther,                  /* The escape char (LIKE) or '[' (GLOB) */
   640         -  int *pbSeenMatchAll              /* OUT: True if have seen matchAll */
          649  +  u32 matchOther                   /* The escape char (LIKE) or '[' (GLOB) */
   641    650   ){
   642    651     u32 c, c2;                       /* Next pattern and input string chars */
   643    652     u32 matchOne = pInfo->matchOne;  /* "?" or "_" */
   644    653     u32 matchAll = pInfo->matchAll;  /* "*" or "%" */
   645    654     u8 noCase = pInfo->noCase;       /* True if uppercase==lowercase */
   646    655     const u8 *zEscaped = 0;          /* One past the last escaped input char */
   647    656     
   648    657     while( (c = Utf8Read(zPattern))!=0 ){
   649    658       if( c==matchAll ){  /* Match "*" */
   650    659         /* Skip over multiple "*" characters in the pattern.  If there
   651    660         ** are also "?" characters, skip those as well, but consume a
   652    661         ** single character of the input string for each "?" skipped */
   653         -      *pbSeenMatchAll = 1;
   654    662         while( (c=Utf8Read(zPattern)) == matchAll || c == matchOne ){
   655    663           if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){
   656         -          return 0;
          664  +          return SQLITE_NOWILDCARDMATCH;
   657    665           }
   658    666         }
   659    667         if( c==0 ){
   660         -        return 1;   /* "*" at the end of the pattern matches */
          668  +        return SQLITE_MATCH;   /* "*" at the end of the pattern matches */
   661    669         }else if( c==matchOther ){
   662    670           if( pInfo->matchSet==0 ){
   663    671             c = sqlite3Utf8Read(&zPattern);
   664         -          if( c==0 ) return 0;
          672  +          if( c==0 ) return SQLITE_NOWILDCARDMATCH;
   665    673           }else{
   666         -          int bMA = 0;            /* True if patternCompare sees matchAll */
   667    674             /* "[...]" immediately follows the "*".  We have to do a slow
   668    675             ** recursive search in this case, but it is an unusual case. */
   669    676             assert( matchOther<0x80 );  /* '[' is a single-byte character */
   670         -          while( *zString
   671         -            && patternCompare(&zPattern[-1],zString,pInfo,matchOther,&bMA)==0
   672         -          ){
   673         -            if( bMA ) return 0;
          677  +          while( *zString ){
          678  +            int bMatch = patternCompare(&zPattern[-1],zString,pInfo,matchOther);
          679  +            if( bMatch!=SQLITE_NOMATCH ) return bMatch;
   674    680               SQLITE_SKIP_UTF8(zString);
   675    681             }
   676         -          return *zString!=0;
          682  +          return SQLITE_NOWILDCARDMATCH;
   677    683           }
   678    684         }
   679    685   
   680    686         /* At this point variable c contains the first character of the
   681    687         ** pattern string past the "*".  Search in the input string for the
   682    688         ** first matching character and recursively continue the match from
   683    689         ** that point.
................................................................................
   684    690         **
   685    691         ** For a case-insensitive search, set variable cx to be the same as
   686    692         ** c but in the other case and search the input string for either
   687    693         ** c or cx.
   688    694         */
   689    695         if( c<=0x80 ){
   690    696           u32 cx;
   691         -        int bMatchAll = 0;
          697  +        int bMatch;
   692    698           if( noCase ){
   693    699             cx = sqlite3Toupper(c);
   694    700             c = sqlite3Tolower(c);
   695    701           }else{
   696    702             cx = c;
   697    703           }
   698    704           while( (c2 = *(zString++))!=0 ){
   699    705             if( c2!=c && c2!=cx ) continue;
   700         -          if( patternCompare(zPattern,zString,pInfo,matchOther, &bMatchAll) ){
   701         -            return 1;
   702         -          }
   703         -          if( bMatchAll ) break;
          706  +          bMatch = patternCompare(zPattern,zString,pInfo,matchOther);
          707  +          if( bMatch!=SQLITE_NOMATCH ) return bMatch;
   704    708           }
   705    709         }else{
   706         -        int bMatchAll = 0;
          710  +        int bMatch;
   707    711           while( (c2 = Utf8Read(zString))!=0 ){
   708    712             if( c2!=c ) continue;
   709         -          if( patternCompare(zPattern,zString,pInfo,matchOther, &bMatchAll) ){
   710         -            return 1;
   711         -          }
   712         -          if( bMatchAll ) break;
          713  +          bMatch = patternCompare(zPattern,zString,pInfo,matchOther);
          714  +          if( bMatch!=SQLITE_NOMATCH ) return bMatch;
   713    715           }
   714    716         }
   715         -      return 0;
          717  +      return SQLITE_NOWILDCARDMATCH;
   716    718       }
   717    719       if( c==matchOther ){
   718    720         if( pInfo->matchSet==0 ){
   719    721           c = sqlite3Utf8Read(&zPattern);
   720         -        if( c==0 ) return 0;
          722  +        if( c==0 ) return SQLITE_NOMATCH;
   721    723           zEscaped = zPattern;
   722    724         }else{
   723    725           u32 prior_c = 0;
   724    726           int seen = 0;
   725    727           int invert = 0;
   726    728           c = sqlite3Utf8Read(&zString);
   727         -        if( c==0 ) return 0;
          729  +        if( c==0 ) return SQLITE_NOMATCH;
   728    730           c2 = sqlite3Utf8Read(&zPattern);
   729    731           if( c2=='^' ){
   730    732             invert = 1;
   731    733             c2 = sqlite3Utf8Read(&zPattern);
   732    734           }
   733    735           if( c2==']' ){
   734    736             if( c==']' ) seen = 1;
................................................................................
   744    746                 seen = 1;
   745    747               }
   746    748               prior_c = c2;
   747    749             }
   748    750             c2 = sqlite3Utf8Read(&zPattern);
   749    751           }
   750    752           if( c2==0 || (seen ^ invert)==0 ){
   751         -          return 0;
          753  +          return SQLITE_NOMATCH;
   752    754           }
   753    755           continue;
   754    756         }
   755    757       }
   756    758       c2 = Utf8Read(zString);
   757    759       if( c==c2 ) continue;
   758    760       if( noCase  && sqlite3Tolower(c)==sqlite3Tolower(c2) && c<0x80 && c2<0x80 ){
   759    761         continue;
   760    762       }
   761    763       if( c==matchOne && zPattern!=zEscaped && c2!=0 ) continue;
   762         -    return 0;
          764  +    return SQLITE_NOMATCH;
   763    765     }
   764         -  return *zString==0;
          766  +  return *zString==0 ? SQLITE_MATCH : SQLITE_NOMATCH;
   765    767   }
   766    768   
   767    769   /*
   768         -** The sqlite3_strglob() interface.
          770  +** The sqlite3_strglob() interface.  Return 0 on a match (like strcmp()) and
          771  +** non-zero if there is no match.
   769    772   */
   770    773   int sqlite3_strglob(const char *zGlobPattern, const char *zString){
   771         -  int dummy = 0;
   772         -  return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, '[', &dummy)==0;
          774  +  return patternCompare((u8*)zGlobPattern, (u8*)zString, &globInfo, '[');
   773    775   }
   774    776   
   775    777   /*
   776         -** The sqlite3_strlike() interface.
          778  +** The sqlite3_strlike() interface.  Return 0 on a match and non-zero for
          779  +** a miss - like strcmp().
   777    780   */
   778    781   int sqlite3_strlike(const char *zPattern, const char *zStr, unsigned int esc){
   779         -  int dummy = 0;
   780         -  return patternCompare((u8*)zPattern, (u8*)zStr, &likeInfoNorm, esc, &dummy)==0;
          782  +  return patternCompare((u8*)zPattern, (u8*)zStr, &likeInfoNorm, esc);
   781    783   }
   782    784   
   783    785   /*
   784    786   ** Count the number of times that the LIKE operator (or GLOB which is
   785    787   ** just a variation of LIKE) gets called.  This is used for testing
   786    788   ** only.
   787    789   */
................................................................................
   851    853         return;
   852    854       }
   853    855       escape = sqlite3Utf8Read(&zEsc);
   854    856     }else{
   855    857       escape = pInfo->matchSet;
   856    858     }
   857    859     if( zA && zB ){
   858         -    int dummy = 0;
   859    860   #ifdef SQLITE_TEST
   860    861       sqlite3_like_count++;
   861    862   #endif
   862         -    sqlite3_result_int(context, patternCompare(zB, zA, pInfo, escape, &dummy));
          863  +    sqlite3_result_int(context, patternCompare(zB, zA, pInfo, escape)==SQLITE_MATCH);
   863    864     }
   864    865   }
   865    866   
   866    867   /*
   867    868   ** Implementation of the NULLIF(x,y) function.  The result is the first
   868    869   ** argument if the arguments are different.  The result is NULL if the
   869    870   ** arguments are equal to each other.