/ Check-in [13fb4aa6]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Performance enhancement for fts5 column filter queries on detail=full tables.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 13fb4aa6a87c5c6258979953da82eedc1a7bf14f
User & Date: dan 2016-01-30 19:16:11
Context
2016-01-30
21:09
Fix new test cases in stat.test so that they work with -DSQLITE_DEFAULT_AUTOVACUUM=1 builds. check-in: a2810cf6 user: dan tags: trunk
19:16
Performance enhancement for fts5 column filter queries on detail=full tables. check-in: 13fb4aa6 user: dan tags: trunk
16:59
Merge the implementation of OP_IdxRowid and OP_Seek so that OP_Seek no longer requires the rowid register and a separate OP_IdxRowid call. Shorter and faster prepared statements result. check-in: 9bec50a1 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to ext/fts5/fts5_index.c.

  4329   4329     const u8 **pa,                  /* IN/OUT: Pointer to poslist */
  4330   4330     int n,                          /* IN: Size of poslist in bytes */
  4331   4331     int iCol                        /* Column to extract from poslist */
  4332   4332   ){
  4333   4333     int iCurrent = 0;               /* Anything before the first 0x01 is col 0 */
  4334   4334     const u8 *p = *pa;
  4335   4335     const u8 *pEnd = &p[n];         /* One byte past end of position list */
  4336         -  u8 prev = 0;
  4337   4336   
  4338   4337     while( iCol>iCurrent ){
  4339   4338       /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  4340         -    ** not part of a varint */
  4341         -    while( (prev & 0x80) || *p!=0x01 ){
  4342         -      prev = *p++;
  4343         -      if( p==pEnd ) return 0;
         4339  +    ** not part of a varint. Note that it is not possible for a negative
         4340  +    ** or extremely large varint to occur within an uncorrupted position 
         4341  +    ** list. So the last byte of each varint may be assumed to have a clear
         4342  +    ** 0x80 bit.  */
         4343  +    while( *p!=0x01 ){
         4344  +      while( *p++ & 0x80 );
         4345  +      if( p>=pEnd ) return 0;
  4344   4346       }
  4345   4347       *pa = p++;
  4346         -    p += fts5GetVarint32(p, iCurrent);
         4348  +    iCurrent = *p++;
         4349  +    if( iCurrent & 0x80 ){
         4350  +      p--;
         4351  +      p += fts5GetVarint32(p, iCurrent);
         4352  +    }
  4347   4353     }
  4348   4354     if( iCol!=iCurrent ) return 0;
  4349   4355   
  4350   4356     /* Advance pointer p until it points to pEnd or an 0x01 byte that is
  4351   4357     ** not part of a varint */
  4352         -  assert( (prev & 0x80)==0 );
  4353         -  while( p<pEnd && ((prev & 0x80) || *p!=0x01) ){
  4354         -    prev = *p++;
         4358  +  while( p<pEnd && *p!=0x01 ){
         4359  +    while( *p++ & 0x80 );
  4355   4360     }
         4361  +
  4356   4362     return p - (*pa);
  4357   4363   }
  4358   4364   
  4359   4365   static int fts5AppendRowid(
  4360   4366     Fts5Index *p,
  4361   4367     i64 iDelta,
  4362   4368     Fts5Iter *pMulti,

Changes to ext/fts5/tool/fts5speed.tcl.

     7      7     {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'loaned OR mobility OR popcore OR sunk'"}
     8      8     {100 "SELECT count(*) FROM t1 WHERE t1 MATCH 'enron AND myapps'"}
     9      9     {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'en* AND my*'"}
    10     10   
    11     11     {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:t*'"}
    12     12     {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"}
    13     13     {1   "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"}
    14         -
    15     14     {2   "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:the'"}
    16     15   }
    17     16   
    18     17   proc usage {} {
    19     18     global Q
    20     19     puts stderr "Usage: $::argv0 DATABASE QUERY"
    21     20     puts stderr ""

Changes to ext/fts5/tool/fts5txt2db.tcl.

     1         -
     2         -
     3         -#-------------------------------------------------------------------------
     4         -# Command line options processor.
            1  +##########################################################################
            2  +# 2016 Jan 27
            3  +#
            4  +# The author disclaims copyright to this source code.  In place of
            5  +# a legal notice, here is a blessing:
            6  +#
            7  +#    May you do good and not evil.
            8  +#    May you find forgiveness for yourself and forgive others.
            9  +#    May you share freely, never taking more than you give.
     5     10   #
     6         -proc command_line_error {O E {msg ""}} {
     7         -  if {$msg != ""} {
     8         -    puts stderr "Error: $msg"
     9         -    puts stderr ""
    10         -  }
    11         -
    12         -  set L [list]
    13         -  foreach o $O {
    14         -    if {[llength $o]==1} {
    15         -      lappend L [string toupper $o]
    16         -    }
    17         -  }
    18         -
    19         -  puts stderr "Usage: $::argv0 ?SWITCHES? $L"
    20         -  puts stderr ""
    21         -  puts stderr "Switches are:"
    22         -  foreach o $O {
    23         -    if {[llength $o]==3} {
    24         -      foreach {a b c} $o {}
    25         -      puts stderr [format "    -%-15s %s (default \"%s\")" "$a VAL" $c $b]
    26         -    } elseif {[llength $o]==2} {
    27         -      foreach {a b} $o {}
    28         -      puts stderr [format "    -%-15s %s" $a $b]
    29         -    }
    30         -  }
    31         -  puts stderr ""
    32         -  puts stderr $E
    33         -  exit -1
           11  +proc process_cmdline {} { 
           12  +  cmdline::process ::A $::argv {
           13  +    {fts5                 "use fts5 (this is the default)"}
           14  +    {fts4                 "use fts4"}
           15  +    {colsize   "10 10 10" "list of column sizes"}
           16  +    {tblname   "t1"       "table name to create"}
           17  +    {detail    "full"     "Fts5 detail mode to use"}
           18  +    {repeat    1          "Load each file this many times"}
           19  +    {prefix    ""         "Fts prefix= option"}
           20  +    database
           21  +    file...
           22  +  } {
           23  +  This script is designed to create fts4/5 tables with more than one column.
           24  +  The -colsize option should be set to a Tcl list of integer values, one for
           25  +  each column in the table. Each value is the number of tokens that will be
           26  +  inserted into the column value for each row. For example, setting the -colsize
           27  +  option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
           28  +  tokens per row in each, respectively.
           29  +  
           30  +  Each "FILE" argument should be a text file. The contents of these text files
           31  +  is split on whitespace characters to form a list of tokens. The first N1
           32  +  tokens are used for the first column of the first row, where N1 is the first
           33  +  element of the -colsize list. The next N2 are used for the second column of
           34  +  the first row, and so on. Rows are added to the table until the entire list
           35  +  of tokens is exhausted.
           36  +  }
    34     37   }
    35     38   
    36         -proc process_command_line {avar lArgs O E} {
    37         -
    38         -  upvar $avar A
    39         -  set zTrailing ""       ;# True if ... is present in $O
    40         -  set lPosargs [list]
    41         -
    42         -  # Populate A() with default values. Also, for each switch in the command
    43         -  # line spec, set an entry in the idx() array as follows:
    44         -  #
    45         -  #  {tblname t1 "table name to use"}  
    46         -  #      -> [set idx(-tblname) {tblname t1 "table name to use"}  
    47         -  #
    48         -  # For each position parameter, append its name to $lPosargs. If the ...
    49         -  # specifier is present, set $zTrailing to the name of the prefix.
    50         -  #
    51         -  foreach o $O {
    52         -    set nm [lindex $o 0]
    53         -    set nArg [llength $o]
    54         -    switch -- $nArg {
    55         -      1 {
    56         -        if {[string range $nm end-2 end]=="..."} {
    57         -          set zTrailing [string range $nm 0 end-3]
    58         -        } else {
    59         -          lappend lPosargs $nm
           39  +###########################################################################
           40  +###########################################################################
           41  +# Command line options processor. This is generic code that can be copied
           42  +# between scripts.
           43  +#
           44  +namespace eval cmdline {
           45  +  proc cmdline_error {O E {msg ""}} {
           46  +    if {$msg != ""} {
           47  +      puts stderr "Error: $msg"
           48  +      puts stderr ""
           49  +    }
           50  +  
           51  +    set L [list]
           52  +    foreach o $O {
           53  +      if {[llength $o]==1} {
           54  +        lappend L [string toupper $o]
           55  +      }
           56  +    }
           57  +  
           58  +    puts stderr "Usage: $::argv0 ?SWITCHES? $L"
           59  +    puts stderr ""
           60  +    puts stderr "Switches are:"
           61  +    foreach o $O {
           62  +      if {[llength $o]==3} {
           63  +        foreach {a b c} $o {}
           64  +        puts stderr [format "    -%-15s %s (default \"%s\")" "$a VAL" $c $b]
           65  +      } elseif {[llength $o]==2} {
           66  +        foreach {a b} $o {}
           67  +        puts stderr [format "    -%-15s %s" $a $b]
           68  +      }
           69  +    }
           70  +    puts stderr ""
           71  +    puts stderr $E
           72  +    exit -1
           73  +  }
           74  +  
           75  +  proc process {avar lArgs O E} {
           76  +    upvar $avar A
           77  +    set zTrailing ""       ;# True if ... is present in $O
           78  +    set lPosargs [list]
           79  +  
           80  +    # Populate A() with default values. Also, for each switch in the command
           81  +    # line spec, set an entry in the idx() array as follows:
           82  +    #
           83  +    #  {tblname t1 "table name to use"}  
           84  +    #      -> [set idx(-tblname) {tblname t1 "table name to use"}  
           85  +    #
           86  +    # For each position parameter, append its name to $lPosargs. If the ...
           87  +    # specifier is present, set $zTrailing to the name of the prefix.
           88  +    #
           89  +    foreach o $O {
           90  +      set nm [lindex $o 0]
           91  +      set nArg [llength $o]
           92  +      switch -- $nArg {
           93  +        1 {
           94  +          if {[string range $nm end-2 end]=="..."} {
           95  +            set zTrailing [string range $nm 0 end-3]
           96  +          } else {
           97  +            lappend lPosargs $nm
           98  +          }
           99  +        }
          100  +        2 {
          101  +          set A($nm) 0
          102  +          set idx(-$nm) $o
          103  +        }
          104  +        3 {
          105  +          set A($nm) [lindex $o 1]
          106  +          set idx(-$nm) $o
          107  +        }
          108  +        default {
          109  +          error "Error in command line specification"
    60    110           }
    61    111         }
    62         -      2 {
    63         -        set A($nm) 0
    64         -        set idx(-$nm) $o
    65         -      }
    66         -      3 {
    67         -        set A($nm) [lindex $o 1]
    68         -        set idx(-$nm) $o
    69         -      }
    70         -      default {
    71         -        error "Error in command line specification"
          112  +    }
          113  +  
          114  +    # Set explicitly specified option values
          115  +    #
          116  +    set nArg [llength $lArgs]
          117  +    for {set i 0} {$i < $nArg} {incr i} {
          118  +      set opt [lindex $lArgs $i]
          119  +      if {[string range $opt 0 0]!="-" || $opt=="--"} break
          120  +      set c [array names idx "${opt}*"]
          121  +      if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"}
          122  +      if {[llength $c]>1}  { cmdline_error $O $E "Ambiguous option: $opt"}
          123  +  
          124  +      if {[llength $idx($c)]==3} {
          125  +        if {$i==[llength $lArgs]-1} {
          126  +          cmdline_error $O $E "Option requires argument: $c" 
          127  +        }
          128  +        incr i
          129  +        set A([lindex $idx($c) 0]) [lindex $lArgs $i]
          130  +      } else {
          131  +        set A([lindex $idx($c) 0]) 1
    72    132         }
    73    133       }
    74         -  }
    75         -
    76         -  # Set explicitly specified option values
    77         -  #
    78         -  set nArg [llength $lArgs]
    79         -  for {set i 0} {$i < $nArg} {incr i} {
    80         -    set opt [lindex $lArgs $i]
    81         -    if {[string range $opt 0 0]!="-" || $opt=="--"} break
    82         -    set c [array names idx "${opt}*"]
    83         -    if {[llength $c]==0} { command_line_error $O $E "Unrecognized option: $opt"}
    84         -    if {[llength $c]>1}  { command_line_error $O $E "Ambiguous option: $opt"}
    85         -
    86         -    if {[llength $idx($c)]==3} {
    87         -      if {$i==[llength $lArgs]-1} {
    88         -        command_line_error $O $E "Option requires argument: $c" 
    89         -      }
    90         -      incr i
    91         -      set A([lindex $idx($c) 0]) [lindex $lArgs $i]
    92         -    } else {
    93         -      set A([lindex $idx($c) 0]) 1
          134  +  
          135  +    # Deal with position arguments.
          136  +    #
          137  +    set nPosarg [llength $lPosargs]
          138  +    set nRem [expr $nArg - $i]
          139  +    if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
          140  +      cmdline_error $O $E
          141  +    }
          142  +    for {set j 0} {$j < $nPosarg} {incr j} {
          143  +      set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]]
          144  +    }
          145  +    if {$zTrailing!=""} {
          146  +      set A($zTrailing) [lrange $lArgs [expr $j+$i] end]
    94    147       }
    95    148     }
    96         -
    97         -  # Deal with position arguments.
    98         -  #
    99         -  set nPosarg [llength $lPosargs]
   100         -  set nRem [expr $nArg - $i]
   101         -  if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
   102         -    command_line_error $O $E
   103         -  }
   104         -  for {set j 0} {$j < $nPosarg} {incr j} {
   105         -    set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]]
   106         -  }
   107         -  if {$zTrailing!=""} {
   108         -    set A($zTrailing) [lrange $lArgs [expr $j+$i] end]
   109         -  }
   110         -}
          149  +} ;# namespace eval cmdline
   111    150   # End of command line options processor.
   112         -#-------------------------------------------------------------------------
          151  +###########################################################################
          152  +###########################################################################
   113    153   
          154  +process_cmdline
   114    155   
   115         -process_command_line A $argv {
   116         -  {fts5                 "use fts5"}
   117         -  {fts4                 "use fts4"}
   118         -  {colsize   "10 10 10" "list of column sizes"}
   119         -  {tblname   "t1"       "table name to create"}
   120         -  {detail    "full"     "Fts5 detail mode to use"}
   121         -  {repeat    1          "Load each file this many times"}
   122         -  database
   123         -  file...
   124         -} {
   125         -This script is designed to create fts4/5 tables with more than one column.
   126         -The -colsize option should be set to a Tcl list of integer values, one for
   127         -each column in the table. Each value is the number of tokens that will be
   128         -inserted into the column value for each row. For example, setting the -colsize
   129         -option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
   130         -tokens per row in each, respectively.
   131         -
   132         -Each "FILE" argument should be a text file. The contents of these text files is
   133         -split on whitespace characters to form a list of tokens. The first N1 tokens
   134         -are used for the first column of the first row, where N1 is the first element
   135         -of the -colsize list. The next N2 are used for the second column of the first
   136         -row, and so on. Rows are added to the table until the entire list of tokens
   137         -is exhausted.
   138         -}
   139         -
          156  +# If -fts4 was specified, use fts4. Otherwise, fts5.
   140    157   if {$A(fts4)} {
   141    158     set A(fts) fts4
   142    159   } else {
   143    160     set A(fts) fts5
   144    161   }
   145    162   
   146    163   sqlite3 db $A(database)
................................................................................
   152    169     set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
   153    170   
   154    171     set nCol [llength $A(colsize)]
   155    172     set cols [lrange $cols 0 [expr $nCol-1]]
   156    173   
   157    174     set sql    "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) ("
   158    175     append sql [join $cols ,]
   159         -  if {$A(fts)=="fts5"} { append sql ",detail=$A(detail));" }
          176  +  if {$A(fts)=="fts5"} { append sql ",detail=$A(detail)" }
          177  +  append sql ", prefix='$A(prefix)');"
   160    178   
   161    179     db eval $sql
   162    180     return $cols
   163    181   }
   164    182   
   165    183   # Return a list of tokens from the named file.
   166    184   #