/ Check-in [bd6bc3b8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Changes to support fragmentation analysis in sqlite3_analyzer. (CVS 3634)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: bd6bc3b8f06919000fb082087dff7bbd335d07e9
User & Date: drh 2007-02-10 19:22:36
Context
2007-02-13
01:38
Additional fixes to the new fragmentation feature of sqlite3_analyzer. (CVS 3635) check-in: 82aed271 user: drh tags: trunk
2007-02-10
19:22
Changes to support fragmentation analysis in sqlite3_analyzer. (CVS 3634) check-in: bd6bc3b8 user: drh tags: trunk
2007-02-07
13:09
Explicit collations always override implicit collations. This is backwards compatible since SQLite has not previously supported explicit collations. Need to add tests of this new behavior. (CVS 3633) check-in: 3638823a user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/btree.c.

     5      5   ** a legal notice, here is a blessing:
     6      6   **
     7      7   **    May you do good and not evil.
     8      8   **    May you find forgiveness for yourself and forgive others.
     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   *************************************************************************
    12         -** $Id: btree.c,v 1.334 2007/01/27 02:24:55 drh Exp $
           12  +** $Id: btree.c,v 1.335 2007/02/10 19:22:36 drh Exp $
    13     13   **
    14     14   ** This file implements a external (disk-based) database using BTrees.
    15     15   ** For a detailed discussion of BTrees, refer to
    16     16   **
    17     17   **     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
    18     18   **     "Sorting And Searching", pages 473-480. Addison-Wesley
    19     19   **     Publishing Company, Reading, Massachusetts.
................................................................................
  5886   5886   **   aResult[3] =  Cell size (local payload + header)
  5887   5887   **   aResult[4] =  Number of free bytes on this page
  5888   5888   **   aResult[5] =  Number of free blocks on the page
  5889   5889   **   aResult[6] =  Total payload size (local + overflow)
  5890   5890   **   aResult[7] =  Header size in bytes
  5891   5891   **   aResult[8] =  Local payload size
  5892   5892   **   aResult[9] =  Parent page number
         5893  +**   aResult[10]=  Page number of the first overflow page
  5893   5894   **
  5894   5895   ** This routine is used for testing and debugging only.
  5895   5896   */
  5896   5897   int sqlite3BtreeCursorInfo(BtCursor *pCur, int *aResult, int upCnt){
  5897   5898     int cnt, idx;
  5898   5899     MemPage *pPage = pCur->pPage;
  5899   5900     BtCursor tmpCur;
................................................................................
  5933   5934       idx = get2byte(&pPage->aData[idx]);
  5934   5935     }
  5935   5936     aResult[5] = cnt;
  5936   5937     if( pPage->pParent==0 || isRootPage(pPage) ){
  5937   5938       aResult[9] = 0;
  5938   5939     }else{
  5939   5940       aResult[9] = pPage->pParent->pgno;
         5941  +  }
         5942  +  if( tmpCur.info.iOverflow ){
         5943  +    aResult[10] = get4byte(&tmpCur.info.pCell[tmpCur.info.iOverflow]);
         5944  +  }else{
         5945  +    aResult[10] = 0;
  5940   5946     }
  5941   5947     releaseTempCursor(&tmpCur);
  5942   5948     return SQLITE_OK;
  5943   5949   }
  5944   5950   #endif
  5945   5951   
  5946   5952   /*

Changes to src/test3.c.

     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   *************************************************************************
    12     12   ** Code for testing the btree.c module in SQLite.  This code
    13     13   ** is not included in the SQLite library.  It is used for automated
    14     14   ** testing of the SQLite library.
    15     15   **
    16         -** $Id: test3.c,v 1.69 2007/01/27 02:24:56 drh Exp $
           16  +** $Id: test3.c,v 1.70 2007/02/10 19:22:36 drh Exp $
    17     17   */
    18     18   #include "sqliteInt.h"
    19     19   #include "pager.h"
    20     20   #include "btree.h"
    21     21   #include "tcl.h"
    22     22   #include <stdlib.h>
    23     23   #include <string.h>
................................................................................
   573    573     if( argc<3 ){
   574    574       Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
   575    575          " ID ROOT ...\"", 0);
   576    576       return TCL_ERROR;
   577    577     }
   578    578     pBt = sqlite3TextToPtr(argv[1]);
   579    579     nRoot = argc-2;
   580         -  aRoot = malloc( sizeof(int)*(argc-2) );
          580  +  aRoot = (int*)malloc( sizeof(int)*(argc-2) );
   581    581     for(i=0; i<argc-2; i++){
   582    582       if( Tcl_GetInt(interp, argv[i+2], &aRoot[i]) ) return TCL_ERROR;
   583    583     }
   584    584   #ifndef SQLITE_OMIT_INTEGRITY_CHECK
   585    585     zResult = sqlite3BtreeIntegrityCheck(pBt, aRoot, nRoot, 10000, &nErr);
   586    586   #else
   587    587     zResult = 0;
   588    588   #endif
   589         -  free(aRoot);
          589  +  free((void*)aRoot);
   590    590     if( zResult ){
   591    591       Tcl_AppendResult(interp, zResult, 0);
   592    592       sqliteFree(zResult); 
   593    593     }
   594    594     return TCL_OK;
   595    595   }
   596    596   
................................................................................
  1182   1182   **   aResult[3] =  Cell size (local payload + header)
  1183   1183   **   aResult[4] =  Number of free bytes on this page
  1184   1184   **   aResult[5] =  Number of free blocks on the page
  1185   1185   **   aResult[6] =  Total payload size (local + overflow)
  1186   1186   **   aResult[7] =  Header size in bytes
  1187   1187   **   aResult[8] =  Local payload size
  1188   1188   **   aResult[9] =  Parent page number
         1189  +**   aResult[10]=  Page number of the first overflow page
  1189   1190   */
  1190   1191   static int btree_cursor_info(
  1191   1192     void *NotUsed,
  1192   1193     Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
  1193   1194     int argc,              /* Number of arguments */
  1194   1195     const char **argv      /* Text of each argument */
  1195   1196   ){
  1196   1197     BtCursor *pCur;
  1197   1198     int rc;
  1198   1199     int i, j;
  1199   1200     int up;
  1200         -  int aResult[10];
         1201  +  int aResult[11];
  1201   1202     char zBuf[400];
  1202   1203   
  1203   1204     if( argc!=2 && argc!=3 ){
  1204   1205       Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
  1205   1206          " ID ?UP-CNT?\"", 0);
  1206   1207       return TCL_ERROR;
  1207   1208     }
................................................................................
  1220   1221     for(i=0; i<sizeof(aResult)/sizeof(aResult[0]); i++){
  1221   1222       sqlite3_snprintf(40,&zBuf[j]," %d", aResult[i]);
  1222   1223       j += strlen(&zBuf[j]);
  1223   1224     }
  1224   1225     Tcl_AppendResult(interp, &zBuf[1], 0);
  1225   1226     return SQLITE_OK;
  1226   1227   }
         1228  +
         1229  +/*
         1230  +** Copied from btree.c:
         1231  +*/
         1232  +static u32 get4byte(unsigned char *p){
         1233  +  return (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
         1234  +}
         1235  +
         1236  +/*
         1237  +**   btree_ovfl_info  BTREE  CURSOR
         1238  +**
         1239  +** Given a cursor, return the sequence of pages number that form the
         1240  +** overflow pages for the data of the entry that the cursor is point
         1241  +** to.
         1242  +*/ 
         1243  +static int btree_ovfl_info(
         1244  +  void *NotUsed,
         1245  +  Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
         1246  +  int argc,              /* Number of arguments */
         1247  +  const char **argv      /* Text of each argument */
         1248  +){
         1249  +  Btree *pBt;
         1250  +  BtCursor *pCur;
         1251  +  Pager *pPager;
         1252  +  int rc;
         1253  +  int n;
         1254  +  int dataSize;
         1255  +  u32 pgno;
         1256  +  void *pPage;
         1257  +  int aResult[11];
         1258  +  char zElem[100];
         1259  +  Tcl_DString str;
         1260  +
         1261  +  if( argc!=3 ){
         1262  +    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0], 
         1263  +                    " BTREE CURSOR", 0);
         1264  +    return TCL_ERROR;
         1265  +  }
         1266  +  pBt = sqlite3TextToPtr(argv[1]);
         1267  +  pCur = sqlite3TextToPtr(argv[2]);
         1268  +  if( (*(void**)pCur) != (void*)pBt ){
         1269  +    Tcl_AppendResult(interp, "Cursor ", argv[2], " does not belong to btree ",
         1270  +       argv[1], 0);
         1271  +    return TCL_ERROR;
         1272  +  }
         1273  +  pPager = sqlite3BtreePager(pBt);
         1274  +  rc = sqlite3BtreeCursorInfo(pCur, aResult, 0);
         1275  +  if( rc ){
         1276  +    Tcl_AppendResult(interp, errorName(rc), 0);
         1277  +    return TCL_ERROR;
         1278  +  }
         1279  +  dataSize = sqlite3BtreeGetPageSize(pBt) - sqlite3BtreeGetReserve(pBt);
         1280  +  Tcl_DStringInit(&str);
         1281  +  n = aResult[6] - aResult[8];
         1282  +  n = (n + dataSize - 1)/dataSize;
         1283  +  pgno = (u32)aResult[10];
         1284  +  while( pgno && n-- ){
         1285  +    sprintf(zElem, "%d", pgno);
         1286  +    Tcl_DStringAppendElement(&str, zElem);
         1287  +    if( sqlite3pager_get(pPager, pgno, &pPage)!=SQLITE_OK ){
         1288  +      Tcl_DStringFree(&str);
         1289  +      Tcl_AppendResult(interp, "unable to get page ", zElem, 0);
         1290  +      return TCL_ERROR;
         1291  +    }
         1292  +    pgno = get4byte((unsigned char*)pPage);
         1293  +    sqlite3pager_unref(pPage);
         1294  +  }
         1295  +  Tcl_DStringResult(interp, &str);
         1296  +  return SQLITE_OK;
         1297  +}
  1227   1298   
  1228   1299   /*
  1229   1300   ** The command is provided for the purpose of setting breakpoints.
  1230   1301   ** in regression test scripts.
  1231   1302   **
  1232   1303   ** By setting a GDB breakpoint on this procedure and executing the
  1233   1304   ** btree_breakpoint command in a test script, we can stop GDB at
................................................................................
  1436   1507        { "btree_varint_test",        (Tcl_CmdProc*)btree_varint_test        },
  1437   1508        { "btree_begin_statement",    (Tcl_CmdProc*)btree_begin_statement    },
  1438   1509        { "btree_commit_statement",   (Tcl_CmdProc*)btree_commit_statement   },
  1439   1510        { "btree_rollback_statement", (Tcl_CmdProc*)btree_rollback_statement },
  1440   1511        { "btree_from_db",            (Tcl_CmdProc*)btree_from_db            },
  1441   1512        { "btree_set_cache_size",     (Tcl_CmdProc*)btree_set_cache_size     },
  1442   1513        { "btree_cursor_info",        (Tcl_CmdProc*)btree_cursor_info        },
         1514  +     { "btree_ovfl_info",          (Tcl_CmdProc*)btree_ovfl_info          },
  1443   1515        { "btree_cursor_list",        (Tcl_CmdProc*)btree_cursor_list        },
  1444   1516     };
  1445   1517     int i;
  1446   1518   
  1447   1519     for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
  1448   1520       Tcl_CreateCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
  1449   1521     }

Added tool/fragck.tcl.

            1  +# Run this TCL script using "testfixture" to get a report that shows
            2  +# the sequence of database pages used by a particular table or index.
            3  +# This information is used for fragmentation analysis.
            4  +#
            5  +
            6  +# Get the name of the database to analyze
            7  +#
            8  +
            9  +if {[llength $argv]!=2} {
           10  +  puts stderr "Usage: $argv0 database-name table-or-index-name"
           11  +  exit 1
           12  +}
           13  +set file_to_analyze [lindex $argv 0]
           14  +if {![file exists $file_to_analyze]} {
           15  +  puts stderr "No such file: $file_to_analyze"
           16  +  exit 1
           17  +}
           18  +if {![file readable $file_to_analyze]} {
           19  +  puts stderr "File is not readable: $file_to_analyze"
           20  +  exit 1
           21  +}
           22  +if {[file size $file_to_analyze]<512} {
           23  +  puts stderr "Empty or malformed database: $file_to_analyze"
           24  +  exit 1
           25  +}
           26  +set objname [lindex $argv 1]
           27  +
           28  +# Open the database
           29  +#
           30  +sqlite3 db [lindex $argv 0]
           31  +set DB [btree_open [lindex $argv 0] 1000 0]
           32  +
           33  +# This proc is a wrapper around the btree_cursor_info command. The
           34  +# second argument is an open btree cursor returned by [btree_cursor].
           35  +# The first argument is the name of an array variable that exists in
           36  +# the scope of the caller. If the third argument is non-zero, then
           37  +# info is returned for the page that lies $up entries upwards in the
           38  +# tree-structure. (i.e. $up==1 returns the parent page, $up==2 the 
           39  +# grandparent etc.)
           40  +#
           41  +# The following entries in that array are filled in with information retrieved
           42  +# using [btree_cursor_info]:
           43  +#
           44  +#   $arrayvar(page_no)             =  The page number
           45  +#   $arrayvar(entry_no)            =  The entry number
           46  +#   $arrayvar(page_entries)        =  Total number of entries on this page
           47  +#   $arrayvar(cell_size)           =  Cell size (local payload + header)
           48  +#   $arrayvar(page_freebytes)      =  Number of free bytes on this page
           49  +#   $arrayvar(page_freeblocks)     =  Number of free blocks on the page
           50  +#   $arrayvar(payload_bytes)       =  Total payload size (local + overflow)
           51  +#   $arrayvar(header_bytes)        =  Header size in bytes
           52  +#   $arrayvar(local_payload_bytes) =  Local payload size
           53  +#   $arrayvar(parent)              =  Parent page number
           54  +# 
           55  +proc cursor_info {arrayvar csr {up 0}} {
           56  +  upvar $arrayvar a
           57  +  foreach [list a(page_no) \
           58  +                a(entry_no) \
           59  +                a(page_entries) \
           60  +                a(cell_size) \
           61  +                a(page_freebytes) \
           62  +                a(page_freeblocks) \
           63  +                a(payload_bytes) \
           64  +                a(header_bytes) \
           65  +                a(local_payload_bytes) \
           66  +                a(parent) \
           67  +                a(first_ovfl) ] [btree_cursor_info $csr $up] break
           68  +}
           69  +
           70  +# Determine the page-size of the database. This global variable is used
           71  +# throughout the script.
           72  +#
           73  +set pageSize [db eval {PRAGMA page_size}]
           74  +
           75  +# Find the root page of table or index to be analyzed.  Also find out
           76  +# if the object is a table or an index.
           77  +#
           78  +if {$objname=="sqlite_master"} {
           79  +  set rootpage 1
           80  +  set type table
           81  +} else {
           82  +  db eval {
           83  +    SELECT rootpage, type FROM sqlite_master
           84  +     WHERE name=$objname
           85  +  } break
           86  +  if {![info exists rootpage]} {
           87  +    puts stderr "no such table or index: $objname"
           88  +    exit 1
           89  +  }
           90  +  if {$type!="table" && $type!="index"} {
           91  +    puts stderr "$objname is something other than a table or index"
           92  +    exit 1
           93  +  }
           94  +  if {![string is integer -strict $rootpage]} {
           95  +    puts stderr "invalid root page for $objname: $rootpage"
           96  +    exit 1
           97  +  } 
           98  +}
           99  +
          100  +# The cursor $csr is pointing to an entry.  Print out information
          101  +# about the page that $up levels above that page that contains
          102  +# the entry.  If $up==0 use the page that contains the entry.
          103  +# 
          104  +# If information about the page has been printed already, then
          105  +# this is a no-op.
          106  +# 
          107  +proc page_info {csr up} {
          108  +  global seen
          109  +  cursor_info ci $csr $up
          110  +  set pg $ci(page_no)
          111  +  if {[info exists seen($pg)]} return
          112  +  set seen($pg) 1
          113  +
          114  +  # Do parent pages first
          115  +  #
          116  +  if {$ci(parent)} {
          117  +    page_info $csr [expr {$up+1}]
          118  +  }
          119  +
          120  +  # Find the depth of this page
          121  +  #
          122  +  set depth 1
          123  +  set i $up
          124  +  while {$ci(parent)} {
          125  +    incr i
          126  +    incr depth
          127  +    cursor_info ci $csr $i
          128  +  }
          129  +
          130  +  # print the results
          131  +  #
          132  +  puts [format {LEVEL %d:  %6d} $depth $pg]
          133  +}  
          134  +
          135  +  
          136  +  
          137  +
          138  +# Loop through the object and print out page numbers
          139  +#
          140  +set csr [btree_cursor $DB $rootpage 0]
          141  +for {btree_first $csr} {![btree_eof $csr]} {btree_next $csr} {
          142  +  page_info $csr 0
          143  +  set i 1
          144  +  foreach pg [btree_ovfl_info $DB $csr] {
          145  +    puts [format {OVFL %3d: %6d} $i $pg]
          146  +    incr i
          147  +  }
          148  +}
          149  +exit 0

Changes to tool/spaceanal.tcl.

    21     21     puts stderr "File is not readable: $file_to_analyze"
    22     22     exit 1
    23     23   }
    24     24   if {[file size $file_to_analyze]<512} {
    25     25     puts stderr "Empty or malformed database: $file_to_analyze"
    26     26     exit 1
    27     27   }
           28  +
           29  +# Maximum distance between pages before we consider it a "gap"
           30  +#
           31  +set MAXGAP 3
    28     32   
    29     33   # Open the database
    30     34   #
    31     35   sqlite3 db [lindex $argv 0]
    32     36   set DB [btree_open [lindex $argv 0] 1000 0]
    33     37   
    34     38   # In-memory database for collecting statistics. This script loops through
................................................................................
    49     53      ovfl_cnt int,     -- Number of entries that use overflow
    50     54      mx_payload int,   -- Maximum payload size
    51     55      int_pages int,    -- Number of interior pages used
    52     56      leaf_pages int,   -- Number of leaf pages used
    53     57      ovfl_pages int,   -- Number of overflow pages used
    54     58      int_unused int,   -- Number of unused bytes on interior pages
    55     59      leaf_unused int,  -- Number of unused bytes on primary pages
    56         -   ovfl_unused int   -- Number of unused bytes on overflow pages
           60  +   ovfl_unused int,  -- Number of unused bytes on overflow pages
           61  +   gap_cnt int       -- Number of gaps in the page layout
    57     62   );}
    58     63   mem eval $tabledef
    59     64   
    60     65   proc integerify {real} {
    61     66     return [expr int($real)]
    62     67   }
    63     68   mem function int integerify
................................................................................
   101    106                   a(page_entries) \
   102    107                   a(cell_size) \
   103    108                   a(page_freebytes) \
   104    109                   a(page_freeblocks) \
   105    110                   a(payload_bytes) \
   106    111                   a(header_bytes) \
   107    112                   a(local_payload_bytes) \
   108         -                a(parent) ] [btree_cursor_info $csr $up] {}
          113  +                a(parent) \
          114  +                a(first_ovfl) ] [btree_cursor_info $csr $up] break
   109    115   }
   110    116   
   111    117   # Determine the page-size of the database. This global variable is used
   112    118   # throughout the script.
   113    119   #
   114    120   set pageSize [db eval {PRAGMA page_size}]
   115    121   
................................................................................
   141    147     set cnt_ovfl $wideZero             ;# Number of entries that use overflows
   142    148     set cnt_leaf_entry $wideZero       ;# Number of leaf entries
   143    149     set cnt_int_entry $wideZero        ;# Number of interor entries
   144    150     set mx_payload $wideZero           ;# Maximum payload size
   145    151     set ovfl_pages $wideZero           ;# Number of overflow pages used
   146    152     set leaf_pages $wideZero           ;# Number of leaf pages
   147    153     set int_pages $wideZero            ;# Number of interior pages
          154  +  set gap_cnt 0                      ;# Number of holes in the page sequence
          155  +  set prev_pgno 0                    ;# Last page number seen
   148    156   
   149    157     # As the btree is traversed, the array variable $seen($pgno) is set to 1
   150    158     # the first time page $pgno is encountered.
   151    159     #
   152    160     catch {unset seen}
   153    161   
   154    162     # The following loop runs once for each entry in table $name. The table
................................................................................
   176    184       set ovfl [expr {$ci(payload_bytes)-$ci(local_payload_bytes)}]
   177    185       if {$ovfl} {
   178    186         incr cnt_ovfl
   179    187         incr total_ovfl $ovfl
   180    188         set n [expr {int(ceil($ovfl/($pageSize-4.0)))}]
   181    189         incr ovfl_pages $n
   182    190         incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}]
          191  +      set pglist [btree_ovfl_info $DB $csr]
          192  +    } else {
          193  +      set pglist {}
   183    194       }
   184    195   
   185    196       # If this is the first table entry analyzed for the page, then update
   186    197       # the page-related statistics $leaf_pages and $unused_leaf. Also, if
   187    198       # this page has a parent page that has not been analyzed, retrieve
   188    199       # info for the parent and update statistics for it too.
   189    200       #
   190    201       if {![info exists seen($ci(page_no))]} {
   191    202         set seen($ci(page_no)) 1
   192    203         incr leaf_pages
   193    204         incr unused_leaf $ci(page_freebytes)
          205  +      set pglist "$ci(page_no) $pglist"
   194    206   
   195    207         # Now check if the page has a parent that has not been analyzed. If
   196    208         # so, update the $int_pages, $cnt_int_entry and $unused_int statistics
   197    209         # accordingly. Then check if the parent page has a parent that has
   198    210         # not yet been analyzed etc.
   199    211         #
   200    212         # set parent $ci(parent_page_no)
................................................................................
   206    218           set seen($ci(parent)) 1
   207    219   
   208    220           # Retrieve info for the parent and update statistics.
   209    221           cursor_info ci $csr $up
   210    222           incr int_pages
   211    223           incr cnt_int_entry $ci(page_entries)
   212    224           incr unused_int $ci(page_freebytes)
          225  +
          226  +        # parent pages come before their first child
          227  +        set pglist "$ci(page_no) $pglist"
          228  +      }
          229  +    }
          230  +
          231  +    # Check the page list for fragmentation
          232  +    #
          233  +    foreach pg $pglist {
          234  +      if {($pg<$prev_pgno || $pg>$prev_pgno+$MAXGAP) && $prev_pgno>0} {
          235  +        incr gap_cnt
   213    236         }
          237  +      set prev_pgno $pg
   214    238       }
   215    239     }
   216    240     btree_close_cursor $csr
   217    241   
   218    242     # Handle the special case where a table contains no data. In this case
   219    243     # all statistics are zero, except for the number of leaf pages (1) and
   220    244     # the unused bytes on leaf pages ($pageSize - 8).
................................................................................
   246    270     append sql ",$mx_payload"
   247    271     append sql ",$int_pages"
   248    272     append sql ",$leaf_pages"
   249    273     append sql ",$ovfl_pages"
   250    274     append sql ",$unused_int"
   251    275     append sql ",$unused_leaf"
   252    276     append sql ",$unused_ovfl"
          277  +  append sql ",$gap_cnt"
   253    278     append sql );
   254    279     mem eval $sql
   255    280   }
   256    281   
   257    282   # Analyze every index in the database, one at a time.
   258    283   #
   259    284   # The query below returns the name, associated table and root-page number 
................................................................................
   275    300     set unused_leaf $wideZero          ;# Unused space on leaf nodes
   276    301     set unused_ovfl $wideZero          ;# Unused space on overflow pages
   277    302     set cnt_ovfl $wideZero             ;# Number of entries that use overflows
   278    303     set cnt_leaf_entry $wideZero       ;# Number of leaf entries
   279    304     set mx_payload $wideZero           ;# Maximum payload size
   280    305     set ovfl_pages $wideZero           ;# Number of overflow pages used
   281    306     set leaf_pages $wideZero           ;# Number of leaf pages
          307  +  set gap_cnt 0                      ;# Number of holes in the page sequence
          308  +  set prev_pgno 0                    ;# Last page number seen
   282    309   
   283    310     # As the btree is traversed, the array variable $seen($pgno) is set to 1
   284    311     # the first time page $pgno is encountered.
   285    312     #
   286    313     catch {unset seen}
   287    314   
   288    315     # The following loop runs once for each entry in index $name. The index
................................................................................
   320    347       # If this is the first table entry analyzed for the page, then update
   321    348       # the page-related statistics $leaf_pages and $unused_leaf.
   322    349       #
   323    350       if {![info exists seen($ci(page_no))]} {
   324    351         set seen($ci(page_no)) 1
   325    352         incr leaf_pages
   326    353         incr unused_leaf $ci(page_freebytes)
          354  +      set pg $ci(page_no)
          355  +      if {$prev_pgno>0 && ($prev_pgno<$pg-$MAXGAP || $prev_pgno>$pg)} {
          356  +        incr gap_cnt
          357  +      }
          358  +      set prev_pgno $ci(page_no)
   327    359       }
   328    360     }
   329    361     btree_close_cursor $csr
   330    362   
   331    363     # Handle the special case where a index contains no data. In this case
   332    364     # all statistics are zero, except for the number of leaf pages (1) and
   333    365     # the unused bytes on leaf pages ($pageSize - 8).
................................................................................
   351    383     append sql ",$mx_payload"
   352    384     append sql ",0"
   353    385     append sql ",$leaf_pages"
   354    386     append sql ",$ovfl_pages"
   355    387     append sql ",0"
   356    388     append sql ",$unused_leaf"
   357    389     append sql ",$unused_ovfl"
          390  +  append sql ",$gap_cnt"
   358    391     append sql );
   359    392     mem eval $sql
   360    393   }
   361    394   
   362    395   # Generate a single line of output in the statistics section of the
   363    396   # report.
   364    397   #
................................................................................
   416    449         max(mx_payload) AS mx_payload,
   417    450         int(sum(ovfl_cnt)) as ovfl_cnt,
   418    451         int(sum(leaf_pages)) AS leaf_pages,
   419    452         int(sum(int_pages)) AS int_pages,
   420    453         int(sum(ovfl_pages)) AS ovfl_pages,
   421    454         int(sum(leaf_unused)) AS leaf_unused,
   422    455         int(sum(int_unused)) AS int_unused,
   423         -      int(sum(ovfl_unused)) AS ovfl_unused
          456  +      int(sum(ovfl_unused)) AS ovfl_unused,
          457  +      int(sum(gap_cnt)) AS gap_cnt
   424    458       FROM space_used WHERE $where" {} {}
   425    459   
   426    460     # Output the sub-report title, nicely decorated with * characters.
   427    461     #
   428    462     puts ""
   429    463     set len [string length $title]
   430    464     set stars [string repeat * [expr 65-$len]]
................................................................................
   446    480     set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
   447    481     set total_pages_percent [percent $total_pages $file_pgcnt]
   448    482     set storage [expr {$total_pages*$pageSize}]
   449    483     set payload_percent [percent $payload $storage {of storage consumed}]
   450    484     set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
   451    485     set avg_payload [divide $payload $nleaf]
   452    486     set avg_unused [divide $total_unused $nleaf]
          487  +  set fragmentation [percent $gap_cnt $total_pages {fragmentation}]
   453    488     if {$int_pages>0} {
   454    489       # TODO: Is this formula correct?
   455    490       set nTab [mem eval "
   456    491         SELECT count(*) FROM (
   457    492             SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0
   458    493         )
   459    494       "]
................................................................................
   472    507     statline {Bytes of storage consumed} $storage
   473    508     statline {Bytes of payload} $payload $payload_percent
   474    509     statline {Average payload per entry} $avg_payload
   475    510     statline {Average unused bytes per entry} $avg_unused
   476    511     if {[info exists avg_fanout]} {
   477    512       statline {Average fanout} $avg_fanout
   478    513     }
          514  +  if {$total_pages>1} {
          515  +    statline {Fragmentation} $fragmentation
          516  +  }
   479    517     statline {Maximum payload per entry} $mx_payload
   480    518     statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
   481    519     if {$int_pages>0} {
   482    520       statline {Index pages used} $int_pages
   483    521     }
   484    522     statline {Primary pages used} $leaf_pages
   485    523     statline {Overflow pages used} $ovfl_pages