/ Check-in [44f157e0]
Login
SQLite training in Houston TX on 2019-11-05 (details)
Part of the 2019 Tcl Conference

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix the dbhash utility so that it ignores the root page number when hashing the sqlite_master table. Add new command-line options. Add the ability to hash multiple databases with a single command.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | dbhash
Files: files | file ages | folders
SHA1: 44f157e0f0d5a76ef9002b2592164c4fdae89e34
User & Date: drh 2016-06-08 13:49:28
Context
2016-06-08
13:59
Fix an undersized buffer in the SHA1 implementation. Closed-Leaf check-in: fb276815 user: drh tags: dbhash
13:49
Fix the dbhash utility so that it ignores the root page number when hashing the sqlite_master table. Add new command-line options. Add the ability to hash multiple databases with a single command. check-in: 44f157e0 user: drh tags: dbhash
01:03
An initial attempt at a "dbhash" command-line utility. check-in: 2247649c user: drh tags: dbhash
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to tool/dbhash.c.

     6      6   **
     7      7   **    May you do good and not evil.
     8      8   **    May you find forgiveness for yourself and forgive others.
     9      9   **    May you share freely, never taking more than you give.
    10     10   **
    11     11   *************************************************************************
    12     12   **
    13         -** This is a utility program that computes a hash on the content
           13  +** This is a utility program that computes an SHA1 hash on the content
    14     14   ** of an SQLite database.
    15     15   **
    16     16   ** The hash is computed over just the content of the database.  Free
    17     17   ** space inside of the database file, and alternative on-disk representations
    18     18   ** of the same content (ex: UTF8 vs UTF16) do not affect the hash.  So,
    19     19   ** for example, the database file page size, encoding, and auto_vacuum setting
    20     20   ** can all be changed without changing the hash.
................................................................................
    36     36   };
    37     37   
    38     38   /*
    39     39   ** All global variables are gathered into the "g" singleton.
    40     40   */
    41     41   struct GlobalVars {
    42     42     const char *zArgv0;       /* Name of program */
    43         -  int bSchemaPK;            /* Use the schema-defined PK, not the true PK */
    44     43     unsigned fDebug;          /* Debug flags */
    45     44     sqlite3 *db;              /* The database connection */
    46     45     SHA1Context cx;           /* SHA1 hash context */
    47     46   } g;
    48     47   
           48  +/*
           49  +** Debugging flags
           50  +*/
           51  +#define DEBUG_FULLTRACE   0x00000001   /* Trace hash to stderr */
           52  +
    49     53   /******************************************************************************
    50     54   ** The Hash Engine
    51     55   **
    52     56   ** Modify these routines (and appropriate state fields in global variable 'g')
    53     57   ** in order to compute a different (better?) hash of the database.
    54     58   */
    55     59   /*
................................................................................
   196    200       i = 0;
   197    201     }
   198    202     (void)memcpy(&g.cx.buffer[j], &data[i], len - i);
   199    203   }
   200    204   
   201    205   
   202    206   /* Add padding and compute and output the message digest. */
   203         -static void hash_finish(void){
          207  +static void hash_finish(const char *zName){
   204    208     unsigned int i;
   205    209     unsigned char finalcount[8];
   206    210     unsigned char digest[20];
   207    211     static const char zEncode[] = "0123456789abcdef";
   208    212     char zOut[40];
   209    213   
   210    214     for (i = 0; i < 8; i++){
................................................................................
   220    224       digest[i] = (unsigned char)((g.cx.state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
   221    225     }
   222    226     for(i=0; i<20; i++){
   223    227       zOut[i*2] = zEncode[(digest[i]>>4)&0xf];
   224    228       zOut[i*2+1] = zEncode[digest[i] & 0xf];
   225    229     }
   226    230     zOut[i*2]= 0;
   227         -  printf("%s\n", zOut);
          231  +  printf("%s %s\n", zOut, zName);
   228    232   }
   229    233   /* End of the hashing logic
   230    234   *******************************************************************************/
   231    235     
   232    236   /*
   233    237   ** Print an error resulting from faulting command-line arguments and
   234    238   ** abort the program.
................................................................................
   282    286     va_start(ap, zFormat);
   283    287     pStmt = db_vprepare(zFormat, ap);
   284    288     va_end(ap);
   285    289     return pStmt;
   286    290   }
   287    291   
   288    292   /*
   289         -** Compute the hash for a single table named zTab
          293  +** Compute the hash for all rows of the query formed from the printf-style
          294  +** zFormat and its argument.
   290    295   */
   291         -static void hash_one_table(const char *zTab){
   292         -  sqlite3_stmt *pStmt;
   293         -  int nCol;
   294         -  int i;
   295         -  pStmt = db_prepare("SELECT * FROM \"%w\";", zTab);
          296  +static void hash_one_query(const char *zFormat, ...){
          297  +  va_list ap;
          298  +  sqlite3_stmt *pStmt;        /* The query defined by zFormat and "..." */
          299  +  int nCol;                   /* Number of columns in the result set */
          300  +  int i;                      /* Loop counter */
          301  +
          302  +  /* Prepare the query defined by zFormat and "..." */
          303  +  va_start(ap, zFormat);
          304  +  pStmt = db_vprepare(zFormat, ap);
          305  +  va_end(ap);
   296    306     nCol = sqlite3_column_count(pStmt);
          307  +
          308  +  /* Compute a hash over the result of the query */
   297    309     while( SQLITE_ROW==sqlite3_step(pStmt) ){
   298    310       for(i=0; i<nCol; i++){
   299    311         switch( sqlite3_column_type(pStmt,i) ){
   300    312           case SQLITE_NULL: {
   301    313             hash_step((const unsigned char*)"0",1);
          314  +          if( g.fDebug & DEBUG_FULLTRACE ) fprintf(stderr, "NULL\n");
   302    315             break;
   303    316           }
   304    317           case SQLITE_INTEGER: {
   305    318             sqlite3_uint64 u;
   306    319             int j;
   307    320             unsigned char x[8];
   308    321             sqlite3_int64 v = sqlite3_column_int64(pStmt,i);
................................................................................
   309    322             memcpy(&u, &v, 8);
   310    323             for(j=7; j>=0; j--){
   311    324               x[j] = u & 0xff;
   312    325               u >>= 8;
   313    326             }
   314    327             hash_step((const unsigned char*)"1",1);
   315    328             hash_step(x,8);
          329  +          if( g.fDebug & DEBUG_FULLTRACE ){
          330  +            fprintf(stderr, "INT %s\n", sqlite3_column_text(pStmt,i));
          331  +          }
   316    332             break;
   317    333           }
   318    334           case SQLITE_FLOAT: {
   319    335             sqlite3_uint64 u;
   320    336             int j;
   321    337             unsigned char x[8];
   322    338             double r = sqlite3_column_double(pStmt,i);
................................................................................
   323    339             memcpy(&u, &r, 8);
   324    340             for(j=7; j>=0; j--){
   325    341               x[j] = u & 0xff;
   326    342               u >>= 8;
   327    343             }
   328    344             hash_step((const unsigned char*)"2",1);
   329    345             hash_step(x,8);
          346  +          if( g.fDebug & DEBUG_FULLTRACE ){
          347  +            fprintf(stderr, "FLOAT %s\n", sqlite3_column_text(pStmt,i));
          348  +          }
   330    349             break;
   331    350           }
   332    351           case SQLITE_TEXT: {
   333    352             int n = sqlite3_column_bytes(pStmt, i);
   334    353             const unsigned char *z = sqlite3_column_text(pStmt, i);
   335    354             hash_step((const unsigned char*)"3", 1);
   336    355             hash_step(z, n);
          356  +          if( g.fDebug & DEBUG_FULLTRACE ){
          357  +            fprintf(stderr, "TEXT '%s'\n", sqlite3_column_text(pStmt,i));
          358  +          }
   337    359             break;
   338    360           }
   339    361           case SQLITE_BLOB: {
   340    362             int n = sqlite3_column_bytes(pStmt, i);
   341    363             const unsigned char *z = sqlite3_column_blob(pStmt, i);
   342    364             hash_step((const unsigned char*)"4", 1);
   343    365             hash_step(z, n);
          366  +          if( g.fDebug & DEBUG_FULLTRACE ){
          367  +            fprintf(stderr, "BLOB (%d bytes)\n", n);
          368  +          }
   344    369             break;
   345    370           }
   346    371         }
   347    372       }
   348    373     }
   349    374     sqlite3_finalize(pStmt);
   350    375   }
   351    376   
   352    377   
   353    378   /*
   354    379   ** Print sketchy documentation for this utility program
   355    380   */
   356    381   static void showHelp(void){
   357         -  printf("Usage: %s DB\n", g.zArgv0);
          382  +  printf("Usage: %s [options] FILE ...\n", g.zArgv0);
   358    383     printf(
   359         -"Compute a hash on the content of database DB\n"
          384  +"Compute a SHA1 hash on the content of database FILE.  System tables such as\n"
          385  +"sqlite_stat1, sqlite_stat4, and sqlite_sequence are omitted from the hash.\n"
          386  +"Options:\n"
          387  +"   --debug N           Set debugging flags to N (experts only)\n"
          388  +"   --like PATTERN      Only hash tables whose name is LIKE the pattern\n"
          389  +"   --schema-only       Only hash the schema - omit table content\n"
          390  +"   --without-schema    Only hash table content - omit the schema\n"
   360    391     );
   361    392   }
   362    393   
   363    394   int main(int argc, char **argv){
   364         -  const char *zDb = 0;
   365         -  int i;
   366         -  int rc;
   367         -  char *zErrMsg;
   368         -  sqlite3_stmt *pStmt;
          395  +  const char *zDb = 0;         /* Name of the database currently being hashed */
          396  +  int i;                       /* Loop counter */
          397  +  int rc;                      /* Subroutine return code */
          398  +  char *zErrMsg;               /* Error message when opening database */
          399  +  sqlite3_stmt *pStmt;         /* An SQLite query */
          400  +  const char *zLike = 0;       /* LIKE pattern of tables to hash */
          401  +  int omitSchema = 0;          /* True to compute hash on content only */
          402  +  int omitContent = 0;         /* True to compute hash on schema only */
          403  +  int nFile = 0;               /* Number of input filenames seen */
   369    404   
   370    405     g.zArgv0 = argv[0];
   371    406     sqlite3_config(SQLITE_CONFIG_SINGLETHREAD);
   372    407     for(i=1; i<argc; i++){
   373    408       const char *z = argv[i];
   374    409       if( z[0]=='-' ){
   375    410         z++;
................................................................................
   378    413           if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
   379    414           g.fDebug = strtol(argv[++i], 0, 0);
   380    415         }else
   381    416         if( strcmp(z,"help")==0 ){
   382    417           showHelp();
   383    418           return 0;
   384    419         }else
   385         -      if( strcmp(z,"primarykey")==0 ){
   386         -        g.bSchemaPK = 1;
          420  +      if( strcmp(z,"like")==0 ){
          421  +        if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
          422  +        if( zLike!=0 ) cmdlineError("only one --like allowed");
          423  +        zLike = argv[++i];
          424  +      }else
          425  +      if( strcmp(z,"schema-only")==0 ){
          426  +        omitContent = 1;
          427  +      }else
          428  +      if( strcmp(z,"without-schema")==0 ){
          429  +        omitSchema = 1;
   387    430         }else
   388    431         {
   389    432           cmdlineError("unknown option: %s", argv[i]);
   390    433         }
   391         -    }else if( zDb==0 ){
   392         -      zDb = argv[i];
   393    434       }else{
   394         -      cmdlineError("unknown argument: %s", argv[i]);
          435  +      nFile++;
          436  +      if( nFile<i ) argv[nFile] = argv[i];
   395    437       }
   396    438     }
   397         -  if( zDb==0 ){
   398         -    cmdlineError("database argument missing");
          439  +  if( nFile==0 ){
          440  +    cmdlineError("no input files specified - nothing to do");
   399    441     }
   400         -  rc = sqlite3_open(zDb, &g.db);
   401         -  if( rc ){
   402         -    cmdlineError("cannot open database file \"%s\"", zDb);
          442  +  if( omitSchema && omitContent ){
          443  +    cmdlineError("only one of --without-schema and --omit-schema allowed");
   403    444     }
   404         -  rc = sqlite3_exec(g.db, "SELECT * FROM sqlite_master", 0, 0, &zErrMsg);
   405         -  if( rc || zErrMsg ){
   406         -    cmdlineError("\"%s\" does not appear to be a valid SQLite database", zDb);
   407         -  }
          445  +  if( zLike==0 ) zLike = "%";
   408    446   
   409         -  /* Handle tables one by one */
   410         -  pStmt = db_prepare(
   411         -    "SELECT name FROM sqlite_master\n"
   412         -    " WHERE type='table' AND sql NOT LIKE 'CREATE VIRTUAL%%'\n"
   413         -    "UNION SELECT 'sqlite_master' AS name\n"
   414         -    " ORDER BY name;\n"
   415         -  );
   416         -  hash_init();
   417         -  while( SQLITE_ROW==sqlite3_step(pStmt) ){
   418         -    hash_one_table((const char*)sqlite3_column_text(pStmt,0));
   419         -  }
   420         -  hash_finish();
          447  +  for(i=1; i<=nFile; i++){
          448  +    static const int openFlags = 
          449  +       SQLITE_OPEN_READWRITE |     /* Read/write so hot journals can recover */
          450  +       SQLITE_OPEN_URI
          451  +    ;
          452  +    zDb = argv[i];
          453  +    rc = sqlite3_open_v2(zDb, &g.db, openFlags, 0);
          454  +    if( rc ){
          455  +      fprintf(stderr, "cannot open database file '%s'\n", zDb);
          456  +      continue;
          457  +    }
          458  +    rc = sqlite3_exec(g.db, "SELECT * FROM sqlite_master", 0, 0, &zErrMsg);
          459  +    if( rc || zErrMsg ){
          460  +      sqlite3_close(g.db);
          461  +      g.db = 0;
          462  +      fprintf(stderr, "'%s' is not a valid SQLite database\n", zDb);
          463  +      continue;
          464  +    }
   421    465   
   422         -  sqlite3_close(g.db);
          466  +    /* Start the hash */
          467  +    hash_init();
          468  +  
          469  +    /* Hash table content */
          470  +    if( !omitContent ){
          471  +      pStmt = db_prepare(
          472  +        "SELECT name FROM sqlite_master\n"
          473  +        " WHERE type='table' AND sql NOT LIKE 'CREATE VIRTUAL%%'\n"
          474  +        "   AND name NOT LIKE 'sqlite_%%'\n"
          475  +        "   AND name LIKE '%q'\n"
          476  +        " ORDER BY name COLLATE nocase;\n",
          477  +        zLike
          478  +      );
          479  +      while( SQLITE_ROW==sqlite3_step(pStmt) ){
          480  +        /* We want rows of the table to be hashed in PRIMARY KEY order.
          481  +        ** Technically, an ORDER BY clause is required to guarantee that
          482  +        ** order.  However, though not guaranteed by the documentation, every
          483  +        ** historical version of SQLite has always output rows in PRIMARY KEY
          484  +        ** order when there is no WHERE or GROUP BY clause, so the ORDER BY
          485  +        ** can be safely omitted. */
          486  +        hash_one_query("SELECT * FROM \"%w\"", sqlite3_column_text(pStmt,0));
          487  +      }
          488  +      sqlite3_finalize(pStmt);
          489  +    }
          490  +  
          491  +    /* Hash the database schema */
          492  +    if( !omitSchema ){
          493  +      hash_one_query(
          494  +         "SELECT type, name, tbl_name, sql FROM sqlite_master\n"
          495  +         " WHERE tbl_name LIKE '%q'\n"
          496  +         " ORDER BY name COLLATE nocase;\n",
          497  +         zLike
          498  +      );
          499  +    }
          500  +  
          501  +    /* Finish and output the hash and close the database connection. */
          502  +    hash_finish(zDb);
          503  +    sqlite3_close(g.db);
          504  +  }
   423    505     return 0;
   424    506   }