SQLite4
Check-in [0b940bfe17]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the format of compressed page records slightly so that the file format supports inserting padding records into sorted runs.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | compression-hooks
Files: files | file ages | folders
SHA1: 0b940bfe17487aaec091c1915715104a982af507
User & Date: dan 2012-10-27 08:57:49
Context
2012-10-28
10:07
Add padding records to segments in compressed databases in order to avoid partial writes to segments that have already been synced to disk. check-in: ae3c8da44d user: dan tags: compression-hooks
2012-10-27
08:57
Change the format of compressed page records slightly so that the file format supports inserting padding records into sorted runs. check-in: 0b940bfe17 user: dan tags: compression-hooks
2012-10-26
18:08
Enable assert() checking for lost blocks in compressed database mode. check-in: 6e7bc9099c user: dan tags: compression-hooks
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to lsm-test/lsmtest_main.c.

  1329   1329     if( pClose ) fclose(pClose);
  1330   1330     pEnv->xClose(pOut);
  1331   1331   
  1332   1332     return rc;
  1333   1333   }
  1334   1334   
  1335   1335   static int do_insert(int nArg, char **azArg){
  1336         -  const char *zConfig = 0;
  1337   1336     const char *zDb = "lsm";
  1338   1337     TestDb *pDb = 0;
  1339   1338     int i;
  1340   1339     int rc;
  1341   1340     const int nRow = 1 * 1000 * 1000;
  1342   1341   
  1343   1342     DatasourceDefn defn = { TEST_DATASOURCE_RANDOM, 8, 15, 80, 150 };
  1344   1343     Datasource *pData = 0;
  1345   1344   
  1346         -  if( nArg>2 ){
  1347         -    testPrintError("Usage: insert ?DATABASE? ?LSM-CONFIG?\n");
         1345  +  if( nArg>1 ){
         1346  +    testPrintError("Usage: insert ?DATABASE?\n");
  1348   1347       return 1;
  1349   1348     }
  1350   1349     if( nArg==1 ){ zDb = azArg[0]; }
  1351         -  if( nArg==2 ){ zConfig = azArg[1]; }
  1352   1350   
  1353   1351     testMallocUninstall(tdb_lsm_env());
  1354         -  rc = tdb_open(zDb, 0, 1, &pDb);
         1352  +  for(i=0; zDb[i] && zDb[i]!='='; i++);
         1353  +  if( zDb[i] ){
         1354  +    rc = tdb_lsm_open(zDb, "testdb.lsm", 1, &pDb);
         1355  +  }else{
         1356  +    rc = tdb_open(zDb, 0, 1, &pDb);
         1357  +  }
         1358  +
  1355   1359     if( rc!=0 ){
  1356   1360       testPrintError("Error opening db \"%s\": %d\n", zDb, rc);
  1357   1361     }else{
  1358   1362       InsertWriteHook hook;
  1359   1363       memset(&hook, 0, sizeof(hook));
  1360   1364       hook.pOut = fopen("writelog.txt", "w");
  1361   1365   
  1362   1366       pData = testDatasourceNew(&defn);
  1363   1367       tdb_lsm_config_work_hook(pDb, do_insert_work_hook, 0);
  1364   1368       tdb_lsm_write_hook(pDb, do_insert_write_hook, (void *)&hook);
  1365         -    if( zConfig ){
  1366         -      rc = tdb_lsm_config_str(pDb, zConfig);
  1367         -    }
  1368   1369   
  1369   1370       if( rc==0 ){
  1370   1371         for(i=0; i<nRow; i++){
  1371   1372           void *pKey; int nKey;     /* Database key to insert */
  1372   1373           void *pVal; int nVal;     /* Database value to insert */
  1373   1374           testDatasourceEntry(pData, i, &pKey, &nKey, &pVal, &nVal);
  1374   1375           tdb_write(pDb, pKey, nKey, pVal, nVal);

Changes to src/lsm_file.c.

    96     96   ** the first and last page of each block in uncompressed databases. From
    97     97   ** the point of view of the upper layer, all pages are the same size - this
    98     98   ** is different from the uncompressed format where the first and last pages
    99     99   ** on each block are 4 bytes smaller than the others.
   100    100   **
   101    101   ** Pages are stored in variable length compressed form, as follows:
   102    102   **
   103         -**     * Number of bytes in compressed page image, as a 3-byte big-endian
   104         -**       integer.
          103  +**     * 3-byte size field containing the size of the compressed page image
          104  +**       in bytes. The most significant bit of each byte of the size field
          105  +**       is always set. The low 7 bits of each byte together hold a 21-bit
          106  +**       integer value (in big-endian order - the first byte in the field
          107  +**       contains the most significant 7 bits). Since the maximum allowed 
          108  +**       size of a compressed page image is (2^17 - 1) bytes, there are
          109  +**       actually 4 unused bits in the size field.
          110  +**
          111  +**       In other words, if the size of the compressed page image is nSz,
          112  +**       the header can be serialized as follows:
          113  +**
          114  +**         u8 aHdr[3]
          115  +**         aHdr[0] = 0x80 | (u8)(nSz >> 14);
          116  +**         aHdr[1] = 0x80 | (u8)(nSz >>  7);
          117  +**         aHdr[2] = 0x80 | (u8)(nSz >>  0);
   105    118   **
   106    119   **     * Compressed page image.
   107    120   **
   108         -**     * The number of bytes in the compressed page image, again as a 3-byte
   109         -**       big-endian integer.
          121  +**     * A second copy of the 3-byte record header.
   110    122   **
   111    123   ** A page number is a byte offset into the database file. So the smallest
   112    124   ** possible page number is 8192 (immediately after the two meta-pages).
   113    125   ** The first and root page of a segment are identified by a page number
   114    126   ** corresponding to the byte offset of the first byte in the corresponding
   115    127   ** page record. The last page of a segment is identified by the byte offset
   116    128   ** of the last byte in its record.
   117    129   **
   118    130   ** Unlike uncompressed pages, compressed page records may span blocks.
   119    131   **
   120         -** TODO:
   121         -**
   122    132   ** Sometimes, in order to avoid touching sectors that contain synced data
   123    133   ** when writing, it is necessary to insert unused space between compressed
   124    134   ** page records. This can be done as follows:
   125    135   **
   126         -**     * For less than 4 bytes of empty space, a series of 0x00 bytes.
          136  +**     * For fewer than 6 bytes of empty space, a series of 0x00 bytes.
   127    137   **
   128         -**     * For 4 or more bytes, the block of free space begins with an 
   129         -**       0x01 byte, followed by a varint containing the total size of the 
   130         -**       free space. Similarly, it ends with an (ABCD -> BCDA) transformed
   131         -**       varint an a final 0x01 byte.
          138  +**     * For 6 or more bytes of empty space, a record similar to a 
          139  +**       compressed page record is added to the segment. A padding record
          140  +**       is distinguished from a compressed page record by the most 
          141  +**       significant bit of the second byte of the size field, which is
          142  +**       cleared instead of set. 
   132    143   */
   133    144   #include "lsmInt.h"
   134    145   
   135    146   #include <sys/types.h>
   136    147   #include <sys/stat.h>
   137    148   #include <fcntl.h>
   138    149   
................................................................................
   931    942     }else{
   932    943       assert( 0 );
   933    944     }
   934    945     return rc;
   935    946   }
   936    947   
   937    948   /*
   938         -** Encode and decode routines for 24-bit big-endian integers.
          949  +** Encode and decode routines for record size fields.
   939    950   */
   940         -static u32 lsmGetU24(u8 *aBuf){
   941         -  return (((u32)aBuf[0]) << 16) + (((u32)aBuf[1]) << 8) + ((u32)aBuf[2]);
          951  +static void putRecordSize(u8 *aBuf, int nByte, int bFree){
          952  +  aBuf[0] = (u8)(nByte >> 14) | 0x80;
          953  +  aBuf[1] = ((u8)(nByte >>  7) & 0x7F) | (bFree ? 0x00 : 0x80);
          954  +  aBuf[2] = (u8)nByte | 0x80;
   942    955   }
   943         -static void lsmPutU24(u8 *aBuf, u32 iVal){
   944         -  aBuf[0] = (u8)(iVal >> 16);
   945         -  aBuf[1] = (u8)(iVal >>  8);
   946         -  aBuf[2] = (u8)(iVal >>  0);
          956  +static int getRecordSize(u8 *aBuf, int *pbFree){
          957  +  int nByte;
          958  +  nByte  = (aBuf[0] & 0x7F) << 14;
          959  +  nByte += (aBuf[1] & 0x7F) << 7;
          960  +  nByte += (aBuf[2] & 0x7F);
          961  +  *pbFree = !!(aBuf[1] & 0x80);
          962  +  return nByte;
   947    963   }
   948    964   
   949    965   static int fsSubtractOffset(FileSystem *pFS, i64 iOff, int iSub, i64 *piRes){
   950    966     i64 iStart;
   951    967     int iBlk;
   952    968     int rc;
   953    969   
................................................................................
  1011   1027     assert( p && pPg->nCompress==0 );
  1012   1028   
  1013   1029     if( fsAllocateBuffer(pFS) ) return LSM_NOMEM;
  1014   1030   
  1015   1031     rc = fsReadData(pFS, iOff, aSz, sizeof(aSz));
  1016   1032   
  1017   1033     if( rc==LSM_OK ){
  1018         -    pPg->nCompress = (int)lsmGetU24(aSz);
         1034  +    int bFree;
         1035  +    pPg->nCompress = (int)getRecordSize(aSz, &bFree);
  1019   1036       rc = fsAddOffset(pFS, iOff, 3, &iOff);
  1020   1037       if( rc==LSM_OK ){
  1021   1038         if( pPg->nCompress>pFS->nBuffer ){
  1022   1039           rc = LSM_CORRUPT_BKPT;
  1023   1040         }else{
  1024   1041           rc = fsReadData(pFS, iOff, pFS->aBuffer, pPg->nCompress);
  1025   1042         }
................................................................................
  1317   1334     u8 aSz[3];
  1318   1335     int rc;
  1319   1336     i64 iRead;
  1320   1337   
  1321   1338     rc = fsSubtractOffset(pFS, iOff, sizeof(aSz), &iRead);
  1322   1339     if( rc==LSM_OK ) rc = fsReadData(pFS, iRead, aSz, sizeof(aSz));
  1323   1340     if( rc==LSM_OK ){
  1324         -    int nSz = lsmGetU24(aSz) + sizeof(aSz)*2;
  1325         -    rc = fsSubtractOffset(pFS, iOff, nSz, piPrev);
         1341  +    int bFree;
         1342  +    int nSz = getRecordSize(aSz, &bFree);
         1343  +    rc = fsSubtractOffset(pFS, iOff, nSz + sizeof(aSz)*2, piPrev);
  1326   1344     }
  1327   1345   
  1328   1346     return rc;
  1329   1347   }
  1330   1348   
  1331   1349   /*
  1332   1350   ** The first argument to this function is a valid reference to a database
................................................................................
  1768   1786         u8 aSz[3];                  /* pPg->nCompress as a 3-byte size field */
  1769   1787         assert( pPg->pSeg && pPg->iPg==0 && pPg->nCompress==0 );
  1770   1788   
  1771   1789         /* Compress the page image. */
  1772   1790         rc = fsCompressIntoBuffer(pFS, pPg);
  1773   1791   
  1774   1792         /* Serialize the compressed size into buffer aSz[] */
  1775         -      lsmPutU24(aSz, pPg->nCompress);
         1793  +      putRecordSize(aSz, pPg->nCompress, 0);
  1776   1794   
  1777   1795         /* Write the serialized page record into the database file. */
  1778   1796         pPg->iPg = fsAppendData(pFS, pPg->pSeg, aSz, sizeof(aSz), &rc);
  1779   1797         fsAppendData(pFS, pPg->pSeg, pFS->aBuffer, pPg->nCompress, &rc);
  1780   1798         fsAppendData(pFS, pPg->pSeg, aSz, sizeof(aSz), &rc);
  1781   1799   
  1782   1800         /* Now that it has a page number, insert the page into the hash table */