SQLite4
Check-in [2ba0368e76]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Enhance the file-format to allow padding records smaller than 6 bytes in length.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | compression-hooks
Files: files | file ages | folders
SHA1: 2ba0368e764fc0597f89ccd37668d284332168f1
User & Date: dan 2012-10-28 11:28:05
Context
2012-10-28
11:34
Turn off LSM_CONFIG_MMAP automatically in compressed database mode. Leaf check-in: 676da8516d user: dan tags: compression-hooks
11:28
Enhance the file-format to allow padding records smaller than 6 bytes in length. check-in: 2ba0368e76 user: dan tags: compression-hooks
10:07
Add padding records to segments in compressed databases in order to avoid partial writes to segments that have already been synced to disk. check-in: ae3c8da44d user: dan tags: compression-hooks
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/lsm_file.c.

129
130
131
132
133
134
135
136






137
138
139
140
141
142
143
...
882
883
884
885
886
887
888







889
890
891
892
893
894
895
...
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
...
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
....
1035
1036
1037
1038
1039
1040
1041

1042




1043
1044
1045
1046
1047
1048
1049
....
1359
1360
1361
1362
1363
1364
1365

1366
1367


1368




1369
1370
1371
1372
1373
1374
1375
1376
....
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
....
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
....
1888
1889
1890
1891
1892
1893
1894

1895






1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931

1932
1933
1934
1935
1936
1937
1938
1939





1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
....
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
**
** Unlike uncompressed pages, compressed page records may span blocks.
**
** Sometimes, in order to avoid touching sectors that contain synced data
** when writing, it is necessary to insert unused space between compressed
** page records. This can be done as follows:
**
**     * For less than 6 bytes of empty space, a series of 0x00 bytes.






**
**     * For 6 or more bytes of empty space, a record similar to a 
**       compressed page record is added to the segment. A padding record
**       is distinguished from a compressed page record by the most 
**       significant bit of the second byte of the size field, which is
**       cleared instead of set. 
*/
................................................................................
    if( rc==LSM_OK ){
      *piNext = fsPageToBlock(pFS, lsmGetU32(&pLast->aData[pFS->nPagesize-4]));
      lsmFsPageRelease(pLast);
    }
  }
  return rc;
}








/*
** This function is only called in compressed database mode.
*/
static int fsReadData(
  FileSystem *pFS,                /* File-system handle */
  i64 iOff,                       /* Read data from this offset */
................................................................................
){
  i64 iEob;                       /* End of block */
  int nRead;
  int rc;

  assert( pFS->pCompress );

  iEob = fsLastPageOnBlock(pFS, fsPageToBlock(pFS, iOff)) + 1;
  nRead = LSM_MIN(iEob - iOff, nData);

  rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aData, nRead);
  if( rc==LSM_OK && nRead!=nData ){
    int iBlk;

    rc = fsBlockNext(pFS, fsPageToBlock(pFS, iOff), &iBlk);
................................................................................
static int fsAddOffset(FileSystem *pFS, i64 iOff, int iAdd, i64 *piRes){
  i64 iEob;
  int iBlk;
  int rc;

  assert( pFS->pCompress );

  iEob = fsLastPageOnBlock(pFS, fsPageToBlock(pFS, iOff));
  if( (iOff+iAdd)<=iEob ){
    *piRes = (iOff+iAdd);
    return LSM_OK;
  }

  rc = fsBlockNext(pFS, fsPageToBlock(pFS, iOff), &iBlk);
  *piRes = fsFirstPageOnBlock(pFS, iBlk) + iAdd - (iEob - iOff + 1);
................................................................................

  if( fsAllocateBuffer(pFS) ) return LSM_NOMEM;

  rc = fsReadData(pFS, iOff, aSz, sizeof(aSz));

  if( rc==LSM_OK ){
    int bFree;

    pPg->nCompress = (int)getRecordSize(aSz, &bFree);




    if( bFree ){
      if( pnSpace ){
        *pnSpace = pPg->nCompress + sizeof(aSz)*2;
      }else{
        rc = LSM_CORRUPT_BKPT;
      }
    }else{
................................................................................
static int fsGetPageBefore(FileSystem *pFS, i64 iOff, Pgno *piPrev){
  /* Given offset iOff, compute the offset of the record that ends
  ** immediately before it and store that offset in *piPrev. Returns LSM_OK
  ** on success, or the error code returned by fsSubtractOffset() or
  ** fsReadData() otherwise. */
  u8 aSz[3];                      /* Size field read from just before iOff */
  int rc;
  i64 iRead;                      /* Offset the size field is read from */

  /* Step back sizeof(aSz) bytes from iOff and read the 3-byte size field
  ** stored there. */
  rc = fsSubtractOffset(pFS, iOff, sizeof(aSz), &iRead);
  if( rc==LSM_OK ) rc = fsReadData(pFS, iRead, aSz, sizeof(aSz));

  if( rc==LSM_OK ){
    int bFree;                    /* Set by getRecordSize(); not used here */


    int nSz = getRecordSize(aSz, &bFree);




    /* Step back over the payload plus two size fields. (The padding writer
    ** lsmFsSortedPadding() emits one size field before and one after the
    ** payload; presumably page records use the same framing.) */
    rc = fsSubtractOffset(pFS, iOff, nSz + sizeof(aSz)*2, piPrev);
  }

  return rc;
}

/*
** The first argument to this function is a valid reference to a database
................................................................................
    ** If it is, then an extra block has already been allocated for this run.
    ** Shift this extra block back to the free-block list. 
    **
    ** Otherwise, add the first free page in the last block used by the run
    ** to the lAppend list.
    */
    iBlk = fsPageToBlock(pFS, p->iLastPg);
    if( fsLastPageOnBlock(pFS, fsPageToBlock(pFS, p->iLastPg) )!=p->iLastPg ){
      int i;
      Pgno *aiAppend = pFS->pDb->pWorker->aiAppend;
      for(i=0; i<LSM_APPLIST_SZ; i++){
        if( aiAppend[i]==0 ){
          aiAppend[i] = p->iLastPg+1;
          break;
        }
................................................................................
        rc = lsmBlockAllocate(pFS->pDb, &iBlk);
        pSeg->iFirst = iApp = fsFirstPageOnBlock(pFS, iBlk);
      }
    }
    iRet = iApp;

    /* Write as much data as is possible at iApp (usually all of it). */
    iLastOnBlock = fsLastPageOnBlock(pFS, fsPageToBlock(pFS, iApp));
    if( rc==LSM_OK ){
      int nSpace = iLastOnBlock - iApp + 1;
      nWrite = LSM_MIN(nData, nSpace);
      nRem = nData - nWrite;
      assert( nWrite>=0 );
      if( nWrite!=0 ){
        rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iApp, aData, nWrite);
................................................................................
    pFS->nWrite++;
  }

  return rc;
}

/*

** Add a padding record to the segment passed as the third argument.
**
** This is a no-op unless pFS->pCompress is set. Otherwise, unless the
** early-return test below decides no padding is needed, a padding record
** of at least 6 bytes (two 3-byte size fields plus a zeroed payload) is
** appended to pSeg. The closing assert() states the intended result: the
** segment ends either on the last page of its block or on the last byte
** of a sector. Parameter pSnapshot is not referenced by this function.
**
** Returns LSM_OK, or the error code left in rc by fsAppendData().
*/
int lsmFsSortedPadding(
  FileSystem *pFS, 
  Snapshot *pSnapshot,
  Segment *pSeg
){
  int rc = LSM_OK;
  if( pFS->pCompress ){
    Pgno iLast2;
    Pgno iLast = pSeg->iLastPg;     /* Current last page of segment */
    int nPad;                       /* Bytes of padding required */
    u8 aSz[3];

    /* Number of bytes between iLast and the end of the sector holding it. */
    nPad = pFS->szSector - 1 - (iLast % pFS->szSector);

    /* Nothing to do if iLast is already the final byte of its sector, or if
    ** exactly 4 bytes remain and iLast is the last page on its block.
    ** NOTE(review): the 4 presumably corresponds to bytes at the tail of a
    ** block that cannot hold record data - confirm against
    ** fsLastPageOnBlock(). */
    if( nPad==0 
     || (nPad==4 && iLast==fsLastPageOnBlock(pFS, fsPageToBlock(pFS, iLast)) )
    ){
      return LSM_OK;
    }

    /* iLast2 is the final byte of the sector that contains offset (iLast+6).
    ** Adding 6 first guarantees the gap is large enough to hold a padding
    ** record, spilling into the following sector when necessary. */
    iLast2 = (1 + (iLast + 6)/pFS->szSector) * pFS->szSector - 1;
    assert( fsPageToBlock(pFS, iLast)==fsPageToBlock(pFS, iLast2) );
    nPad = iLast2 - iLast;

    /* If the target lies beyond the last page on the block, shrink the
    ** padding by 4 bytes. If that leaves fewer than the 6-byte minimum,
    ** grow it by (szSector - 4) bytes instead. */
    if( iLast2>fsLastPageOnBlock(pFS, fsPageToBlock(pFS, iLast)) ){
      nPad -= 4;
      if( nPad<6 ){
        nPad += (pFS->szSector - 4);
      }
    }
    assert( nPad>=6 );

#if 0
    printf("padding segment with %d bytes at %d...\n", nPad, (int)iLast);
#endif


    /* The segment grows by the whole padding record. The size field itself
    ** stores only the payload length, i.e. the total minus the two 3-byte
    ** size fields. The third argument (1) presumably marks the record as
    ** padding rather than page data - confirm in putRecordSize(). */
    pSeg->nSize += nPad;
    nPad -= 6;
    putRecordSize(aSz, nPad, 1);

    /* Record layout: size field, nPad zero bytes, size field. Errors are
    ** accumulated in rc through the final argument of fsAppendData().
    ** NOTE(review): pFS->aBuffer is assumed to be allocated and at least
    ** nPad bytes in size; nothing in this function checks that. */
    fsAppendData(pFS, pSeg, aSz, sizeof(aSz), &rc);
    memset(pFS->aBuffer, 0, nPad);
    fsAppendData(pFS, pSeg, pFS->aBuffer, nPad, &rc);
    fsAppendData(pFS, pSeg, aSz, sizeof(aSz), &rc);







    assert( rc!=LSM_OK 
     || pSeg->iLastPg==fsLastPageOnBlock(pFS, fsPageToBlock(pFS, pSeg->iLastPg))
     || ((pSeg->iLastPg + 1) % pFS->szSector)==0
    );
  }

  return rc;
}


................................................................................
        if( iBlk!=iLastBlk ){
          fsBlockNext(pFS, iBlk, &iBlk);
        }else{
          iBlk = 0;
        }
      }

      if( bExtra && iLast==fsLastPageOnBlock(pFS, fsPageToBlock(pFS, iLast)) ){
        fsBlockNext(pFS, iLastBlk, &iBlk);
        aUsed[iBlk-1] = 1;
      }
    }
  }
}








|
>
>
>
>
>
>







 







>
>
>
>
>
>
>







 







|







 







|







 







>
|
>
>
>
>







 







>


>
>
|
>
>
>
>
|







 







|







 







|







 







>
|
>
>
>
>
>
>













<
<
<
<
<
<
<
|



|

<
<
|
<
|

<
<
<
<
>
|
|
|
<
|
|
|
|
>
>
>
>
>
|


|
|







 







|







129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
...
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
...
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
...
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
....
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
....
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
....
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
....
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
....
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940







1941
1942
1943
1944
1945
1946


1947

1948
1949




1950
1951
1952
1953

1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
....
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
**
** Unlike uncompressed pages, compressed page records may span blocks.
**
** Sometimes, in order to avoid touching sectors that contain synced data
** when writing, it is necessary to insert unused space between compressed
** page records. This can be done as follows:
**
**     * For fewer than 6 bytes of empty space, the first and last byte
**       of the free space contain the total number of free bytes. For
**       example:
**
**         Block of 4 free bytes: 0x04 0x?? 0x?? 0x04
**         Block of 2 free bytes: 0x02 0x02
**         A single free byte:    0x01
**
**     * For 6 or more bytes of empty space, a record similar to a 
**       compressed page record is added to the segment. A padding record
**       is distinguished from a compressed page record by the most 
**       significant bit of the second byte of the size field, which is
**       cleared instead of set. 
*/
................................................................................
    if( rc==LSM_OK ){
      *piNext = fsPageToBlock(pFS, lsmGetU32(&pLast->aData[pFS->nPagesize-4]));
      lsmFsPageRelease(pLast);
    }
  }
  return rc;
}

/*
** Look up the block that contains page iPg and return the number of the
** final page belonging to that same block.
*/
Pgno fsLastPageOnPagesBlock(FileSystem *pFS, Pgno iPg){
  Pgno iLastOnBlk;                /* Last page on the block holding iPg */

  iLastOnBlk = fsLastPageOnBlock(pFS, fsPageToBlock(pFS, iPg));
  return iLastOnBlk;
}

/*
** This function is only called in compressed database mode.
*/
static int fsReadData(
  FileSystem *pFS,                /* File-system handle */
  i64 iOff,                       /* Read data from this offset */
................................................................................
){
  i64 iEob;                       /* End of block */
  int nRead;
  int rc;

  assert( pFS->pCompress );

  iEob = fsLastPageOnPagesBlock(pFS, iOff) + 1;
  nRead = LSM_MIN(iEob - iOff, nData);

  rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aData, nRead);
  if( rc==LSM_OK && nRead!=nData ){
    int iBlk;

    rc = fsBlockNext(pFS, fsPageToBlock(pFS, iOff), &iBlk);
................................................................................
static int fsAddOffset(FileSystem *pFS, i64 iOff, int iAdd, i64 *piRes){
  i64 iEob;
  int iBlk;
  int rc;

  assert( pFS->pCompress );

  iEob = fsLastPageOnPagesBlock(pFS, iOff);
  if( (iOff+iAdd)<=iEob ){
    *piRes = (iOff+iAdd);
    return LSM_OK;
  }

  rc = fsBlockNext(pFS, fsPageToBlock(pFS, iOff), &iBlk);
  *piRes = fsFirstPageOnBlock(pFS, iBlk) + iAdd - (iEob - iOff + 1);
................................................................................

  if( fsAllocateBuffer(pFS) ) return LSM_NOMEM;

  rc = fsReadData(pFS, iOff, aSz, sizeof(aSz));

  if( rc==LSM_OK ){
    int bFree;
    if( aSz[0] & 0x80 ){
      pPg->nCompress = (int)getRecordSize(aSz, &bFree);
    }else{
      pPg->nCompress = (int)aSz[0] - sizeof(aSz)*2;
      bFree = 1;
    }
    if( bFree ){
      if( pnSpace ){
        *pnSpace = pPg->nCompress + sizeof(aSz)*2;
      }else{
        rc = LSM_CORRUPT_BKPT;
      }
    }else{
................................................................................
static int fsGetPageBefore(FileSystem *pFS, i64 iOff, Pgno *piPrev){
  /* Locate the start of the record (page or padding) that ends immediately
  ** before offset iOff and store its offset in *piPrev. Returns LSM_OK on
  ** success, or the first error reported by fsSubtractOffset() or
  ** fsReadData(). */
  u8 aTail[3];                    /* The three bytes that precede iOff */
  i64 iTail;                      /* Offset that aTail[] is read from */
  int nBack;                      /* Number of bytes to step back from iOff */
  int bFree;                      /* Output of getRecordSize(); unused here */
  int rc;

  rc = fsSubtractOffset(pFS, iOff, sizeof(aTail), &iTail);
  if( rc!=LSM_OK ) return rc;

  rc = fsReadData(pFS, iTail, aTail, sizeof(aTail));
  if( rc!=LSM_OK ) return rc;

  if( (aTail[2] & 0x80)==0 ){
    /* High bit of the final byte is clear: the byte itself is the total
    ** length of a short run of free bytes. */
    nBack = (int)(aTail[2] & 0x7F);
  }else{
    /* A full 3-byte size field. Step back over the payload and both the
    ** leading and trailing size fields. */
    nBack = getRecordSize(aTail, &bFree) + sizeof(aTail)*2;
  }

  return fsSubtractOffset(pFS, iOff, nBack, piPrev);
}

/*
** The first argument to this function is a valid reference to a database
................................................................................
    ** If it is, then an extra block has already been allocated for this run.
    ** Shift this extra block back to the free-block list. 
    **
    ** Otherwise, add the first free page in the last block used by the run
    ** to the lAppend list.
    */
    iBlk = fsPageToBlock(pFS, p->iLastPg);
    if( fsLastPageOnPagesBlock(pFS, p->iLastPg)!=p->iLastPg ){
      int i;
      Pgno *aiAppend = pFS->pDb->pWorker->aiAppend;
      for(i=0; i<LSM_APPLIST_SZ; i++){
        if( aiAppend[i]==0 ){
          aiAppend[i] = p->iLastPg+1;
          break;
        }
................................................................................
        rc = lsmBlockAllocate(pFS->pDb, &iBlk);
        pSeg->iFirst = iApp = fsFirstPageOnBlock(pFS, iBlk);
      }
    }
    iRet = iApp;

    /* Write as much data as is possible at iApp (usually all of it). */
    iLastOnBlock = fsLastPageOnPagesBlock(pFS, iApp);
    if( rc==LSM_OK ){
      int nSpace = iLastOnBlock - iApp + 1;
      nWrite = LSM_MIN(nData, nSpace);
      nRem = nData - nWrite;
      assert( nWrite>=0 );
      if( nWrite!=0 ){
        rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iApp, aData, nWrite);
................................................................................
    pFS->nWrite++;
  }

  return rc;
}

/*
** For non-compressed databases, this function is a no-op. For compressed
** databases, it adds a padding record to the segment passed as the third
** argument.
**
** The size of the padding records is selected so that the last byte 
** written is the last byte of a disk sector. This means that if a 
** snapshot is taken and checkpointed, subsequent worker processes will
** not write to any sector that contains checkpointed data.
**
** Two encodings are used, depending on the number of bytes required:
**
**   * 6 or more bytes: a 3-byte size field, a zeroed payload and a second
**     3-byte size field.
**
**   * 1 to 5 bytes: a run of bytes in which the first and last byte both
**     hold the length of the run.
**
** Parameter pSnapshot is not referenced by this function. The return value
** is LSM_OK, or the error code left in rc by fsAppendData().
*/
int lsmFsSortedPadding(
  FileSystem *pFS, 
  Snapshot *pSnapshot,
  Segment *pSeg
){
  int rc = LSM_OK;
  if( pFS->pCompress ){
    Pgno iLast2;
    Pgno iLast = pSeg->iLastPg;     /* Current last page of segment */
    int nPad;                       /* Bytes of padding required */
    u8 aSz[3];








    /* iLast2 is the final byte of the sector that contains iLast, so nPad
    ** is the number of bytes needed to reach the end of that sector (zero
    ** if iLast is already there). */
    iLast2 = (1 + iLast/pFS->szSector) * pFS->szSector - 1;
    assert( fsPageToBlock(pFS, iLast)==fsPageToBlock(pFS, iLast2) );
    nPad = iLast2 - iLast;

    /* If the end of the sector lies beyond the last page on the block, pad
    ** 4 bytes less. NOTE(review): the 4 presumably corresponds to bytes at
    ** the tail of a block that cannot hold record data - confirm against
    ** fsLastPageOnBlock(). */
    if( iLast2>fsLastPageOnPagesBlock(pFS, iLast) ){
      nPad -= 4;


    }

    assert( nPad>=0 );





    if( nPad>=6 ){
      /* Full padding record. The size field stores the payload length only,
      ** i.e. the total minus the two 3-byte size fields. The third argument
      ** (1) presumably marks the record as padding - confirm in
      ** putRecordSize(). */
      pSeg->nSize += nPad;
      nPad -= 6;
      putRecordSize(aSz, nPad, 1);

      /* NOTE(review): pFS->aBuffer is assumed to be allocated and at least
      ** nPad bytes in size; nothing in this function checks that. */
      fsAppendData(pFS, pSeg, aSz, sizeof(aSz), &rc);
      memset(pFS->aBuffer, 0, nPad);
      fsAppendData(pFS, pSeg, pFS->aBuffer, nPad, &rc);
      fsAppendData(pFS, pSeg, aSz, sizeof(aSz), &rc);
    }else if( nPad>0 ){
      /* Short padding of 1 to 5 bytes. The first and last byte both hold the
      ** length (they are the same byte when nPad==1); bytes in between are
      ** zero. NOTE(review): unlike the branch above, pSeg->nSize is not
      ** increased here - confirm that this is intentional. */
      u8 aBuf[5] = {0,0,0,0,0};
      aBuf[0] = (u8)nPad;
      aBuf[nPad-1] = (u8)nPad;
      fsAppendData(pFS, pSeg, aBuf, nPad, &rc);
    }

    /* On success the segment must end either on the last page of its block
    ** or on the last byte of a sector. */
    assert( rc!=LSM_OK 
        || pSeg->iLastPg==fsLastPageOnPagesBlock(pFS, pSeg->iLastPg)
        || ((pSeg->iLastPg + 1) % pFS->szSector)==0
    );
  }

  return rc;
}


................................................................................
        if( iBlk!=iLastBlk ){
          fsBlockNext(pFS, iBlk, &iBlk);
        }else{
          iBlk = 0;
        }
      }

      if( bExtra && iLast==fsLastPageOnPagesBlock(pFS, iLast) ){
        fsBlockNext(pFS, iLastBlk, &iBlk);
        aUsed[iBlk-1] = 1;
      }
    }
  }
}