Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Do not open the log file until it is first written or read. This ensures that it is not opened before locks that prevent other processes from unlinking it have been obtained.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | multi-process
Files: files | file ages | folders
SHA1: 0f28f8317b4fe76e9bee0af3335591bda20a5664
User & Date: dan 2012-09-01 12:04:58.839
Context
2012-09-01
16:39
Fix a problem allowing shared memory to be recycled too early. check-in: c2f247c372 user: dan tags: multi-process
12:04
Do not open the log file until it is first written or read. This ensures that it is not opened before locks that prevent other processes from unlinking it have been obtained. check-in: 0f28f8317b user: dan tags: multi-process
11:09
When loading free-list elements from the lsm, merge them into the existing free-list so that free blocks are always sorted from least to most recently used. check-in: 4a2be461c1 user: dan tags: multi-process
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/lsm_ckpt.c.
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
  lsm_db *pDb,
  Freelist *pFreelist
){
  int rc;
  int nVal = 0;
  void *pVal = 0;
  assert( lsmShmAssertWorker(pDb) );
  
  /* Load the blob of data from the LSM. If that is successful (and the
  ** blob is greater than zero bytes in size), decode the contents and
  ** merge them into the current contents of *pFreelist.  */
  rc = lsmSortedLoadFreelist(pDb, &pVal, &nVal);
  if( pVal ){
    u32 *aFree = (u32 *)pVal;
    int nFree = nVal / sizeof(int);







|







727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
  lsm_db *pDb,
  Freelist *pFreelist
){
  int rc;
  int nVal = 0;
  void *pVal = 0;
  assert( lsmShmAssertWorker(pDb) );

  /* Load the blob of data from the LSM. If that is successful (and the
  ** blob is greater than zero bytes in size), decode the contents and
  ** merge them into the current contents of *pFreelist.  */
  rc = lsmSortedLoadFreelist(pDb, &pVal, &nVal);
  if( pVal ){
    u32 *aFree = (u32 *)pVal;
    int nFree = nVal / sizeof(int);
Changes to src/lsm_file.c.
29
30
31
32
33
34
35
36

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
**   exist - since it would always overlap with the meta pages. If the 
**   page-size is (say) 512 bytes, then the first usable page in the database
**   is page 33.
**
**   It is assumed that the first two meta pages and the data that follows
**   them are located on different disk sectors, so that if a power failure
**   occurs while writing to a meta page there is no risk of damage to the other
**   meta page or any other part of the database file.

**
** Blocks:
**
**   The database file is also divided into blocks. The default block size is
**   2MB. When writing to the database file, an attempt is made to write data
**   in contiguous block-sized chunks.
**
**   The first and last page on each block are special in that they are 4 
**   bytes smaller than all other pages. This is because the last four bytes 
**   of space on the first and last pages of each block are reserved for a 
**   pointers to other blocks (i.e. a 32-bit block number).
**
** Runs:
**
**   A run is a sequence of pages that the upper layer uses to store a 
**   sorted array of database keys (and accompanying data - values, FC 
**   pointers and so on). Given a page within a run, it is possible to







|
>









|







29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
**   exist - since it would always overlap with the meta pages. If the 
**   page-size is (say) 512 bytes, then the first usable page in the database
**   is page 33.
**
**   It is assumed that the first two meta pages and the data that follows
**   them are located on different disk sectors, so that if a power failure
**   occurs while writing to a meta page there is no risk of damage to the other
**   meta page or any other part of the database file. TODO: This may need
**   to be revisited.
**
** Blocks:
**
**   The database file is also divided into blocks. The default block size is
**   2MB. When writing to the database file, an attempt is made to write data
**   in contiguous block-sized chunks.
**
**   The first and last page on each block are special in that they are 4 
**   bytes smaller than all other pages. This is because the last four bytes 
**   of space on the first and last pages of each block are reserved for
**   pointers to other blocks (i.e. a 32-bit block number).
**
** Runs:
**
**   A run is a sequence of pages that the upper layer uses to store a 
**   sorted array of database keys (and accompanying data - values, FC 
**   pointers and so on). Given a page within a run, it is possible to
73
74
75
76
77
78
79

80
81
82
83
84
85
86
** THE LOG FILE 
**
** This file opens and closes the log file. But it does not contain any
** logic related to the log file format. Instead, it exports the following
** functions that are used by the code in lsm_log.c to read and write the
** log file:
**

**     lsmFsWriteLog
**     lsmFsSyncLog
**     lsmFsReadLog
**     lsmFsTruncateLog
**     lsmFsCloseAndDeleteLog
**
*/







>







74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
** THE LOG FILE 
**
** This file opens and closes the log file. But it does not contain any
** logic related to the log file format. Instead, it exports the following
** functions that are used by the code in lsm_log.c to read and write the
** log file:
**
**     lsmFsOpenLog
**     lsmFsWriteLog
**     lsmFsSyncLog
**     lsmFsReadLog
**     lsmFsTruncateLog
**     lsmFsCloseAndDeleteLog
**
*/
240
241
242
243
244
245
246

247
248
249
250
251
252
253

254
255
256
257
258
259
260
261
262

263
264
265
266
267
268
269
}

/*
** Write the pStr->n bytes stored at pStr->z into the log file, beginning
** at byte offset iOff. The value returned is the return code of
** lsmEnvWrite().
*/
int lsmFsWriteLog(FileSystem *pFS, i64 iOff, LsmString *pStr){
  int rc;                         /* Return code */
  rc = lsmEnvWrite(pFS->pEnv, pFS->fdLog, iOff, pStr->z, pStr->n);
  return rc;
}

/*
** Sync (fsync()) the log file by handing its file handle to lsmEnvSync().
** The value returned is the return code of lsmEnvSync().
*/
int lsmFsSyncLog(FileSystem *pFS){
  int rc;                         /* Return code */
  rc = lsmEnvSync(pFS->pEnv, pFS->fdLog);
  return rc;
}

/*
** Read nRead bytes of data starting at offset iOff of the log file and
** append them to string buffer pStr (the data is written at offset
** pStr->n of the buffer, which is first extended by nRead bytes).
**
** pStr->n is advanced only if the read succeeds. If lsmStringExtend() or
** lsmEnvRead() fails, the error code is returned and pStr->n is left
** unchanged, so that bytes that were never read are not counted as part
** of the string.
*/
int lsmFsReadLog(FileSystem *pFS, i64 iOff, int nRead, LsmString *pStr){
  int rc;                         /* Return code */

  rc = lsmStringExtend(pStr, nRead);
  if( rc==LSM_OK ){
    rc = lsmEnvRead(pFS->pEnv, pFS->fdLog, iOff, &pStr->z[pStr->n], nRead);
    if( rc==LSM_OK ){
      /* Only account for the new bytes once they are known to be valid. */
      pStr->n += nRead;
    }
  }
  return rc;
}







>







>




|
|



>







242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
}

/*
** Write the pStr->n bytes stored at pStr->z into the log file, beginning
** at byte offset iOff. The log file handle must already be set (this is
** asserted). The value returned is the return code of lsmEnvWrite().
*/
int lsmFsWriteLog(FileSystem *pFS, i64 iOff, LsmString *pStr){
  int rc;                         /* Return code */
  assert( pFS->fdLog );
  rc = lsmEnvWrite(pFS->pEnv, pFS->fdLog, iOff, pStr->z, pStr->n);
  return rc;
}

/*
** Sync (fsync()) the log file by handing its file handle to lsmEnvSync().
** The log file handle must already be set (this is asserted). The value
** returned is the return code of lsmEnvSync().
*/
int lsmFsSyncLog(FileSystem *pFS){
  int rc;                         /* Return code */
  assert( pFS->fdLog );
  rc = lsmEnvSync(pFS->pEnv, pFS->fdLog);
  return rc;
}

/*
** Read nRead bytes of data starting at offset iOff of the log file and
** append them to string buffer pStr (the data is written at offset
** pStr->n of the buffer, which is first extended by nRead bytes). The
** log file handle must already be set (this is asserted).
**
** pStr->n is advanced only if the read succeeds. If lsmStringExtend() or
** lsmEnvRead() fails, the error code is returned and pStr->n is left
** unchanged, so that bytes that were never read are not counted as part
** of the string.
*/
int lsmFsReadLog(FileSystem *pFS, i64 iOff, int nRead, LsmString *pStr){
  int rc;                         /* Return code */
  assert( pFS->fdLog );
  rc = lsmStringExtend(pStr, nRead);
  if( rc==LSM_OK ){
    rc = lsmEnvRead(pFS->pEnv, pFS->fdLog, iOff, &pStr->z[pStr->n], nRead);
    if( rc==LSM_OK ){
      /* Only account for the new bytes once they are known to be valid. */
      pStr->n += nRead;
    }
  }
  return rc;
}
319
320
321
322
323
324
325

















326
327
328
329
330
331
332
    }else{
      *pRc = lsmEnvOpen(pFS->pEnv, zName, &pFile);
    }
    lsmFree(pFS->pEnv, zName);
  }
  return pFile;
}


















/*
** Open a connection to a database stored within the file-system (the
** "system of files").
*/
int lsmFsOpen(lsm_db *pDb, const char *zDb){
  FileSystem *pFS;







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
    }else{
      *pRc = lsmEnvOpen(pFS->pEnv, zName, &pFile);
    }
    lsmFree(pFS->pEnv, zName);
  }
  return pFile;
}

/*
** Open the log file, unless pFS->fdLog is already set, in which case this
** function is a no-op. LSM_OK is returned if the log file was already
** open or was opened successfully. Otherwise, the LSM error code set by
** fsOpenFile() is returned.
**
** The log file must be opened before any of the following may be called:
**
**     lsmFsWriteLog
**     lsmFsSyncLog
**     lsmFsReadLog
*/
int lsmFsOpenLog(FileSystem *pFS){
  int rc = LSM_OK;                /* Return code */

  /* Already open - nothing to do. */
  if( pFS->fdLog ) return rc;

  pFS->fdLog = fsOpenFile(pFS, 1, &rc);
  return rc;
}

/*
** Open a connection to a database stored within the file-system (the
** "system of files").
*/
int lsmFsOpen(lsm_db *pDb, const char *zDb){
  FileSystem *pFS;
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365

    /* Allocate the hash-table here. At some point, it should be changed
    ** so that it can grow dynamically. */
    pFS->nCacheMax = 2048;
    pFS->nHash = 4096;
    pFS->apHash = lsmMallocZeroRc(pDb->pEnv, sizeof(Page *) * pFS->nHash, &rc);

    /* Open the files */
    pFS->fdDb = fsOpenFile(pFS, 0, &rc);
    pFS->fdLog = fsOpenFile(pFS, 1, &rc);

    if( rc!=LSM_OK ){
      lsmFsClose(pFS);
      pFS = 0;
    }
  }








|

<







371
372
373
374
375
376
377
378
379

380
381
382
383
384
385
386

    /* Allocate the hash-table here. At some point, it should be changed
    ** so that it can grow dynamically. */
    pFS->nCacheMax = 2048;
    pFS->nHash = 4096;
    pFS->apHash = lsmMallocZeroRc(pDb->pEnv, sizeof(Page *) * pFS->nHash, &rc);

    /* Open the database file */
    pFS->fdDb = fsOpenFile(pFS, 0, &rc);


    if( rc!=LSM_OK ){
      lsmFsClose(pFS);
      pFS = 0;
    }
  }

Changes to src/lsm_log.c.
306
307
308
309
310
311
312

313
314
315
316
317
318
319
*/
int lsmLogBegin(lsm_db *pDb){
  int rc = LSM_OK;
  LogWriter *pNew;
  LogRegion *aReg;

  if( pDb->bUseLog==0 ) return LSM_OK;

  pNew = lsmMallocZeroRc(pDb->pEnv, sizeof(LogWriter), &rc);
  if( pNew ){
    lsmStringInit(&pNew->buf, pDb->pEnv);
    rc = lsmStringExtend(&pNew->buf, 2);
  }
  if( rc!=LSM_OK ){
    assert( pNew==0 || pNew->buf.z==0 );







>







306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
*/
int lsmLogBegin(lsm_db *pDb){
  int rc = LSM_OK;
  LogWriter *pNew;
  LogRegion *aReg;

  if( pDb->bUseLog==0 ) return LSM_OK;
  rc = lsmFsOpenLog(pDb->pFS);
  pNew = lsmMallocZeroRc(pDb->pEnv, sizeof(LogWriter), &rc);
  if( pNew ){
    lsmStringInit(&pNew->buf, pDb->pEnv);
    rc = lsmStringExtend(&pNew->buf, 2);
  }
  if( rc!=LSM_OK ){
    assert( pNew==0 || pNew->buf.z==0 );
889
890
891
892
893
894
895



896
897
898
899
900
901
902
  LsmString buf2;                 /* Value buffer */
  LogReader reader;               /* Log reader object */
  int rc = LSM_OK;                /* Return code */
  int nCommit = 0;                /* Number of transactions to recover */
  int iPass;
  int nJump = 0;                  /* Number of LSM_LOG_JUMP records in pass 0 */
  DbLog *pLog;




  lsmTreeInit(pDb);
  pLog = &pDb->treehdr.log;
  lsmCheckpointLogoffset(pDb->pShmhdr->aWorker, pLog);

  logReaderInit(pDb, pLog, 1, &reader);
  lsmStringInit(&buf1, pDb->pEnv);







>
>
>







890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
  LsmString buf2;                 /* Value buffer */
  LogReader reader;               /* Log reader object */
  int rc = LSM_OK;                /* Return code */
  int nCommit = 0;                /* Number of transactions to recover */
  int iPass;
  int nJump = 0;                  /* Number of LSM_LOG_JUMP records in pass 0 */
  DbLog *pLog;

  rc = lsmFsOpenLog(pDb->pFS);
  if( rc!=LSM_OK ) return rc;

  lsmTreeInit(pDb);
  pLog = &pDb->treehdr.log;
  lsmCheckpointLogoffset(pDb->pShmhdr->aWorker, pLog);

  logReaderInit(pDb, pLog, 1, &reader);
  lsmStringInit(&buf1, pDb->pEnv);
Changes to src/lsm_main.c.
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
    ** than one purpose - to open both the database and log files, and 
    ** perhaps to unlink the log file during disconnection. An absolute
    ** path is required to ensure that the correct files are operated
    ** on even if the application changes the cwd.  */
    rc = getFullpathname(pDb->pEnv, zFilename, &zFull);
    assert( rc==LSM_OK || zFull==0 );

    /* Open the database and log files. 
    **
    ** TODO: Opening the log file before calling DbDatabaseConnect() is 
    ** incorrect. Some other connection could unlink() it. Should change
    ** the FileSystem object to open the log file lazily.
    */
    if( rc==LSM_OK ){
      rc = lsmFsOpen(pDb, zFull);
    }

    /* Connect to the database */
    if( rc==LSM_OK ){
      rc = lsmDbDatabaseConnect(pDb, zFilename);







|
<
<
<
<
<







168
169
170
171
172
173
174
175





176
177
178
179
180
181
182
    ** than one purpose - to open both the database and log files, and 
    ** perhaps to unlink the log file during disconnection. An absolute
    ** path is required to ensure that the correct files are operated
    ** on even if the application changes the cwd.  */
    rc = getFullpathname(pDb->pEnv, zFilename, &zFull);
    assert( rc==LSM_OK || zFull==0 );

    /* Open the database file. */





    if( rc==LSM_OK ){
      rc = lsmFsOpen(pDb, zFull);
    }

    /* Connect to the database */
    if( rc==LSM_OK ){
      rc = lsmDbDatabaseConnect(pDb, zFilename);