Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Avoid malloc calls in lsm_file.c when running in mmap mode. Also avoid many mutex operations when accessing the in-memory tree.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 1e661d0bad0322082383fe0e6d68fc8b4cd24faa
User & Date: dan 2012-09-18 19:39:51.968
Context
2012-09-20
19:33
Add lsm_tree_size() and lsm_ckpt_size(). check-in: 5062ffb017 user: dan tags: trunk
2012-09-18
19:39
Avoid malloc calls in lsm_file.c when running in mmap mode. Also avoid many mutex operations when accessing the in-memory tree. check-in: 1e661d0bad user: dan tags: trunk
15:48
Fix a bug preventing block recycling in multi-threaded tests. check-in: 93d9ff7c12 user: dan tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to src/lsm_file.c.
125
126
127
128
129
130
131

132
133
134
135
136
137
138
  lsm_file *fdDb;                 /* Database file */
  lsm_file *fdLog;                /* Log file */

  /* mmap() mode things */
  int bUseMmap;                   /* True to use mmap() to access db file */
  void *pMap;                     /* Current mapping of database file */
  i64 nMap;                       /* Bytes mapped at pMap */


  /* Statistics */
  int nWrite;                     /* Total number of pages written */
  int nRead;                      /* Total number of pages read */

  /* Page cache parameters for non-mmap() mode */
  int nOut;                       /* Number of outstanding pages */







>







125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
  lsm_file *fdDb;                 /* Database file */
  lsm_file *fdLog;                /* Log file */

  /* mmap() mode things */
  int bUseMmap;                   /* True to use mmap() to access db file */
  void *pMap;                     /* Current mapping of database file */
  i64 nMap;                       /* Bytes mapped at pMap */
  Page *pFree;

  /* Statistics */
  int nWrite;                     /* Total number of pages written */
  int nRead;                      /* Total number of pages read */

  /* Page cache parameters for non-mmap() mode */
  int nOut;                       /* Number of outstanding pages */
721
722
723
724
725
726
727
728
729
730

731
732
733


734
735
736
737
738
739
740
741
742
743
744
745
746
747

748


749
750



751


752





753

754
755
756





757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789

790
791
792
793
794
795
796
){
  Page *p;
  int iHash;
  int rc = LSM_OK;

  assert( iPg>=fsFirstPageOnBlock(pFS, 1) );

  /* Search the hash-table for the page */
  iHash = fsHashKey(pFS->nHash, iPg);
  for(p=pFS->apHash[iHash]; p; p=p->pHashNext){

    if( p->iPg==iPg) break;
  }



  if( p==0 ){
    /* Set bRequireData to true if a buffer allocated by malloc() is required
    ** to store the page data (the alternative is to have the Page object
    ** carry a pointer into the mapped region at FileSystem.pMap). In 
    ** non-mmap mode, this should always be true. In mmap mode, it should
    ** always be false for readable pages (noContent==0), but may be set
    ** to either true or false for appended pages (noContent==1). Setting
    ** it to true in this case causes LSM to do "double-buffered" writes. */
    int bRequireData = (pFS->bUseMmap==0);

    rc = fsPageBuffer(pFS, bRequireData, &p);
    if( rc==LSM_OK ){
      p->iPg = iPg;
      p->nRef = 0;

      p->pFS = pFS;


      assert( p->flags==0 || p->flags==PAGE_FREE );
      if( fsIsLast(pFS, iPg) || fsIsFirst(pFS, iPg) ) p->flags |= PAGE_SHORT;






      if( pFS->bUseMmap && bRequireData==0 ){





        i64 iEnd = (i64)iPg * pFS->nPagesize;

        fsGrowMapping(pFS, iEnd, &rc);
        if( rc==LSM_OK ){
          p->aData = &((u8 *)pFS->pMap)[pFS->nPagesize * (i64)(iPg-1)];





        }
      }else{
#ifdef LSM_DEBUG
        memset(p->aData, 0x56, pFS->nPagesize);
#endif
        assert( p->pLruNext==0 && p->pLruPrev==0 );
        if( noContent==0 ){
          int nByte = pFS->nPagesize;
          i64 iOff;

          iOff = (i64)(iPg-1) * pFS->nPagesize;
          rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, p->aData, nByte);
          pFS->nRead++;
        }
      }

      /* If the xRead() call was successful (or not attempted), link the
      ** page into the page-cache hash-table. Otherwise, if it failed,
      ** free the buffer. */
      if( rc==LSM_OK ){
        p->pHashNext = pFS->apHash[iHash];
        p->nData =  pFS->nPagesize - (p->flags & PAGE_SHORT);
        pFS->apHash[iHash] = p;
      }else{
        fsPageBufferFree(p);
        p = 0;
      }
    }
  }else if( p->nRef==0 && pFS->bUseMmap==0 ){
    fsPageRemoveFromLru(pFS, p);
  }

  assert( (rc==LSM_OK && p) || (rc!=LSM_OK && p==0) );

  if( rc==LSM_OK ){
    pFS->nOut += (p->nRef==0);
    p->nRef++;
  }
  *ppPg = p;
  return rc;
}







<
|
|
>
|
|
|
>
>
|
<
<
|
<
<
<
<
<
|
<
|
<
<
>

>
>
|
|
>
>
>
|
>
>
|
>
>
>
>
>
|
>
|
|
<
>
>
>
>
>
|
<












|
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
>







722
723
724
725
726
727
728

729
730
731
732
733
734
735
736
737


738





739

740


741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762

763
764
765
766
767
768

769
770
771
772
773
774
775
776
777
778
779
780
781

782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
){
  Page *p;
  int iHash;
  int rc = LSM_OK;

  assert( iPg>=fsFirstPageOnBlock(pFS, 1) );


  if( pFS->bUseMmap ){
    i64 iEnd = (i64)iPg * pFS->nPagesize;
    fsGrowMapping(pFS, iEnd, &rc);
    if( rc!=LSM_OK ) return rc;

    if( pFS->pFree ){
      p = pFS->pFree;
      pFS->pFree = p->pHashNext;
      assert( p->nRef==0 );


    }else{





      p = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc);

      if( rc ) return rc;


      fsPageAddToLru(pFS, p);
      p->pFS = pFS;
    }
    p->aData = &((u8 *)pFS->pMap)[pFS->nPagesize * (i64)(iPg-1)];
    p->iPg = iPg;
    if( fsIsLast(pFS, iPg) || fsIsFirst(pFS, iPg) ){
      p->flags = PAGE_SHORT;
    }else{
      p->flags = 0;
    }
    p->nData = pFS->nPagesize - (p->flags & PAGE_SHORT);
  }else{

    /* Search the hash-table for the page */
    iHash = fsHashKey(pFS->nHash, iPg);
    for(p=pFS->apHash[iHash]; p; p=p->pHashNext){
      if( p->iPg==iPg) break;
    }

    if( p==0 ){
      rc = fsPageBuffer(pFS, 1, &p);
      if( rc==LSM_OK ){

        p->iPg = iPg;
        p->nRef = 0;
        p->pFS = pFS;
        assert( p->flags==0 || p->flags==PAGE_FREE );
        if( fsIsLast(pFS, iPg) || fsIsFirst(pFS, iPg) ) p->flags |= PAGE_SHORT;


#ifdef LSM_DEBUG
        memset(p->aData, 0x56, pFS->nPagesize);
#endif
        assert( p->pLruNext==0 && p->pLruPrev==0 );
        if( noContent==0 ){
          int nByte = pFS->nPagesize;
          i64 iOff;

          iOff = (i64)(iPg-1) * pFS->nPagesize;
          rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, p->aData, nByte);
          pFS->nRead++;
        }


        /* If the xRead() call was successful (or not attempted), link the
         ** page into the page-cache hash-table. Otherwise, if it failed,
         ** free the buffer. */
        if( rc==LSM_OK ){
          p->pHashNext = pFS->apHash[iHash];
          p->nData =  pFS->nPagesize - (p->flags & PAGE_SHORT);
          pFS->apHash[iHash] = p;
        }else{
          fsPageBufferFree(p);
          p = 0;
        }
      }
    }else if( p->nRef==0 ){
      fsPageRemoveFromLru(pFS, p);
    }

    assert( (rc==LSM_OK && p) || (rc!=LSM_OK && p==0) );
  }
  if( rc==LSM_OK ){
    pFS->nOut += (p->nRef==0);
    p->nRef++;
  }
  *ppPg = p;
  return rc;
}
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297

1298
1299


1300
1301
1302

1303
1304
1305
1306
1307
1308
1309
  int rc = LSM_OK;
  if( pPg ){
    assert( pPg->nRef>0 );
    pPg->nRef--;
    if( pPg->nRef==0 && pPg->iPg!=0 ){
      FileSystem *pFS = pPg->pFS;
      rc = lsmFsPagePersist(pPg);

      pFS->nOut--;
      assert( pFS->bUseMmap || pPg->pLruNext==0 );
      assert( pFS->bUseMmap || pPg->pLruPrev==0 );
#if 0

      fsPageAddToLru(pFS, pPg);
#else


      fsPageRemoveFromHash(pFS, pPg);
      fsPageBufferFree(pPg);
#endif

    }
  }

  return rc;
}

/*







<

|
|
<
>
|
|
>
>
|
|
<
>







1296
1297
1298
1299
1300
1301
1302

1303
1304
1305

1306
1307
1308
1309
1310
1311
1312

1313
1314
1315
1316
1317
1318
1319
1320
  int rc = LSM_OK;
  if( pPg ){
    assert( pPg->nRef>0 );
    pPg->nRef--;
    if( pPg->nRef==0 && pPg->iPg!=0 ){
      FileSystem *pFS = pPg->pFS;
      rc = lsmFsPagePersist(pPg);

      pFS->nOut--;

      if( pFS->bUseMmap ){

        pPg->pHashNext = pFS->pFree;
        pFS->pFree = pPg;
      }else{
        assert( pPg->pLruNext==0 );
        assert( pPg->pLruPrev==0 );
        fsPageRemoveFromHash(pFS, pPg);
        fsPageBufferFree(pPg);

      }
    }
  }

  return rc;
}

/*
Changes to src/lsm_main.c.
81
82
83
84
85
86
87

88
89
90
91
92
93
94
  pDb->nDfltPgsz = LSM_PAGE_SIZE;
  pDb->nDfltBlksz = LSM_BLOCK_SIZE;
  pDb->nMerge = LSM_DEFAULT_NMERGE;
  pDb->nMaxFreelist = LSM_MAX_FREELIST_ENTRIES;
  pDb->bUseLog = 1;
  pDb->iReader = -1;
  pDb->bMultiProc = 1;

  return LSM_OK;
}

/*
** Return the run-time environment handle stored in database handle pDb.
** The handle is expected to be non-NULL; this is checked by an assert()
** in debug builds only.
*/
lsm_env *lsm_get_env(lsm_db *pDb){
  lsm_env *pEnv = pDb->pEnv;      /* Environment recorded on the handle */
  assert( pEnv );
  return pEnv;
}







>







81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
  pDb->nDfltPgsz = LSM_PAGE_SIZE;
  pDb->nDfltBlksz = LSM_BLOCK_SIZE;
  pDb->nMerge = LSM_DEFAULT_NMERGE;
  pDb->nMaxFreelist = LSM_MAX_FREELIST_ENTRIES;
  pDb->bUseLog = 1;
  pDb->iReader = -1;
  pDb->bMultiProc = 1;
  pDb->bMmap = LSM_IS_64_BIT;
  return LSM_OK;
}

/*
** Return the run-time environment handle stored in database handle pDb.
** The handle is expected to be non-NULL; this is checked by an assert()
** in debug builds only.
*/
lsm_env *lsm_get_env(lsm_db *pDb){
  lsm_env *pEnv = pDb->pEnv;      /* Environment recorded on the handle */
  assert( pEnv );
  return pEnv;
}
250
251
252
253
254
255
256

257
258
259
260
261
262
263
    assert_db_state(pDb);
    if( pDb->pCsr || pDb->nTransOpen ){
      rc = LSM_MISUSE_BKPT;
    }else{
      lsmDbDatabaseRelease(pDb);
      lsmFsClose(pDb->pFS);
      lsmFree(pDb->pEnv, pDb->aTrans);

      lsmFree(pDb->pEnv, pDb);
    }
  }
  return rc;
}

int lsm_config(lsm_db *pDb, int eParam, ...){







>







251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
    assert_db_state(pDb);
    if( pDb->pCsr || pDb->nTransOpen ){
      rc = LSM_MISUSE_BKPT;
    }else{
      lsmDbDatabaseRelease(pDb);
      lsmFsClose(pDb->pFS);
      lsmFree(pDb->pEnv, pDb->aTrans);
      lsmFree(pDb->pEnv, pDb->apShm);
      lsmFree(pDb->pEnv, pDb);
    }
  }
  return rc;
}

int lsm_config(lsm_db *pDb, int eParam, ...){
Changes to src/lsm_shared.c.
956
957
958
959
960
961
962













963
964
965
966
967
968
969
** starting from 0 (i.e. the header chunk is chunk 0).
*/
int lsmShmChunk(lsm_db *db, int iChunk, void **ppData){
  int rc = LSM_OK;
  void *pRet = 0;
  Database *p = db->pDatabase;
  lsm_env *pEnv = db->pEnv;














  /* Enter the client mutex */
  assert( iChunk>=0 );
  lsmMutexEnter(pEnv, p->pClientMutex);

  if( iChunk>=p->nShmChunk ){
    int nNew = iChunk+1;







>
>
>
>
>
>
>
>
>
>
>
>
>







956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
** starting from 0 (i.e. the header chunk is chunk 0).
*/
int lsmShmChunk(lsm_db *db, int iChunk, void **ppData){
  int rc = LSM_OK;
  void *pRet = 0;
  Database *p = db->pDatabase;
  lsm_env *pEnv = db->pEnv;

  while( iChunk>=db->nShm ){
    void **apShm;
    apShm = lsmRealloc(pEnv, db->apShm, sizeof(void*)*(db->nShm+16));
    if( !apShm ) return LSM_NOMEM_BKPT;
    memset(&apShm[db->nShm], 0, sizeof(void*)*16);
    db->apShm = apShm;
    db->nShm += 16;
  }
  if( db->apShm[iChunk] ){
    *ppData = db->apShm[iChunk];
    return rc;
  }

  /* Enter the client mutex */
  assert( iChunk>=0 );
  lsmMutexEnter(pEnv, p->pClientMutex);

  if( iChunk>=p->nShmChunk ){
    int nNew = iChunk+1;
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
  if( rc==LSM_OK ){
    pRet = p->apShmChunk[iChunk];
  }

  /* Release the client mutex */
  lsmMutexLeave(pEnv, p->pClientMutex);

  *ppData = pRet; 
  return rc;
}

/*
** Attempt to obtain the lock identified by the iLock and bExcl parameters.
** If successful, return LSM_OK. If the lock cannot be obtained because 
** there exists some other conflicting lock, return LSM_BUSY. If some other







|







1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
  if( rc==LSM_OK ){
    pRet = p->apShmChunk[iChunk];
  }

  /* Release the client mutex */
  lsmMutexLeave(pEnv, p->pClientMutex);

  *ppData = db->apShm[iChunk] = pRet; 
  return rc;
}

/*
** Attempt to obtain the lock identified by the iLock and bExcl parameters.
** If successful, return LSM_OK. If the lock cannot be obtained because 
** there exists some other conflicting lock, return LSM_BUSY. If some other
Changes to tool/lsmperf.tcl.
154
155
156
157
158
159
160
161
162

163
164
165
166
167
  append script $data2
  append script $data3

  append script "pause -1\n"
  exec_gnuplot_script $script $zPng
}

# Run a write test with a single LSM configuration (mmap on, multi_proc off,
# 3 threads, autowork off, worker_nmerge=2), using x.png as the image name.
# NOTE(review): the meaning of the four numeric arguments is defined by
# do_write_test, which is not visible here -- confirm before changing them.
do_write_test x.png 60 10000 20000 1000 {
  LSM   "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0 worker_nmerge=2" 

}











|
|
>





154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
  append script $data2
  append script $data3

  append script "pause -1\n"
  exec_gnuplot_script $script $zPng
}

# Run a write test over two systems, using x.png as the image name: LSM
# (mmap on, multi_proc off, 3 threads, autowork off) and LevelDB.
# NOTE(review): the meaning of the four numeric arguments is defined by
# do_write_test, which is not visible here -- confirm before changing them.
do_write_test x.png 40 20000 40000 1000 {
  LSM     "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0"
  LevelDB leveldb
}