Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fix an mmap mode bug. Improve upon multi-threaded setups in lsmtest_tdb3.c.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 21db5f73f5d8ae85b917ebe63323b40af5fbd9ec
User & Date: dan 2012-11-28 14:54:23.277
Context
2012-11-28
19:39
Further updates to multi-threaded tests. check-in: f43bee2c1b user: dan tags: trunk
14:54
Fix an mmap mode bug. Improve upon multi-threaded setups in lsmtest_tdb3.c. check-in: 21db5f73f5 user: dan tags: trunk
2012-11-23
16:41
Fix a problem in compressed database mode causing pages to be incorrectly marked as dirty. check-in: b55b092602 user: dan tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to lsm-test/lsmtest_tdb3.c.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20













21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

#include "lsmtest_tdb.h"
#include "lsm.h"

#include "lsmtest.h"

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <stdio.h>

#include <sys/time.h>

typedef struct LsmDb LsmDb;
typedef struct LsmWorker LsmWorker;
typedef struct LsmFile LsmFile;

#ifdef LSM_MUTEX_PTHREADS
#include <pthread.h>













struct LsmWorker {
  LsmDb *pDb;                     /* Main database structure */
  lsm_db *pWorker;                /* Worker database handle */
  pthread_t worker_thread;        /* Worker thread */
  pthread_cond_t worker_cond;     /* Condition var the worker waits on */
  pthread_mutex_t worker_mutex;   /* Mutex used with worker_cond */
  int bDoWork;                    /* Set to true by client when there is work */
  int worker_rc;                  /* Store error code here */

  int lsm_work_flags;             /* Flags to pass to lsm_work() */
  int lsm_work_npage;             /* nPage parameter to pass to lsm_work() */
  int bCkpt;                      /* True to call lsm_checkpoint() */
};
#else
struct LsmWorker { int worker_rc; };
#endif

static void mt_shutdown(LsmDb *);

lsm_env *tdb_lsm_env(void){
  static int bInit = 0;
  static lsm_env env;



<
















>
>
>
>
>
>
>
>
>
>
>
>
>








|
<
<
|


|







1
2
3

4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41


42
43
44
45
46
47
48
49
50
51
52

#include "lsmtest_tdb.h"
#include "lsm.h"

#include "lsmtest.h"

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <stdio.h>

#include <sys/time.h>

typedef struct LsmDb LsmDb;
typedef struct LsmWorker LsmWorker;
typedef struct LsmFile LsmFile;

#ifdef LSM_MUTEX_PTHREADS
#include <pthread.h>

#define LSMTEST_THREAD_CKPT      1
#define LSMTEST_THREAD_WORKER    2
#define LSMTEST_THREAD_WORKER_AC 3

/*
** There are several different types of worker threads that run in different
** test configurations, depending on the value of LsmWorker.eType.
**
**   1. Checkpointer.
**   2. Worker with auto-checkpoint.
**   3. Worker without auto-checkpoint.
*/
struct LsmWorker {
  LsmDb *pDb;                     /* Main database structure */
  lsm_db *pWorker;                /* Worker database handle */
  pthread_t worker_thread;        /* Worker thread */
  pthread_cond_t worker_cond;     /* Condition var the worker waits on */
  pthread_mutex_t worker_mutex;   /* Mutex used with worker_cond */
  int bDoWork;                    /* Set to true by client when there is work */
  int worker_rc;                  /* Store error code here */
  int eType;                      /* LSMTEST_THREAD_XXX constant */


  int bBlock;
};
#else
struct LsmWorker { int worker_rc; int bBlock; };
#endif

static void mt_shutdown(LsmDb *);

lsm_env *tdb_lsm_env(void){
  static int bInit = 0;
  static lsm_env env;
477
478
479
480
481
482
483
484
485
486
487
488







489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504

505
506
507
508
509
510
511
  return rc;
}

static int test_lsm_write(
  TestDb *pTestDb, 
  void *pKey, 
  int nKey, 
  void *pVal, 
  int nVal
){
  LsmDb *pDb = (LsmDb *)pTestDb;








  if( pDb->aWorker ){
    int nLimit = -1;
    int nSleep = 0;
    lsm_config(pDb->db, LSM_CONFIG_AUTOFLUSH, &nLimit);
    do {
      int bOld, nNew, rc;
      rc = lsm_info(pDb->db, LSM_INFO_TREE_SIZE, &bOld, &nNew);
      if( rc!=LSM_OK ) return rc;
      if( bOld==0 || nNew<nLimit ) break;
      usleep(1000);
      nSleep += 1;
    }while( 1 );
#if 0
    if( nSleep ) printf("nSleep=%d\n", nSleep);
#endif
  }


  return lsm_insert(pDb->db, pKey, nKey, pVal, nVal);
}

static int test_lsm_delete(TestDb *pTestDb, void *pKey, int nKey){
  LsmDb *pDb = (LsmDb *)pTestDb;
  return lsm_delete(pDb->db, pKey, nKey);







|




>
>
>
>
>
>
>
















>







487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
  return rc;
}

static int test_lsm_write(
  TestDb *pTestDb, 
  void *pKey, 
  int nKey, 
  void *pVal,
  int nVal
){
  LsmDb *pDb = (LsmDb *)pTestDb;

  int nSleep = 0;
  while( pDb->aWorker && pDb->aWorker[0].bBlock ){
    usleep(1000);
    nSleep++;
  }
#if 0
  if( nSleep ) printf("nSleep=%d\n", nSleep);
  if( pDb->aWorker ){
    int nLimit = -1;
    int nSleep = 0;
    lsm_config(pDb->db, LSM_CONFIG_AUTOFLUSH, &nLimit);
    do {
      int bOld, nNew, rc;
      rc = lsm_info(pDb->db, LSM_INFO_TREE_SIZE, &bOld, &nNew);
      if( rc!=LSM_OK ) return rc;
      if( bOld==0 || nNew<nLimit ) break;
      usleep(1000);
      nSleep += 1;
    }while( 1 );
#if 0
    if( nSleep ) printf("nSleep=%d\n", nSleep);
#endif
  }
#endif

  return lsm_insert(pDb->db, pKey, nKey, pVal, nVal);
}

static int test_lsm_delete(TestDb *pTestDb, void *pKey, int nKey){
  LsmDb *pDb = (LsmDb *)pTestDb;
  return lsm_delete(pDb->db, pKey, nKey);
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
    lsm_config_log(pDb->db, xLog, 0);
    lsm_config_work_hook(pDb->db, xWorkHook, (void *)pDb);

    rc = test_lsm_config_str(pDb, pDb->db, 0, zCfg, &nThread);
    if( rc==LSM_OK ) rc = lsm_open(pDb->db, zFilename);

#ifdef LSM_MUTEX_PTHREADS
    if( rc==LSM_OK && (nThread==2 || nThread==3) ){
      testLsmStartWorkers(pDb, nThread-1, zFilename, zCfg);
    }
#endif

    if( rc!=LSM_OK ){
      test_lsm_close((TestDb *)pDb);
      pDb = 0;
    }







|
|







887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
    lsm_config_log(pDb->db, xLog, 0);
    lsm_config_work_hook(pDb->db, xWorkHook, (void *)pDb);

    rc = test_lsm_config_str(pDb, pDb->db, 0, zCfg, &nThread);
    if( rc==LSM_OK ) rc = lsm_open(pDb->db, zFilename);

#ifdef LSM_MUTEX_PTHREADS
    if( rc==LSM_OK && nThread>1 ){
      testLsmStartWorkers(pDb, nThread, zFilename, zCfg);
    }
#endif

    if( rc!=LSM_OK ){
      test_lsm_close((TestDb *)pDb);
      pDb = 0;
    }
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
  }
  return 0;
}

void tdb_lsm_enable_log(TestDb *pDb, int bEnable){
  lsm_db *db = tdb_lsm(pDb);
  if( db ){
    LsmDb *p = (LsmDb *)pDb;
    int i;
    lsm_config_log(db, (bEnable ? xLog : 0), (void *)"client");
  }
}

void tdb_lsm_application_crash(TestDb *pDb){
  if( tdb_lsm(pDb) ){
    LsmDb *p = (LsmDb *)pDb;







<
<







951
952
953
954
955
956
957


958
959
960
961
962
963
964
  }
  return 0;
}

void tdb_lsm_enable_log(TestDb *pDb, int bEnable){
  lsm_db *db = tdb_lsm(pDb);
  if( db ){


    lsm_config_log(db, (bEnable ? xLog : 0), (void *)"client");
  }
}

void tdb_lsm_application_crash(TestDb *pDb){
  if( tdb_lsm(pDb) ){
    LsmDb *p = (LsmDb *)pDb;
1023
1024
1025
1026
1027
1028
1029



1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042




1043


1044
1045
1046
1047
1048
1049
1050
1051
1052
1053


1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064

1065

1066
1067
1068
1069
1070
1071
1072
  LsmWorker *p = &pDb->aWorker[iWorker];
  pthread_mutex_lock(&p->worker_mutex);
  p->bDoWork = 1;
  pthread_cond_signal(&p->worker_cond);
  pthread_mutex_unlock(&p->worker_mutex);
}




static void *worker_main(void *pArg){
  LsmWorker *p = (LsmWorker *)pArg;
  lsm_db *pWorker;                /* Connection to access db through */

  pthread_mutex_lock(&p->worker_mutex);
  while( (pWorker = p->pWorker) ){
    int rc = LSM_OK;
    int nCkpt = -1;

    /* Do some work. If an error occurs, exit. */
    pthread_mutex_unlock(&p->worker_mutex);

    if( p->bCkpt ){




      rc = lsm_checkpoint(pWorker, 0);


    }else{
      int nWrite = 0;             /* Pages written by lsm_work() call */
      int nAuto = -1;             /* Configured AUTOCHECKPOINT value */
      int nLimit = -1;            /* Configured AUTOFLUSH value */

      lsm_config(pWorker, LSM_CONFIG_AUTOFLUSH, &nLimit);
      lsm_config(pWorker, LSM_CONFIG_AUTOCHECKPOINT, &nAuto);
      do {
        int nSleep = 0;
        lsm_info(pWorker, LSM_INFO_CHECKPOINT_SIZE, &nCkpt);


        while( nAuto==0 && nCkpt>(nLimit*4) ){
          usleep(1000);
          mt_signal_worker(p->pDb, 1);
          nSleep++;
          lsm_info(pWorker, LSM_INFO_CHECKPOINT_SIZE, &nCkpt);
        }
#if 0
          if( nSleep ) printf("nLimit=%d nSleep=%d (worker)\n", nLimit, nSleep);
#endif

        rc = lsm_work(pWorker, p->lsm_work_flags, p->lsm_work_npage, &nWrite);

        if( nAuto==0 && nWrite && rc==LSM_OK ) mt_signal_worker(p->pDb, 1);

      }while( nWrite && p->pWorker );
    }
    pthread_mutex_lock(&p->worker_mutex);

    if( rc!=LSM_OK && rc!=LSM_BUSY ){
      p->worker_rc = rc;
      break;







>
>
>












|
>
>
>
>
|
>
>








<

>
>






<


<
|
>
|
>







1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076

1077
1078
1079
1080
1081
1082
1083
1084
1085

1086
1087

1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
  LsmWorker *p = &pDb->aWorker[iWorker];
  pthread_mutex_lock(&p->worker_mutex);
  p->bDoWork = 1;
  pthread_cond_signal(&p->worker_cond);
  pthread_mutex_unlock(&p->worker_mutex);
}

/*
** This routine is used as the main() for all worker threads.
*/
static void *worker_main(void *pArg){
  LsmWorker *p = (LsmWorker *)pArg;
  lsm_db *pWorker;                /* Connection to access db through */

  pthread_mutex_lock(&p->worker_mutex);
  while( (pWorker = p->pWorker) ){
    int rc = LSM_OK;
    int nCkpt = -1;

    /* Do some work. If an error occurs, exit. */
    pthread_mutex_unlock(&p->worker_mutex);

    if( p->eType==LSMTEST_THREAD_CKPT ){
      int nByte = 0;
      rc = lsm_info(pWorker, LSM_INFO_CHECKPOINT_SIZE, &nByte);
      if( rc==LSM_OK && nByte>=(2*1024*1024) ){
        if( nByte>(8*1024*1024) ) p->bBlock = 1;
        rc = lsm_checkpoint(pWorker, 0);
        p->bBlock = 0;
      }
    }else{
      int nWrite = 0;             /* Pages written by lsm_work() call */
      int nAuto = -1;             /* Configured AUTOCHECKPOINT value */
      int nLimit = -1;            /* Configured AUTOFLUSH value */

      lsm_config(pWorker, LSM_CONFIG_AUTOFLUSH, &nLimit);
      lsm_config(pWorker, LSM_CONFIG_AUTOCHECKPOINT, &nAuto);
      do {

        lsm_info(pWorker, LSM_INFO_CHECKPOINT_SIZE, &nCkpt);
#if 0
        int nSleep = 0;
        while( nAuto==0 && nCkpt>(nLimit*4) ){
          usleep(1000);
          mt_signal_worker(p->pDb, 1);
          nSleep++;
          lsm_info(pWorker, LSM_INFO_CHECKPOINT_SIZE, &nCkpt);
        }

          if( nSleep ) printf("nLimit=%d nSleep=%d (worker)\n", nLimit, nSleep);
#endif

        rc = lsm_work(pWorker, 0, 256, &nWrite);
        if( p->eType==LSMTEST_THREAD_WORKER_AC && nWrite && rc==LSM_OK ){
          mt_signal_worker(p->pDb, 1);
        }
      }while( nWrite && p->pWorker );
    }
    pthread_mutex_lock(&p->worker_mutex);

    if( rc!=LSM_OK && rc!=LSM_BUSY ){
      p->worker_rc = rc;
      break;
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
** This callback is invoked by LSM when the client database writes to
** the database file (i.e. to flush the contents of the in-memory tree).
** This implies there may be work to do on the database, so signal
** the worker threads.
*/
static void mt_client_work_hook(lsm_db *db, void *pArg){
  LsmDb *pDb = (LsmDb *)pArg;     /* LsmDb database handle */
  int i;                          /* Iterator variable */

  /* Invoke the user level work-hook, if any. */
  if( pDb->xWork ) pDb->xWork(db, pDb->pWorkCtx);

  /* Signal the lsm_work() thread */
  mt_signal_worker(pDb, 0);
}







<







1148
1149
1150
1151
1152
1153
1154

1155
1156
1157
1158
1159
1160
1161
** This callback is invoked by LSM when the client database writes to
** the database file (i.e. to flush the contents of the in-memory tree).
** This implies there may be work to do on the database, so signal
** the worker threads.
*/
static void mt_client_work_hook(lsm_db *db, void *pArg){
  LsmDb *pDb = (LsmDb *)pArg;     /* LsmDb database handle */


  /* Invoke the user level work-hook, if any. */
  if( pDb->xWork ) pDb->xWork(db, pDb->pWorkCtx);

  /* Signal the lsm_work() thread */
  mt_signal_worker(pDb, 0);
}
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160




1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
/*
** Launch worker thread iWorker for database connection pDb.
*/
static int mt_start_worker(
  LsmDb *pDb,                     /* Main database structure */
  int iWorker,                    /* Worker number to start */
  const char *zFilename,          /* File name of database to open */
  const char *zCfg,
  int flags,                      /* flags parameter to lsm_work() */
  int nPage,                      /* nPage parameter to lsm_work() */
  int bCkpt                       /* True to call lsm_checkpoint() */
){
  int rc = 0;                     /* Return code */
  LsmWorker *p;                   /* Object to initialize */

  assert( iWorker<pDb->nWorker );





  p = &pDb->aWorker[iWorker];
  p->lsm_work_flags = flags;
  p->lsm_work_npage = nPage;
  p->bCkpt = bCkpt;
  p->pDb = pDb;

  /* Open the worker connection */
  if( rc==0 ) rc = lsm_new(&pDb->env, &p->pWorker);
  if( zCfg ){
    test_lsm_config_str(pDb, p->pWorker, 1, zCfg, 0);
  }







|
|
<
<





>
>
>
>


<
<
|







1170
1171
1172
1173
1174
1175
1176
1177
1178


1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189


1190
1191
1192
1193
1194
1195
1196
1197
/*
** Launch worker thread iWorker for database connection pDb.
*/
static int mt_start_worker(
  LsmDb *pDb,                     /* Main database structure */
  int iWorker,                    /* Worker number to start */
  const char *zFilename,          /* File name of database to open */
  const char *zCfg,               /* Connection configuration string */
  int eType                       /* Type of worker thread */


){
  int rc = 0;                     /* Return code */
  LsmWorker *p;                   /* Object to initialize */

  assert( iWorker<pDb->nWorker );
  assert( eType==LSMTEST_THREAD_CKPT 
       || eType==LSMTEST_THREAD_WORKER 
       || eType==LSMTEST_THREAD_WORKER_AC 
  );

  p = &pDb->aWorker[iWorker];


  p->eType = eType;
  p->pDb = pDb;

  /* Open the worker connection */
  if( rc==0 ) rc = lsm_new(&pDb->env, &p->pWorker);
  if( zCfg ){
    test_lsm_config_str(pDb, p->pWorker, 1, zCfg, 0);
  }
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195

1196
1197

1198
1199


1200
1201
1202
1203


1204

1205
1206
1207
1208

1209


1210




1211
1212
1213
1214
1215
1216
1217
  if( rc==0 ) rc = pthread_create(&p->worker_thread, 0, worker_main, (void *)p);

  return rc;
}


static int testLsmStartWorkers(
  LsmDb *pDb, int nWorker, const char *zFilename, const char *zCfg
){
  int rc;
  int bAutowork = 0;
  assert( nWorker==1 || nWorker==2 );


  /* Configure a work-hook for the client connection. */

  lsm_config_work_hook(pDb->db, mt_client_work_hook, (void *)pDb);



  pDb->aWorker = (LsmWorker *)testMalloc(sizeof(LsmWorker) * nWorker);
  memset(pDb->aWorker, 0, sizeof(LsmWorker) * nWorker);
  pDb->nWorker = nWorker;



  if( nWorker==1 ){

    rc = mt_start_worker(pDb, 0, zFilename, zCfg, 0, 256, 0);
  }else{
    rc = mt_start_worker(pDb, 0, zFilename, zCfg, 0, 256, 0);
    if( rc==LSM_OK ){

      rc = mt_start_worker(pDb, 1, zFilename, zCfg, 0, 0, 1);


    }




  }

  return rc;
}


int test_lsm_mt2(const char *zFilename, int bClear, TestDb **ppDb){







|


|
|
>

|
>


>
>
|
|
<

>
>
|
>
|
|
|
<
>
|
>
>
|
>
>
>
>







1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230

1231
1232
1233
1234
1235
1236
1237
1238

1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
  if( rc==0 ) rc = pthread_create(&p->worker_thread, 0, worker_main, (void *)p);

  return rc;
}


static int testLsmStartWorkers(
  LsmDb *pDb, int eModel, const char *zFilename, const char *zCfg
){
  int rc;

  if( eModel<1 || eModel>4 ) return 1;
  if( eModel==1 ) return 0;

  /* Configure a work-hook for the client connection. Worker 0 is signalled
  ** every time the users connection writes to the database.  */
  lsm_config_work_hook(pDb->db, mt_client_work_hook, (void *)pDb);

  /* Allocate space for two worker connections. They may not both be
  ** used, but both are allocated.  */
  pDb->aWorker = (LsmWorker *)testMalloc(sizeof(LsmWorker) * 2);
  memset(pDb->aWorker, 0, sizeof(LsmWorker) * 2);


  switch( eModel ){
    case 2:
      pDb->nWorker = 1;
      test_lsm_config_str(0, pDb->db, 0, "autocheckpoint=0", 0);
      rc = mt_start_worker(pDb, 0, zFilename, zCfg, LSMTEST_THREAD_CKPT);
      break;


    case 3:
      pDb->nWorker = 2;
      assert( 0 );
      break;

    case 4:
      pDb->nWorker = 2;
      assert( 0 );
      break;
  }

  return rc;
}


int test_lsm_mt2(const char *zFilename, int bClear, TestDb **ppDb){
Changes to src/lsm.h.
392
393
394
395
396
397
398


399
400
401
402
403
404
405
#define LSM_INFO_ARRAY_STRUCTURE  5
#define LSM_INFO_PAGE_ASCII_DUMP  6
#define LSM_INFO_PAGE_HEX_DUMP    7
#define LSM_INFO_FREELIST         8
#define LSM_INFO_ARRAY_PAGES      9
#define LSM_INFO_CHECKPOINT_SIZE 10
#define LSM_INFO_TREE_SIZE       11




/* 
** CAPI: Opening and Closing Write Transactions
**
** These functions are used to open and close transactions and nested 
** sub-transactions.







>
>







392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
#define LSM_INFO_ARRAY_STRUCTURE  5
#define LSM_INFO_PAGE_ASCII_DUMP  6
#define LSM_INFO_PAGE_HEX_DUMP    7
#define LSM_INFO_FREELIST         8
#define LSM_INFO_ARRAY_PAGES      9
#define LSM_INFO_CHECKPOINT_SIZE 10
#define LSM_INFO_TREE_SIZE       11

#define LSM_INFO_FREELIST_SIZE   12


/* 
** CAPI: Opening and Closing Write Transactions
**
** These functions are used to open and close transactions and nested 
** sub-transactions.
Changes to src/lsmInt.h.
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
int lsmLogCommit(lsm_db *);
void lsmLogEnd(lsm_db *pDb, int bCommit);
void lsmLogTell(lsm_db *, LogMark *);
void lsmLogSeek(lsm_db *, LogMark *);

int lsmLogRecover(lsm_db *);
int lsmInfoLogStructure(lsm_db *pDb, char **pzVal);
int lsmInfoFreelist(lsm_db *, char **pzVal);


/**************************************************************************
** Functions from file "lsm_shared.c".
*/

int lsmDbDatabaseConnect(lsm_db*, const char *);







<







786
787
788
789
790
791
792

793
794
795
796
797
798
799
int lsmLogCommit(lsm_db *);
void lsmLogEnd(lsm_db *pDb, int bCommit);
void lsmLogTell(lsm_db *, LogMark *);
void lsmLogSeek(lsm_db *, LogMark *);

int lsmLogRecover(lsm_db *);
int lsmInfoLogStructure(lsm_db *pDb, char **pzVal);



/**************************************************************************
** Functions from file "lsm_shared.c".
*/

int lsmDbDatabaseConnect(lsm_db*, const char *);
Changes to src/lsm_ckpt.c.
32
33
34
35
36
37
38

39
40
41
42
43
44
45
**     2. The checkpoint id LSW.
**     3. The number of integer values in the entire checkpoint, including 
**        the two checksum values.
**     4. The total number of blocks in the database.
**     5. The block size.
**     6. The number of levels.
**     7. The nominal database page size.

**
**   Log pointer:
**
**     1. The log offset MSW.
**     2. The log offset LSW.
**     3. Log checksum 0.
**     4. Log checksum 1.







>







32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
**     2. The checkpoint id LSW.
**     3. The number of integer values in the entire checkpoint, including 
**        the two checksum values.
**     4. The total number of blocks in the database.
**     5. The block size.
**     6. The number of levels.
**     7. The nominal database page size.
**     8. The number of pages (in total) written to the database file.
**
**   Log pointer:
**
**     1. The log offset MSW.
**     2. The log offset LSW.
**     3. Log checksum 0.
**     4. Log checksum 1.
68
69
70
71
72
73
74


75
76
77
78
79
80
81
82
**     9. Current pointer value (64-bits - 2 integers).
**
**   The in-memory freelist entries. Each entry is either an insert or a
**   delete. The in-memory freelist is to the free-block-list as the
**   in-memory tree is to the users database content.
**
**     1. Number of free-list entries stored in checkpoint header.


**     2. For each entry:
**        2a. Block number of free block.
**        2b. A 64-bit integer (MSW followed by LSW). -1 for a delete entry,
**            or the associated checkpoint id for an insert.
**
**   The checksum:
**
**     1. Checksum value 1.







>
>
|







69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
**     9. Current pointer value (64-bits - 2 integers).
**
**   The in-memory freelist entries. Each entry is either an insert or a
**   delete. The in-memory freelist is to the free-block-list as the
**   in-memory tree is to the users database content.
**
**     1. Number of free-list entries stored in checkpoint header.
**     2. Number of free blocks (in total).
**     3. Total number of blocks freed during database lifetime.
**     4. For each entry:
**        2a. Block number of free block.
**        2b. A 64-bit integer (MSW followed by LSW). -1 for a delete entry,
**            or the associated checkpoint id for an insert.
**
**   The checksum:
**
**     1. Checksum value 1.
Changes to src/lsm_file.c.
1114
1115
1116
1117
1118
1119
1120

1121
1122
1123
1124









1125
1126
1127
1128
1129
1130
1131
  int rc = LSM_OK;

  assert( iPg>=fsFirstPageOnBlock(pFS, 1) );
  *ppPg = 0;

  assert( pFS->bUseMmap==0 || pFS->pCompress==0 );
  if( pFS->bUseMmap ){

    i64 iEnd = (i64)iPg * pFS->nPagesize;
    fsGrowMapping(pFS, iEnd, &rc);
    if( rc!=LSM_OK ) return rc;










    if( pFS->pFree ){
      p = pFS->pFree;
      pFS->pFree = p->pHashNext;
      assert( p->nRef==0 );
    }else{
      p = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc);
      if( rc ) return rc;







>




>
>
>
>
>
>
>
>
>







1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
  int rc = LSM_OK;

  assert( iPg>=fsFirstPageOnBlock(pFS, 1) );
  *ppPg = 0;

  assert( pFS->bUseMmap==0 || pFS->pCompress==0 );
  if( pFS->bUseMmap ){
    Page *pTest;
    i64 iEnd = (i64)iPg * pFS->nPagesize;
    fsGrowMapping(pFS, iEnd, &rc);
    if( rc!=LSM_OK ) return rc;

    p = 0;
    for(pTest=pFS->pWaiting; pTest; pTest=pTest->pNextWaiting){
      if( pTest->iPg==iPg ){
        p = pTest;
        p->nRef++;
        *ppPg = p;
        return LSM_OK;
      }
    }
    if( pFS->pFree ){
      p = pFS->pFree;
      pFS->pFree = p->pHashNext;
      assert( p->nRef==0 );
    }else{
      p = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc);
      if( rc ) return rc;
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990

1991
1992
1993


1994
1995
1996
1997
1998
1999
2000
        ** lsmFsPagePersist() to write an out-of-order page. Instead a page 
        ** number is assigned here so that the page data will be appended
        ** to the current segment.
        */
        Page **pp;
        int iPrev = 0;
        int iNext = 0;
        int iHash;

        assert( pPg->pSeg->iFirst );
        assert( pPg->flags & PAGE_FREE );
        assert( (pPg->flags & PAGE_HASPREV)==0 );
        assert( pPg->nData==pFS->nPagesize-4 );

        rc = fsAppendPage(pFS, pPg->pSeg, &pPg->iPg, &iPrev, &iNext);
        if( rc!=LSM_OK ) return rc;


        iHash = fsHashKey(pFS->nHash, pPg->iPg);
        pPg->pHashNext = pFS->apHash[iHash];
        pFS->apHash[iHash] = pPg;



        if( iPrev ){
          assert( iNext==0 );
          memmove(&pPg->aData[4], pPg->aData, pPg->nData);
          lsmPutU32(pPg->aData, iPrev);
          pPg->flags |= PAGE_HASPREV;
          pPg->aData += 4;







<









>
|
|
|
>
>







1984
1985
1986
1987
1988
1989
1990

1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
        ** lsmFsPagePersist() to write an out-of-order page. Instead a page 
        ** number is assigned here so that the page data will be appended
        ** to the current segment.
        */
        Page **pp;
        int iPrev = 0;
        int iNext = 0;


        assert( pPg->pSeg->iFirst );
        assert( pPg->flags & PAGE_FREE );
        assert( (pPg->flags & PAGE_HASPREV)==0 );
        assert( pPg->nData==pFS->nPagesize-4 );

        rc = fsAppendPage(pFS, pPg->pSeg, &pPg->iPg, &iPrev, &iNext);
        if( rc!=LSM_OK ) return rc;

        if( pFS->bUseMmap==0 ){
          int iHash = fsHashKey(pFS->nHash, pPg->iPg);
          pPg->pHashNext = pFS->apHash[iHash];
          pFS->apHash[iHash] = pPg;
          assert( pPg->pHashNext==0 || pPg->pHashNext->iPg!=pPg->iPg );
        }

        if( iPrev ){
          assert( iNext==0 );
          memmove(&pPg->aData[4], pPg->aData, pPg->nData);
          lsmPutU32(pPg->aData, iPrev);
          pPg->flags |= PAGE_HASPREV;
          pPg->aData += 4;
2458
2459
2460
2461
2462
2463
2464






2465
2466
2467
2468
2469
2470
2471
  int i;
  int rc;
  Freelist freelist = {0, 0, 0};
  u8 *aUsed;
  Level *pLevel;
  Snapshot *pWorker = pDb->pWorker;
  int nBlock = pWorker->nBlock;







  aUsed = lsmMallocZero(pDb->pEnv, nBlock);
  if( aUsed==0 ){
    /* Malloc has failed. Since this function is only called within debug
    ** builds, this probably means the user is running an OOM injection test.
    ** Regardless, it will not be possible to run the integrity-check at this
    ** time, so assume the database is Ok and return non-zero. */







>
>
>
>
>
>







2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
  int i;
  int rc;
  Freelist freelist = {0, 0, 0};
  u8 *aUsed;
  Level *pLevel;
  Snapshot *pWorker = pDb->pWorker;
  int nBlock = pWorker->nBlock;

#if 0 
  static int nCall = 0;
  nCall++;
  printf("%d calls\n", nCall);
#endif

  aUsed = lsmMallocZero(pDb->pEnv, nBlock);
  if( aUsed==0 ){
    /* Malloc has failed. Since this function is only called within debug
    ** builds, this probably means the user is running an OOM injection test.
    ** Regardless, it will not be possible to run the integrity-check at this
    ** time, so assume the database is Ok and return non-zero. */
Changes to src/lsm_main.c.
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435

static int infoFreelistCb(void *pCtx, int iBlk, i64 iSnapshot){
  LsmString *pStr = (LsmString *)pCtx;
  lsmStringAppendf(pStr, "%s{%d %lld}", (pStr->n?" ":""), iBlk, iSnapshot);
  return 0;
}

int lsmInfoFreelist(lsm_db *pDb, char **pzOut){
  Snapshot *pWorker;              /* Worker snapshot */
  int bUnlock = 0;
  LsmString s;
  int i;
  int rc;

  /* Obtain the worker snapshot */







|







421
422
423
424
425
426
427
428
429
430
431
432
433
434
435

static int infoFreelistCb(void *pCtx, int iBlk, i64 iSnapshot){
  LsmString *pStr = (LsmString *)pCtx;
  lsmStringAppendf(pStr, "%s{%d %lld}", (pStr->n?" ":""), iBlk, iSnapshot);
  return 0;
}

static int infoFreelist(lsm_db *pDb, char **pzOut){
  Snapshot *pWorker;              /* Worker snapshot */
  int bUnlock = 0;
  LsmString s;
  int i;
  int rc;

  /* Obtain the worker snapshot */
444
445
446
447
448
449
450



451
452
453
454
455
456
457
    *pzOut = s.z;
  }

  /* Release the snapshot and return */
  infoFreeWorker(pDb, bUnlock);
  return rc;
}




static int infoTreeSize(lsm_db *db, int *pnOld, int *pnNew){
  ShmHeader *pShm = db->pShmhdr;
  TreeHeader *p = &pShm->hdr1;

  /* The following code suffers from two race conditions, as it accesses and
  ** trusts the contents of shared memory without verifying checksums:







>
>
>







444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
    *pzOut = s.z;
  }

  /* Release the snapshot and return */
  infoFreeWorker(pDb, bUnlock);
  return rc;
}

static int infoFreelistSize(lsm_db *pDb, int *pnFree, int *pnWaiting){
}

static int infoTreeSize(lsm_db *db, int *pnOld, int *pnNew){
  ShmHeader *pShm = db->pShmhdr;
  TreeHeader *p = &pShm->hdr1;

  /* The following code suffers from two race conditions, as it accesses and
  ** trusts the contents of shared memory without verifying checksums:
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
      char **pzVal = va_arg(ap, char **);
      rc = lsmInfoLogStructure(pDb, pzVal);
      break;
    }

    case LSM_INFO_FREELIST: {
      char **pzVal = va_arg(ap, char **);
      rc = lsmInfoFreelist(pDb, pzVal);
      break;
    }

    case LSM_INFO_CHECKPOINT_SIZE: {
      int *pnByte = va_arg(ap, int *);
      rc = lsmCheckpointSize(pDb, pnByte);
      break;







|







539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
      char **pzVal = va_arg(ap, char **);
      rc = lsmInfoLogStructure(pDb, pzVal);
      break;
    }

    case LSM_INFO_FREELIST: {
      char **pzVal = va_arg(ap, char **);
      rc = infoFreelist(pDb, pzVal);
      break;
    }

    case LSM_INFO_CHECKPOINT_SIZE: {
      int *pnByte = va_arg(ap, int *);
      rc = lsmCheckpointSize(pDb, pnByte);
      break;
Changes to src/lsm_sorted.c.
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
    assert( rc!=LSM_OK || pDb->pWorker->freelist.nEntry==0 );
    lsmDbSnapshotSetLevel(pDb->pWorker, pNext);
    sortedFreeLevel(pDb->pEnv, pNew);
  }else{
    if( pDel ) pDel->iRoot = 0;

#if 0
    lsmSortedDumpStructure(pDb, pDb->pWorker, 1, 1, "new-toplevel");
#endif

    if( freelist.nEntry ){
      Freelist *p = &pDb->pWorker->freelist;
      lsmFree(pDb->pEnv, p->aEntry);
      memcpy(p, &freelist, sizeof(freelist));
      freelist.aEntry = 0;







|







4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
    assert( rc!=LSM_OK || pDb->pWorker->freelist.nEntry==0 );
    lsmDbSnapshotSetLevel(pDb->pWorker, pNext);
    sortedFreeLevel(pDb->pEnv, pNew);
  }else{
    if( pDel ) pDel->iRoot = 0;

#if 0
    lsmSortedDumpStructure(pDb, pDb->pWorker, 0, 0, "new-toplevel");
#endif

    if( freelist.nEntry ){
      Freelist *p = &pDb->pWorker->freelist;
      lsmFree(pDb->pEnv, p->aEntry);
      memcpy(p, &freelist, sizeof(freelist));
      freelist.aEntry = 0;
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
      /* Clean up the MergeWorker object initialized above. If no error
      ** has occurred, invoke the work-hook to inform the application that
      ** the database structure has changed. */
      mergeWorkerShutdown(&mergeworker, &rc);
      if( rc==LSM_OK ) sortedInvokeWorkHook(pDb);

#if 0
      lsmSortedDumpStructure(pDb, pDb->pWorker, 1, 1, "work");
#endif
      assertBtreeOk(pDb, &pLevel->lhs);
      assertRunInOrder(pDb, &pLevel->lhs);

      /* If bFlush is true and the database is no longer considered "full",
      ** break out of the loop even if nRemaining is still greater than
      ** zero. The caller has an in-memory tree to flush to disk.  */







|







4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
      /* Clean up the MergeWorker object initialized above. If no error
      ** has occurred, invoke the work-hook to inform the application that
      ** the database structure has changed. */
      mergeWorkerShutdown(&mergeworker, &rc);
      if( rc==LSM_OK ) sortedInvokeWorkHook(pDb);

#if 0
      lsmSortedDumpStructure(pDb, pDb->pWorker, 0, 0, "work");
#endif
      assertBtreeOk(pDb, &pLevel->lhs);
      assertRunInOrder(pDb, &pLevel->lhs);

      /* If bFlush is true and the database is no longer considered "full",
      ** break out of the loop even if nRemaining is still greater than
      ** zero. The caller has an in-memory tree to flush to disk.  */
Changes to src/lsm_unix.c.
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
  off_t iSz;
  int prc;
  PosixFile *p = (PosixFile *)pFile;
  struct stat buf;

  if( p->pMap ){
    munmap(p->pMap, p->nMap);
    p->pMap = 0;
    p->nMap = 0;
  }

  memset(&buf, 0, sizeof(buf));
  prc = fstat(p->fd, &buf);
  if( prc!=0 ) return LSM_IOERR_BKPT;
  iSz = buf.st_size;
  if( iSz<iMin ){







|
|







186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
  off_t iSz;
  int prc;
  PosixFile *p = (PosixFile *)pFile;
  struct stat buf;

  if( p->pMap ){
    munmap(p->pMap, p->nMap);
    *ppOut = p->pMap = 0;
    *pnOut = p->nMap = 0;
  }

  memset(&buf, 0, sizeof(buf));
  prc = fstat(p->fd, &buf);
  if( prc!=0 ) return LSM_IOERR_BKPT;
  iSz = buf.st_size;
  if( iSz<iMin ){
Changes to tool/lsmperf.tcl.
186
187
188
189
190
191
192
193
194

195
196
197
198
199
200
201
  append script $data3
  append script $data4

  append script "pause -1\n"
  exec_gnuplot_script $script $zPng
}

do_write_test x.png 100 50000 50000 20 {
  lsm safety=0

}


  #lsm "mmap=1 multi_proc=0 page_size=4096 block_size=2097152 autocheckpoint=4194000"
  #lsm-mt    "mmap=1 multi_proc=0 threads=2 autowork=0 autocheckpoint=4196000"

# lsm     "safety=1 multi_proc=0"







|
|
>







186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
  append script $data3
  append script $data4

  append script "pause -1\n"
  exec_gnuplot_script $script $zPng
}

do_write_test x.png 100 50000 0 20 {
  lsm-mt "threads=2 multi_proc=0"
  leveldb leveldb
}


  #lsm "mmap=1 multi_proc=0 page_size=4096 block_size=2097152 autocheckpoint=4194000"
  #lsm-mt    "mmap=1 multi_proc=0 threads=2 autowork=0 autocheckpoint=4196000"

# lsm     "safety=1 multi_proc=0"