Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change free-list deletes to use range-deletes instead of point-deletes (so that they can be coalesced when segments are merged). This has revealed problems with the range-delete code.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | range-delete-fixes
Files: files | file ages | folders
SHA1: 9374c3a2838f5e2fc9a20548aacb3b7d5f926afc
User & Date: dan 2012-11-02 20:13:17.853
Context
2012-11-03
19:06
Various fixes and tests for range-deletes. check-in: 752517c1cf user: dan tags: range-delete-fixes
2012-11-02
20:13
Change free-list deletes to use range-deletes instead of point-deletes (so that they can be coalesced when segments are merged). This has revealed problems with the range-delete code. check-in: 9374c3a283 user: dan tags: range-delete-fixes
2012-11-01
15:16
Fix a bug preventing a modified snapshot of a "full" database from being written to shared-memory. check-in: 9d8943da66 user: dan tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
Changes to lsm-test/lsmtest_tdb3.c.
719
720
721
722
723
724
725

726
727
728
729
730
731
732
733
734
735
736
737
738
739







740
741
742
743
744
745
746
747
748
749
750
751
    zStart = z;

    while( *z && *z!='=' ) z++;
    if( *z ){
      int eParam;
      int i;
      int iVal;

      int rc;
      char zParam[32];
      int nParam = z-zStart;
      if( nParam==0 || nParam>sizeof(zParam)-1 ) goto syntax_error;

      memcpy(zParam, zStart, nParam);
      zParam[nParam] = '\0';
      rc = testArgSelect(aParam, "param", zParam, &i);
      if( rc!=0 ) return rc;
      eParam = aParam[i].eParam;

      z++;
      zStart = z;
      while( *z>='0' && *z<='9' ) z++;







      nParam = z-zStart;
      if( nParam==0 || nParam>sizeof(zParam)-1 ) goto syntax_error;
      memcpy(zParam, zStart, nParam);
      zParam[nParam] = '\0';
      iVal = atoi(zParam);

      if( eParam>0 ){
        if( bWorker || aParam[i].bWorker==0 ){
          lsm_config(db, eParam, &iVal);
        }
      }else{
        if( pLsm ){







>














>
>
>
>
>
>
>




|







719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
    zStart = z;

    while( *z && *z!='=' ) z++;
    if( *z ){
      int eParam;
      int i;
      int iVal;
      int iMul = 1;
      int rc;
      char zParam[32];
      int nParam = z-zStart;
      if( nParam==0 || nParam>sizeof(zParam)-1 ) goto syntax_error;

      memcpy(zParam, zStart, nParam);
      zParam[nParam] = '\0';
      rc = testArgSelect(aParam, "param", zParam, &i);
      if( rc!=0 ) return rc;
      eParam = aParam[i].eParam;

      z++;
      zStart = z;
      while( *z>='0' && *z<='9' ) z++;
      /* An optional one-character suffix scales the numeric value: 'k' or
      ** 'K' selects a multiplier of 1024, 'm' or 'M' a multiplier of
      ** 1024*1024. Both cases of each letter are accepted. The suffix
      ** character is stepped over so that z points past it. */
      if( *z=='k' || *z=='K' ){
        iMul = 1024;
        z++;
      }else if( *z=='m' || *z=='M' ){
        iMul = 1024 * 1024;
        z++;
      }
      nParam = z-zStart;
      if( nParam==0 || nParam>sizeof(zParam)-1 ) goto syntax_error;
      memcpy(zParam, zStart, nParam);
      zParam[nParam] = '\0';
      iVal = atoi(zParam) * iMul;

      if( eParam>0 ){
        if( bWorker || aParam[i].bWorker==0 ){
          lsm_config(db, eParam, &iVal);
        }
      }else{
        if( pLsm ){
Changes to src/lsm_sorted.c.
180
181
182
183
184
185
186





187
188
189
190
191
192
193
**   lsmMCursorNext()
**   lsmMCursorPrev()
**   lsmMCursorFirst()
**   lsmMCursorLast()
**   lsmMCursorKey()
**   lsmMCursorValue()
**   lsmMCursorValid()





*/
struct MultiCursor {
  lsm_db *pDb;                    /* Connection that owns this cursor */
  MultiCursor *pNext;             /* Next cursor owned by connection pDb */
  int flags;                      /* Mask of CURSOR_XXX flags */

  int eType;                      /* Cache of current key type */







>
>
>
>
>







180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
**   lsmMCursorNext()
**   lsmMCursorPrev()
**   lsmMCursorFirst()
**   lsmMCursorLast()
**   lsmMCursorKey()
**   lsmMCursorValue()
**   lsmMCursorValid()
**
** iFree:
**   This variable is only used by cursors providing input data for a
**   new top-level segment. Such cursors only ever iterate forwards, not
**   backwards.
*/
struct MultiCursor {
  lsm_db *pDb;                    /* Connection that owns this cursor */
  MultiCursor *pNext;             /* Next cursor owned by connection pDb */
  int flags;                      /* Mask of CURSOR_XXX flags */

  int eType;                      /* Cache of current key type */
1905
1906
1907
1908
1909
1910
1911
1912
1913

















1914


1915







1916





1917

1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
        lsmTreeCursorValue(pTreeCsr, &pVal, &nVal);
      }
      break;
    }

    case CURSOR_DATA_SYSTEM: {
      Snapshot *pWorker = pCsr->pDb->pWorker;
      if( (pCsr->flags & CURSOR_FLUSH_FREELIST) 
       && pWorker && pWorker->freelist.nEntry > pCsr->iFree 

















      ){


        int iEntry = pWorker->freelist.nEntry - pCsr->iFree - 1;







        FreelistEntry *pEntry = &pWorker->freelist.aEntry[iEntry];





        u32 i = ~((u32)(pEntry->iBlk));

        lsmPutU32(pCsr->pSystemVal, i);
        pKey = pCsr->pSystemVal;
        nKey = 4;
        if( pEntry->iId>=0 ){
          eType = LSM_SYSTEMKEY | LSM_INSERT;
        }else{
          eType = LSM_SYSTEMKEY | LSM_POINT_DELETE;
        }
      }
      break;
    }

    default: {
      int iPtr = iKey - CURSOR_DATA_SEGMENT;







|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
|
>
>
>
>
>
>
>
|
>
>
>
>
>
|
>
|
|
|
<
<
|
<







1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957


1958

1959
1960
1961
1962
1963
1964
1965
        lsmTreeCursorValue(pTreeCsr, &pVal, &nVal);
      }
      break;
    }

    case CURSOR_DATA_SYSTEM: {
      Snapshot *pWorker = pCsr->pDb->pWorker;
      if( pWorker && (pCsr->flags & CURSOR_FLUSH_FREELIST) ){
        int nEntry = pWorker->freelist.nEntry;
        if( pCsr->iFree < (nEntry*2) ){
          FreelistEntry *aEntry = pWorker->freelist.aEntry;
          int i = nEntry - 1 - (pCsr->iFree / 2);
          u32 iKey = 0;

          if( (pCsr->iFree % 2) ){
            eType = LSM_END_DELETE|LSM_SYSTEMKEY;
            iKey = aEntry[i].iBlk-1;
          }else if( aEntry[i].iId>=0 ){
            eType = LSM_INSERT|LSM_SYSTEMKEY;
            iKey = aEntry[i].iBlk;

            /* If the in-memory entry immediately before this one was a
             ** DELETE, and the block number is one greater than the current
             ** block number, mark this entry as an "end-delete-range". */
            if( i<(nEntry-1) 
                && aEntry[i+1].iBlk==aEntry[i].iBlk+1 && aEntry[i+1].iId<0
              ){
              eType |= LSM_END_DELETE;
            }

            /* If the in-memory entry immediately after this one is a
            ** DELETE, and the block number is one less than the current
            ** block number, mark this entry as a "start-delete-range".
            ** Also increase iFree so that the next entry is not visited
            ** (since it has already been accounted for by setting this
            ** flag).  */
            if( i>0 
                && aEntry[i-1].iBlk==aEntry[i].iBlk-1 && aEntry[i-1].iId<0
              ){
              eType |= LSM_START_DELETE;
            }
          }else{
            eType = LSM_START_DELETE|LSM_SYSTEMKEY;
            iKey = aEntry[i].iBlk + 1;
          }

          pKey = pCsr->pSystemVal;
          nKey = 4;


          lsmPutU32(pKey, ~iKey);

        }
      }
      break;
    }

    default: {
      int iPtr = iKey - CURSOR_DATA_SEGMENT;
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960

1961
1962
1963
1964
1965
1966
1967

  if( peType ) *peType = eType;
  if( pnKey ) *pnKey = nKey;
  if( ppKey ) *ppKey = pKey;
}

static int sortedDbKeyCompare(
  int (*xCmp)(void *, int, void *, int),
  int iLhsFlags, void *pLhsKey, int nLhsKey,
  int iRhsFlags, void *pRhsKey, int nRhsKey
){

  int res;

  /* Compare the keys, including the system flag. */
  res = sortedKeyCompare(xCmp, 
    rtTopic(iLhsFlags), pLhsKey, nLhsKey,
    rtTopic(iRhsFlags), pRhsKey, nRhsKey
  );







|



>







1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002

  if( peType ) *peType = eType;
  if( pnKey ) *pnKey = nKey;
  if( ppKey ) *ppKey = pKey;
}

static int sortedDbKeyCompare(
  MultiCursor *pCsr,
  int iLhsFlags, void *pLhsKey, int nLhsKey,
  int iRhsFlags, void *pRhsKey, int nRhsKey
){
  int (*xCmp)(void *, int, void *, int) = pCsr->pDb->xCmp;
  int res;

  /* Compare the keys, including the system flag. */
  res = sortedKeyCompare(xCmp, 
    rtTopic(iLhsFlags), pLhsKey, nLhsKey,
    rtTopic(iRhsFlags), pRhsKey, nRhsKey
  );
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
    iRes = i2;
  }else if( pKey2==0 ){
    iRes = i1;
  }else{
    int res;

    /* Compare the keys */
    res = sortedDbKeyCompare(pCsr->pDb->xCmp, 
        eType1, pKey1, nKey1, eType2, pKey2, nKey2
    );

    res = res * mul;
    if( res==0 ){
      iRes = (rtIsSeparator(eType1) ? i2 : i1);
    }else if( res<0 ){







|







2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
    iRes = i2;
  }else if( pKey2==0 ){
    iRes = i1;
  }else{
    int res;

    /* Compare the keys */
    res = sortedDbKeyCompare(pCsr,
        eType1, pKey1, nKey1, eType2, pKey2, nKey2
    );

    res = res * mul;
    if( res==0 ){
      iRes = (rtIsSeparator(eType1) ? i2 : i1);
    }else if( res<0 ){
2313
2314
2315
2316
2317
2318
2319
2320



2321
2322
2323
2324
2325
2326
2327
2328
        *pnVal = 0;
      }
      break;
    }

    case CURSOR_DATA_SYSTEM: {
      Snapshot *pWorker = pCsr->pDb->pWorker;
      if( pWorker && pWorker->freelist.nEntry > pCsr->iFree ){



        int iEntry = pWorker->freelist.nEntry - pCsr->iFree - 1;
        u8 *aVal = &((u8 *)(pCsr->pSystemVal))[4];
        lsmPutU64(aVal, pWorker->freelist.aEntry[iEntry].iId);
        *ppVal = aVal;
        *pnVal = 8;
      }
      break;
    }







|
>
>
>
|







2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
        *pnVal = 0;
      }
      break;
    }

    case CURSOR_DATA_SYSTEM: {
      Snapshot *pWorker = pCsr->pDb->pWorker;
      if( pWorker 
       && (pCsr->iFree % 2)==0
       && pCsr->iFree < (pWorker->freelist.nEntry*2)
      ){
        int iEntry = pWorker->freelist.nEntry - 1 - (pCsr->iFree / 2);
        u8 *aVal = &((u8 *)(pCsr->pSystemVal))[4];
        lsmPutU64(aVal, pWorker->freelist.aEntry[iEntry].iId);
        *ppVal = aVal;
        *pnVal = 8;
      }
      break;
    }
2484
2485
2486
2487
2488
2489
2490






2491
2492
2493
2494
2495
2496
2497
  /* Check if this key has already been deleted by a range-delete */
  iKey = pCsr->aTree[1];
  if( (iKey>0 && (rdmask & lsmTreeCursorFlags(pCsr->apTreeCsr[0]))) 
   || (iKey>1 && (rdmask & lsmTreeCursorFlags(pCsr->apTreeCsr[1]))) 
  ){
    return 0;
  }






  for(i=CURSOR_DATA_SEGMENT; i<iKey; i++){
    int iPtr = i-CURSOR_DATA_SEGMENT;
    if( pCsr->aPtr[iPtr].pPg && (pCsr->aPtr[iPtr].eType & rdmask) ){
      return 0;
    }
  }








>
>
>
>
>
>







2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
  /* Check if this key has already been deleted by a range-delete */
  iKey = pCsr->aTree[1];
  if( (iKey>0 && (rdmask & lsmTreeCursorFlags(pCsr->apTreeCsr[0]))) 
   || (iKey>1 && (rdmask & lsmTreeCursorFlags(pCsr->apTreeCsr[1]))) 
  ){
    return 0;
  }
  if( iKey>CURSOR_DATA_SYSTEM && (pCsr->flags & CURSOR_FLUSH_FREELIST) ){
    int eType;
    multiCursorGetKey(pCsr, CURSOR_DATA_SYSTEM, &eType, 0, 0);
    if( rdmask & eType ) return 0;
  }

  for(i=CURSOR_DATA_SEGMENT; i<iKey; i++){
    int iPtr = i-CURSOR_DATA_SEGMENT;
    if( pCsr->aPtr[iPtr].pPg && (pCsr->aPtr[iPtr].eType & rdmask) ){
      return 0;
    }
  }

2764
2765
2766
2767
2768
2769
2770

2771

2772
2773

2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791



























2792
2793
2794
2795
2796
2797
2798

  /* Check the current key value. If it is not greater than (if bReverse==0)
  ** or less than (if bReverse!=0) the key currently cached in pCsr->key, 
  ** then the cursor has not yet been successfully advanced.  
  */
  multiCursorGetKey(pCsr, pCsr->aTree[1], &eNewType, &pNew, &nNew);
  if( pNew ){

    int res = sortedDbKeyCompare(pCsr->pDb->xCmp, 

        eNewType, pNew, nNew, pCsr->eType, pCsr->key.pData, pCsr->key.nData
    );

    if( (bReverse==0 && res<=0) || (bReverse!=0 && res>=0) ){
      return 0;
    }

    multiCursorCacheKey(pCsr, pRc);
    assert( pCsr->eType==eNewType );

    /* If this cursor is configured to skip deleted keys, and the current
    ** cursor points to a SORTED_DELETE entry, then the cursor has not been 
    ** successfully advanced.  
    **
    ** Similarly, if the cursor is configured to skip system keys and the
    ** current cursor points to a system key, it has not yet been advanced.
     */
    if( *pRc==LSM_OK && 0==mcursorLocationOk(pCsr, 0) ) return 0;
  }
  return 1;
}




























static int multiCursorAdvance(MultiCursor *pCsr, int bReverse){
  int rc = LSM_OK;                /* Return Code */
  if( lsmMCursorValid(pCsr) ){
    do {
      int iKey = pCsr->aTree[1];








>
|
>
|

>


















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872

  /* Check the current key value. If it is not greater than (if bReverse==0)
  ** or less than (if bReverse!=0) the key currently cached in pCsr->key, 
  ** then the cursor has not yet been successfully advanced.  
  */
  multiCursorGetKey(pCsr, pCsr->aTree[1], &eNewType, &pNew, &nNew);
  if( pNew ){
    int typemask = (pCsr->flags & CURSOR_IGNORE_DELETE) ? ~(0) : LSM_SYSTEMKEY;
    int res = sortedDbKeyCompare(pCsr,
      eNewType & typemask, pNew, nNew, 
      pCsr->eType & typemask, pCsr->key.pData, pCsr->key.nData
    );

    if( (bReverse==0 && res<=0) || (bReverse!=0 && res>=0) ){
      return 0;
    }

    multiCursorCacheKey(pCsr, pRc);
    assert( pCsr->eType==eNewType );

    /* If this cursor is configured to skip deleted keys, and the current
    ** cursor points to a SORTED_DELETE entry, then the cursor has not been 
    ** successfully advanced.  
    **
    ** Similarly, if the cursor is configured to skip system keys and the
    ** current cursor points to a system key, it has not yet been advanced.
     */
    if( *pRc==LSM_OK && 0==mcursorLocationOk(pCsr, 0) ) return 0;
  }
  return 1;
}

/*
** Advance the free-list position (pCsr->iFree) of a cursor that has the
** CURSOR_FLUSH_FREELIST flag set.
**
** An odd iFree value is simply incremented. For an even value, the
** free-list entry at index (nEntry - 1 - iFree/2) of the worker snapshot
** is inspected:
**
**   * If its iId is non-negative, iFree is increased by 2.
**
**   * If its iId is negative, the entries at successively lower indexes
**     are scanned for as long as each one has a block number exactly one
**     less than its successor and a negative iId. iFree grows by 2 for
**     every such entry stepped over. If the scan stops because there is
**     no block-adjacent predecessor (or index 0 was reached) the final
**     step is 1, leaving iFree odd; if it stops at a block-adjacent entry
**     with a non-negative iId the final step is 2.
*/
static void flCsrAdvance(MultiCursor *pCsr){
  FreelistEntry *aEntry;          /* Free-list entries of the worker snapshot */
  int iEntry;                     /* Index of entry under consideration */
  int nStep = 2;                  /* Amount to add to pCsr->iFree */

  assert( pCsr->flags & CURSOR_FLUSH_FREELIST );

  if( pCsr->iFree % 2 ){
    pCsr->iFree++;
    return;
  }

  aEntry = pCsr->pDb->pWorker->freelist.aEntry;
  iEntry = pCsr->pDb->pWorker->freelist.nEntry - 1 - (pCsr->iFree / 2);

  if( aEntry[iEntry].iId<0 ){
    for(;;){
      /* No predecessor with the immediately preceding block number: take
      ** one step less so that iFree ends up odd. */
      if( iEntry==0 || aEntry[iEntry-1].iBlk!=aEntry[iEntry].iBlk-1 ){
        nStep -= 1;
        break;
      }

      /* Block-adjacent predecessor with a non-negative id: stop here. */
      if( aEntry[iEntry-1].iId>=0 ) break;

      /* Block-adjacent predecessor with a negative id: step over it. */
      nStep += 2;
      iEntry--;
    }
  }

  pCsr->iFree += nStep;
}

static int multiCursorAdvance(MultiCursor *pCsr, int bReverse){
  int rc = LSM_OK;                /* Return Code */
  if( lsmMCursorValid(pCsr) ){
    do {
      int iKey = pCsr->aTree[1];

2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
          rc = lsmTreeCursorPrev(pTreeCsr);
        }else{
          rc = lsmTreeCursorNext(pTreeCsr);
        }
      }else if( iKey==CURSOR_DATA_SYSTEM ){
        assert( pCsr->flags & CURSOR_FLUSH_FREELIST );
        assert( bReverse==0 );
        pCsr->iFree++;
      }else if( iKey==(CURSOR_DATA_SEGMENT+pCsr->nPtr) ){
        assert( bReverse==0 && pCsr->pBtCsr );
        rc = btreeCursorNext(pCsr->pBtCsr);
      }else{
        rc = segmentCursorAdvance(pCsr, iKey-CURSOR_DATA_SEGMENT, bReverse);
      }
      if( rc==LSM_OK ){







|







2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
          rc = lsmTreeCursorPrev(pTreeCsr);
        }else{
          rc = lsmTreeCursorNext(pTreeCsr);
        }
      }else if( iKey==CURSOR_DATA_SYSTEM ){
        assert( pCsr->flags & CURSOR_FLUSH_FREELIST );
        assert( bReverse==0 );
        flCsrAdvance(pCsr);
      }else if( iKey==(CURSOR_DATA_SEGMENT+pCsr->nPtr) ){
        assert( bReverse==0 && pCsr->pBtCsr );
        rc = btreeCursorNext(pCsr->pBtCsr);
      }else{
        rc = segmentCursorAdvance(pCsr, iKey-CURSOR_DATA_SEGMENT, bReverse);
      }
      if( rc==LSM_OK ){
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860

/*
** Step the multi-cursor backwards by calling multiCursorAdvance() with
** its second argument set to 1. Cursors that do not have the
** CURSOR_PREV_OK flag set are rejected with LSM_MISUSE_BKPT.
*/
int lsmMCursorPrev(MultiCursor *pCsr){
  int rc;
  if( pCsr->flags & CURSOR_PREV_OK ){
    rc = multiCursorAdvance(pCsr, 1);
  }else{
    rc = LSM_MISUSE_BKPT;
  }
  return rc;
}

int lsmMCursorKey(MultiCursor *pCsr, void **ppKey, int *pnKey){
  if( pCsr->flags & CURSOR_SEEK_EQ ){
    *pnKey = pCsr->key.nData;
    *ppKey = pCsr->key.pData;
  }else{
    int iKey = pCsr->aTree[1];

    if( iKey==CURSOR_DATA_TREE0 || iKey==CURSOR_DATA_TREE1 ){
      TreeCursor *pTreeCsr = pCsr->apTreeCsr[iKey-CURSOR_DATA_TREE0];







|







2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934

/*
** Step the multi-cursor backwards by calling multiCursorAdvance() with
** its second argument set to 1. Cursors that do not have the
** CURSOR_PREV_OK flag set are rejected with LSM_MISUSE_BKPT.
*/
int lsmMCursorPrev(MultiCursor *pCsr){
  int rc;
  if( pCsr->flags & CURSOR_PREV_OK ){
    rc = multiCursorAdvance(pCsr, 1);
  }else{
    rc = LSM_MISUSE_BKPT;
  }
  return rc;
}

int lsmMCursorKey(MultiCursor *pCsr, void **ppKey, int *pnKey){
  if( (pCsr->flags & CURSOR_SEEK_EQ) || pCsr->aTree==0 ){
    *pnKey = pCsr->key.nData;
    *ppKey = pCsr->key.pData;
  }else{
    int iKey = pCsr->aTree[1];

    if( iKey==CURSOR_DATA_TREE0 || iKey==CURSOR_DATA_TREE1 ){
      TreeCursor *pTreeCsr = pCsr->apTreeCsr[iKey-CURSOR_DATA_TREE0];
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
  return LSM_OK;
}

int lsmMCursorValue(MultiCursor *pCsr, void **ppVal, int *pnVal){
  void *pVal;
  int nVal;
  int rc;
  if( pCsr->flags & CURSOR_SEEK_EQ ){
    rc = LSM_OK;
    nVal = pCsr->val.nData;
    pVal = pCsr->val.pData;
  }else{

    assert( pCsr->aTree );
    assert( mcursorLocationOk(pCsr, (pCsr->flags & CURSOR_IGNORE_DELETE)) );







|







2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
  return LSM_OK;
}

int lsmMCursorValue(MultiCursor *pCsr, void **ppVal, int *pnVal){
  void *pVal;
  int nVal;
  int rc;
  if( (pCsr->flags & CURSOR_SEEK_EQ) || pCsr->aTree==0 ){
    rc = LSM_OK;
    nVal = pCsr->val.nData;
    pVal = pCsr->val.pData;
  }else{

    assert( pCsr->aTree );
    assert( mcursorLocationOk(pCsr, (pCsr->flags & CURSOR_IGNORE_DELETE)) );
3516
3517
3518
3519
3520
3521
3522

3523
3524
3525
3526
3527
3528
3529
}

static int mergeWorkerWrite(
  MergeWorker *pMW,               /* Merge worker object to write into */
  int eType,                      /* One of SORTED_SEPARATOR, WRITE or DELETE */
  void *pKey, int nKey,           /* Key value */
  MultiCursor *pCsr,              /* Read value (if any) from here */

  int iPtr                        /* Absolute value of page pointer, or 0 */
){
  int rc = LSM_OK;                /* Return code */
  Merge *pMerge;                  /* Persistent part of level merge state */
  int nHdr;                       /* Space required for this record header */
  Page *pPg;                      /* Page to write to */
  u8 *aData;                      /* Data buffer for page pWriter->pPage */







>







3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
}

static int mergeWorkerWrite(
  MergeWorker *pMW,               /* Merge worker object to write into */
  int eType,                      /* One of SORTED_SEPARATOR, WRITE or DELETE */
  void *pKey, int nKey,           /* Key value */
  MultiCursor *pCsr,              /* Read value (if any) from here */
  int iVal,
  int iPtr                        /* Absolute value of page pointer, or 0 */
){
  int rc = LSM_OK;                /* Return code */
  Merge *pMerge;                  /* Persistent part of level merge state */
  int nHdr;                       /* Space required for this record header */
  Page *pPg;                      /* Page to write to */
  u8 *aData;                      /* Data buffer for page pWriter->pPage */
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
  **
  **     1) record type - 1 byte.
  **     2) Page-pointer-offset - 1 varint
  **     3) Key size - 1 varint
  **     4) Value size - 1 varint (only if LSM_INSERT flag is set)
  */
  if( rc==LSM_OK ){
    rc = lsmMCursorValue(pCsr, &pVal, &nVal);
  }
  if( rc==LSM_OK ){
    nHdr = 1 + lsmVarintLen32(iRPtr) + lsmVarintLen32(nKey);
    if( rtIsWrite(eType) ) nHdr += lsmVarintLen32(nVal);

    /* If the entire header will not fit on page pPg, or if page pPg is 
    ** marked read-only, advance to the next page of the output run. */







|







3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
  **
  **     1) record type - 1 byte.
  **     2) Page-pointer-offset - 1 varint
  **     3) Key size - 1 varint
  **     4) Value size - 1 varint (only if LSM_INSERT flag is set)
  */
  if( rc==LSM_OK ){
    rc = multiCursorGetVal(pCsr, iVal, &pVal, &nVal);
  }
  if( rc==LSM_OK ){
    nHdr = 1 + lsmVarintLen32(iRPtr) + lsmVarintLen32(nKey);
    if( rtIsWrite(eType) ) nHdr += lsmVarintLen32(nVal);

    /* If the entire header will not fit on page pPg, or if page pPg is 
    ** marked read-only, advance to the next page of the output run. */
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
3637
    if( rtIsWrite(eType) ) iOff += lsmVarintPut32(&aData[iOff], nVal);   /* 4 */
    pMerge->iOutputOff = iOff;

    /* Write the key and data into the segment. */
    assert( iFPtr==pageGetPtr(aData, nData) );
    rc = mergeWorkerData(pMW, 0, iFPtr+iRPtr, pKey, nKey);
    if( rc==LSM_OK && rtIsWrite(eType) ){
      if( rtTopic(eType)==0 ) rc = lsmMCursorValue(pCsr, &pVal, &nVal);
      if( rc==LSM_OK ){
        rc = mergeWorkerData(pMW, 0, iFPtr+iRPtr, pVal, nVal);
      }
    }
  }

  return rc;







|







3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
    if( rtIsWrite(eType) ) iOff += lsmVarintPut32(&aData[iOff], nVal);   /* 4 */
    pMerge->iOutputOff = iOff;

    /* Write the key and data into the segment. */
    assert( iFPtr==pageGetPtr(aData, nData) );
    rc = mergeWorkerData(pMW, 0, iFPtr+iRPtr, pKey, nKey);
    if( rc==LSM_OK && rtIsWrite(eType) ){
      if( rtTopic(eType)==0 ) rc = multiCursorGetVal(pCsr, iVal, &pVal, &nVal);
      if( rc==LSM_OK ){
        rc = mergeWorkerData(pMW, 0, iFPtr+iRPtr, pVal, nVal);
      }
    }
  }

  return rc;
3723
3724
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742

3743
3744
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755








3756



























3757



3758
3759
3760
3761
3762
3763
3764
3765
3766
3767
3768
3769
3770
3771
3772

3773
3774
3775
3776
3777
3778
3779
** a merge operation. When this function is called, *piFlags contains the
** database entry flags for the current entry. The entry about to be written
** to the output.
**
** Note that this function only has to work for cursors configured to 
** iterate forwards (not backwards).
*/
static void mergeRangeDeletes(MultiCursor *pCsr, int *piFlags){
  int flags = *piFlags;           /* Working copy of the entry flags */
  int iSrc = pCsr->aTree[1];      /* Value of pCsr->aTree[1] */
  int iSeg;                       /* Index into pCsr->aPtr[] */

  assert( pCsr->flags & CURSOR_NEXT_OK );

  if( (pCsr->flags & CURSOR_IGNORE_DELETE)==0 ){
    int bEndDelete = 0;           /* True if an END_DELETE flag was found */

    /* When iSrc is zero, consult the flags of tree cursor apTreeCsr[1]. */
    if( iSrc==0 ){
      int treeflags = lsmTreeCursorFlags(pCsr->apTreeCsr[1]);
      if( treeflags & LSM_END_DELETE ) bEndDelete = 1;
    }

    /* Consult every segment pointer from index (iSrc+1-CURSOR_DATA_SEGMENT)
    ** onwards (never below index 0) that currently has a page loaded. */
    iSeg = LSM_MAX(0, iSrc+1-CURSOR_DATA_SEGMENT);
    while( iSeg<pCsr->nPtr ){
      SegmentPtr *pSeg = &pCsr->aPtr[iSeg];
      if( pSeg->pPg && (pSeg->eType & LSM_END_DELETE) ) bEndDelete = 1;
      iSeg++;
    }

    /* If any of the sources examined above carries END_DELETE, mark the
    ** current entry with both range-delete flags. */
    if( bEndDelete ){
      flags |= (LSM_START_DELETE|LSM_END_DELETE);
    }

    /* An entry that has both range-delete flags set and is not an INSERT
    ** has all of its flags cleared. */
    if( (flags & LSM_INSERT)==0
     && (flags & LSM_START_DELETE)
     && (flags & LSM_END_DELETE)
    ){
      flags = 0;
    }
  }else{
    /* The ignore-delete flag is set when the output of the merge will form
    ** the oldest level in the database, so range-delete flags need not be
    ** retained. */
    assert( (flags & LSM_POINT_DELETE)==0 );
    flags &= ~(LSM_START_DELETE|LSM_END_DELETE);
  }

  *piFlags = flags;
}

static int mergeWorkerStep(MergeWorker *pMW){
  lsm_db *pDb = pMW->pDb;       /* Database handle */
  MultiCursor *pCsr;            /* Cursor to read input data from */
  int rc = LSM_OK;              /* Return code */
  int eType;                    /* SORTED_SEPARATOR, WRITE or DELETE */
  void *pKey; int nKey;         /* Key */
  Segment *pSeg;                /* Output segment */
  Pgno iPtr;


  pCsr = pMW->pCsr;
  pSeg = &pMW->pLevel->lhs;

  /* Pull the next record out of the source cursor. */
  lsmMCursorKey(pCsr, &pKey, &nKey);
  eType = pCsr->eType;







|












>













>
>
>
>
>
>
>
>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>















>







3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
3839
3840
3841
3842
3843
3844
3845
3846
3847
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
** a merge operation. When this function is called, *piFlags contains the
** database entry flags for the current entry. The entry about to be written
** to the output.
**
** Note that this function only has to work for cursors configured to 
** iterate forwards (not backwards).
*/
/*
** Parameters:
**
**   pCsr     Merge input cursor. Must have CURSOR_NEXT_OK set.
**   piVal    IN/OUT: index of the cursor source to read the value from.
**            Overwritten only when the incoming flags contain neither
**            LSM_INSERT nor LSM_POINT_DELETE and another source holds an
**            entry with an equal key that has LSM_INSERT set.
**   piFlags  IN/OUT: flags of the entry about to be written. On return
**            this holds the adjusted flags. It may be set to zero; the
**            visible caller (mergeWorkerStep) writes no record when the
**            flags are zero.
*/
static void mergeRangeDeletes(MultiCursor *pCsr, int *piVal, int *piFlags){
  int f = *piFlags;               /* Working copy of the flags */
  int iKey = pCsr->aTree[1];      /* Index of the cursor's current source */
  int i;

  assert( pCsr->flags & CURSOR_NEXT_OK );
  if( pCsr->flags & CURSOR_IGNORE_DELETE ){
    /* The ignore-delete flag is set when the output of the merge will form
    ** the oldest level in the database. In this case there is no point in
    ** retaining any range-delete flags.  */
    assert( (f & LSM_POINT_DELETE)==0 );
    f &= ~(LSM_START_DELETE|LSM_END_DELETE);
  }else{
    /* The block between #if 0 and #endif is compiled out. */
#if 0
    if( iKey==0 ){
      int btreeflags = lsmTreeCursorFlags(pCsr->apTreeCsr[1]);
      if( btreeflags & LSM_END_DELETE ){
        f |= (LSM_START_DELETE|LSM_END_DELETE);
      }
    }

    for(i=LSM_MAX(0, iKey+1-CURSOR_DATA_SEGMENT); i<pCsr->nPtr; i++){
      SegmentPtr *pPtr = &pCsr->aPtr[i];
      if( pPtr->pPg && (pPtr->eType & LSM_END_DELETE) ){
        f |= (LSM_START_DELETE|LSM_END_DELETE);
      }
    }
#endif
    /* Examine the current key of every source with an index in the range
    ** [0, CURSOR_DATA_SEGMENT + nPtr) other than the current one (iKey). */
    for(i=0; i<(CURSOR_DATA_SEGMENT + pCsr->nPtr); i++){
      if( i!=iKey ){
        int eType;
        void *pKey;
        int nKey;
        int res;
        multiCursorGetKey(pCsr, i, &eType, &pKey, &nKey);

        /* Sources for which multiCursorGetKey() reports no key are
        ** ignored. */
        if( pKey ){
          /* Compare the key cached in the cursor (pCsr->key, with the topic
          ** taken from pCsr->eType) against the key of source i. The assert
          ** expects the cached key never to compare greater. */
          res = sortedKeyCompare(pCsr->pDb->xCmp, 
              rtTopic(pCsr->eType), pCsr->key.pData, pCsr->key.nData,
              rtTopic(eType), pKey, nKey
              );
          assert( res<=0 );
          if( res==0 ){
            /* Source i holds an entry with an equal key. If the flags do
            ** not yet contain INSERT or POINT_DELETE, adopt whichever of
            ** the two source i has (INSERT is checked first), and for an
            ** INSERT record source i in *piVal. Sources are visited in
            ** ascending index order, so the first such source wins. */
            if( (f & (LSM_INSERT|LSM_POINT_DELETE))==0 ){
              if( eType & LSM_INSERT ){
                f |= LSM_INSERT;
                *piVal = i;
              }
              else if( eType & LSM_POINT_DELETE ){
                f |= LSM_POINT_DELETE;
              }
            }
            /* Range-delete flags of equal-key entries are always merged. */
            f |= (eType & (LSM_END_DELETE|LSM_START_DELETE));
          }

          /* A source with a larger index than iKey whose current key
          ** compares greater than the cached key and carries END_DELETE
          ** causes both range-delete flags to be set on this entry.
          ** NOTE(review): presumably because the cached key then lies
          ** inside that source's delete range - confirm. */
          if( i>iKey && (eType & LSM_END_DELETE) && res<0 ){
            f |= (LSM_END_DELETE|LSM_START_DELETE);
          }
        }
      }
    }

    /* INSERT and POINT_DELETE are expected to be mutually exclusive. An
    ** entry carrying POINT_DELETE together with both range-delete flags
    ** has all of its flags cleared. */
    assert( (f & LSM_INSERT)==0 || (f & LSM_POINT_DELETE)==0 );
    if( (f & LSM_START_DELETE) 
     && (f & LSM_END_DELETE) 
     && (f & LSM_POINT_DELETE )
    ){
      f = 0;
    }
  }

  *piFlags = f;
}

static int mergeWorkerStep(MergeWorker *pMW){
  lsm_db *pDb = pMW->pDb;       /* Database handle */
  MultiCursor *pCsr;            /* Cursor to read input data from */
  int rc = LSM_OK;              /* Return code */
  int eType;                    /* SORTED_SEPARATOR, WRITE or DELETE */
  void *pKey; int nKey;         /* Key */
  Segment *pSeg;                /* Output segment */
  Pgno iPtr;
  int iVal;

  pCsr = pMW->pCsr;
  pSeg = &pMW->pLevel->lhs;

  /* Pull the next record out of the source cursor. */
  lsmMCursorKey(pCsr, &pKey, &nKey);
  eType = pCsr->eType;
3803
3804
3805
3806
3807
3808
3809

3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
    if( pPtr->pPg
     && 0==pDb->xCmp(pPtr->pKey, pPtr->nKey, pKey, nKey)
    ){
      iPtr = pPtr->iPtr+pPtr->iPgPtr;
    }
  }


  mergeRangeDeletes(pCsr, &eType);

  if( eType!=0 ){
    if( pMW->aGobble ){
      int iGobble = pCsr->aTree[1] - CURSOR_DATA_SEGMENT;
      if( iGobble<pCsr->nPtr ){
        SegmentPtr *pGobble = &pCsr->aPtr[iGobble];
        if( (pGobble->flags & PGFTR_SKIP_THIS_FLAG)==0 ){
          pMW->aGobble[iGobble] = lsmFsPageNumber(pGobble->pPg);
        }
      }
    }

    /* If this is a separator key and we know that the output pointer has not
    ** changed, there is no point in writing an output record. Otherwise,
    ** proceed. */
    if( rtIsSeparator(eType)==0 || iPtr!=0 ){
      /* Write the record into the main run. */
      if( rc==LSM_OK ){
        rc = mergeWorkerWrite(pMW, eType, pKey, nKey, pCsr, iPtr);
      }
    }
  }

  /* Advance the cursor to the next input record (assuming one exists). */
  assert( lsmMCursorValid(pMW->pCsr) );
  if( rc==LSM_OK ) rc = lsmMCursorNext(pMW->pCsr);







>
|


















|







3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
    if( pPtr->pPg
     && 0==pDb->xCmp(pPtr->pKey, pPtr->nKey, pKey, nKey)
    ){
      iPtr = pPtr->iPtr+pPtr->iPgPtr;
    }
  }

  iVal = pCsr->aTree[1];
  mergeRangeDeletes(pCsr, &iVal, &eType);

  if( eType!=0 ){
    if( pMW->aGobble ){
      int iGobble = pCsr->aTree[1] - CURSOR_DATA_SEGMENT;
      if( iGobble<pCsr->nPtr ){
        SegmentPtr *pGobble = &pCsr->aPtr[iGobble];
        if( (pGobble->flags & PGFTR_SKIP_THIS_FLAG)==0 ){
          pMW->aGobble[iGobble] = lsmFsPageNumber(pGobble->pPg);
        }
      }
    }

    /* If this is a separator key and we know that the output pointer has not
    ** changed, there is no point in writing an output record. Otherwise,
    ** proceed. */
    if( rtIsSeparator(eType)==0 || iPtr!=0 ){
      /* Write the record into the main run. */
      if( rc==LSM_OK ){
        rc = mergeWorkerWrite(pMW, eType, pKey, nKey, pCsr, iVal, iPtr);
      }
    }
  }

  /* Advance the cursor to the next input record (assuming one exists). */
  assert( lsmMCursorValid(pMW->pCsr) );
  if( rc==LSM_OK ) rc = lsmMCursorNext(pMW->pCsr);
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
  if( rc!=LSM_OK || pNew->lhs.iFirst==0 ){
    assert( rc!=LSM_OK || pDb->pWorker->freelist.nEntry==0 );
    lsmDbSnapshotSetLevel(pDb->pWorker, pNext);
    sortedFreeLevel(pDb->pEnv, pNew);
  }else{
    if( pDel ) pDel->iRoot = 0;

#if 0
    lsmSortedDumpStructure(pDb, pDb->pWorker, 0, 0, "new-toplevel");
#endif

    if( freelist.nEntry ){
      Freelist *p = &pDb->pWorker->freelist;
      lsmFree(pDb->pEnv, p->aEntry);
      memcpy(p, &freelist, sizeof(freelist));
      freelist.aEntry = 0;







|
|







4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
  if( rc!=LSM_OK || pNew->lhs.iFirst==0 ){
    assert( rc!=LSM_OK || pDb->pWorker->freelist.nEntry==0 );
    lsmDbSnapshotSetLevel(pDb->pWorker, pNext);
    sortedFreeLevel(pDb->pEnv, pNew);
  }else{
    if( pDel ) pDel->iRoot = 0;

#if 1
    lsmSortedDumpStructure(pDb, pDb->pWorker, 1, 0, "new-toplevel");
#endif

    if( freelist.nEntry ){
      Freelist *p = &pDb->pWorker->freelist;
      lsmFree(pDb->pEnv, p->aEntry);
      memcpy(p, &freelist, sizeof(freelist));
      freelist.aEntry = 0;
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399

      /* Clean up the MergeWorker object initialized above. If no error
      ** has occurred, invoke the work-hook to inform the application that
      ** the database structure has changed. */
      mergeWorkerShutdown(&mergeworker, &rc);
      if( rc==LSM_OK ) sortedInvokeWorkHook(pDb);

#if 0
      lsmSortedDumpStructure(pDb, pDb->pWorker, 0, 0, "work");
#endif
      assertBtreeOk(pDb, &pLevel->lhs);
      assertRunInOrder(pDb, &pLevel->lhs);

      /* If bFlush is true and the database is no longer considered "full",
      ** break out of the loop even if nRemaining is still greater than
      ** zero. The caller has an in-memory tree to flush to disk.  */







|
|







4500
4501
4502
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515

      /* Clean up the MergeWorker object initialized above. If no error
      ** has occurred, invoke the work-hook to inform the application that
      ** the database structure has changed. */
      mergeWorkerShutdown(&mergeworker, &rc);
      if( rc==LSM_OK ) sortedInvokeWorkHook(pDb);

#if 1
      lsmSortedDumpStructure(pDb, pDb->pWorker, 1, 0, "work");
#endif
      assertBtreeOk(pDb, &pLevel->lhs);
      assertRunInOrder(pDb, &pLevel->lhs);

      /* If bFlush is true and the database is no longer considered "full",
      ** break out of the loop even if nRemaining is still greater than
      ** zero. The caller has an in-memory tree to flush to disk.  */
Changes to test/test_lsm.c.
11
12
13
14
15
16
17


18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
*************************************************************************
**
*/

#include <tcl.h>
#include "lsm.h"
#include "sqlite4.h"



extern int getDbPointer(Tcl_Interp *interp, const char *zA, sqlite4 **ppDb);
extern const char *sqlite4TestErrorName(int);

/*
** TCLCMD:    sqlite4_lsm_config DB DBNAME PARAM ...
*/
static int test_lsm_config(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  struct Switch {
    const char *zSwitch;







>
>







|







11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
*************************************************************************
**
*/

#include <tcl.h>
#include "lsm.h"
#include "sqlite4.h"
#include <assert.h>
#include <string.h>

extern int getDbPointer(Tcl_Interp *interp, const char *zA, sqlite4 **ppDb);
extern const char *sqlite4TestErrorName(int);

/*
** TCLCMD:    sqlite4_lsm_config DB DBNAME PARAM ...
*/
static int test_sqlite4_lsm_config(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  struct Switch {
    const char *zSwitch;
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
  }
  return TCL_OK;
}

/*
** TCLCMD:    sqlite4_lsm_info DB DBNAME PARAM
*/
static int test_lsm_info(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  struct Switch {
    const char *zSwitch;







|







83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
  }
  return TCL_OK;
}

/*
** TCLCMD:    sqlite4_lsm_info DB DBNAME PARAM
*/
static int test_sqlite4_lsm_info(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  struct Switch {
    const char *zSwitch;
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
  }
  return TCL_OK;
}

/*
** TCLCMD:    sqlite4_lsm_work DB DBNAME ?SWITCHES? ?N?
*/
static int test_lsm_work(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  struct Switch {
    const char *zSwitch;







|







141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
  }
  return TCL_OK;
}

/*
** TCLCMD:    sqlite4_lsm_work DB DBNAME ?SWITCHES? ?N?
*/
static int test_sqlite4_lsm_work(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  struct Switch {
    const char *zSwitch;
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
  Tcl_SetObjResult(interp, Tcl_NewIntObj(nWork));
  return TCL_OK;
}

/*
** TCLCMD:    sqlite4_lsm_checkpoint DB DBNAME 
*/
static int test_lsm_checkpoint(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  const char *zDb;
  const char *zName;







|







208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
  Tcl_SetObjResult(interp, Tcl_NewIntObj(nWork));
  return TCL_OK;
}

/*
** TCLCMD:    sqlite4_lsm_checkpoint DB DBNAME 
*/
static int test_sqlite4_lsm_checkpoint(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  const char *zDb;
  const char *zName;
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
  Tcl_ResetResult(interp);
  return TCL_OK;
}

/*
** TCLCMD:    sqlite4_lsm_flush DB DBNAME 
*/
static int test_lsm_flush(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  const char *zDb;
  const char *zName;







|







246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
  Tcl_ResetResult(interp);
  return TCL_OK;
}

/*
** TCLCMD:    sqlite4_lsm_flush DB DBNAME 
*/
static int test_sqlite4_lsm_flush(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  const char *zDb;
  const char *zName;
284
285
286
287
288
289
290
































































































































































































































































































































































































































291
292
293
294
295
296
297
298
299
300
301

302
303
304
305
306
307
308
309
    Tcl_SetResult(interp, (char *)sqlite4TestErrorName(rc), TCL_STATIC);
    return TCL_ERROR;
  }

  Tcl_ResetResult(interp);
  return TCL_OK;
}

































































































































































































































































































































































































































/*
** Register each of the sqlite4_lsm_* test commands listed in aCmd[] with
** the Tcl interpreter passed as the only argument. No client-data or
** delete callback is attached to any of them. Always returns TCL_OK.
*/
int SqlitetestLsm_Init(Tcl_Interp *interp){
  struct SyscallCmd {
    const char *zName;            /* Name of the Tcl command */
    Tcl_ObjCmdProc *xCmd;         /* C function implementing it */
  } aCmd[] = {
    { "sqlite4_lsm_work",       test_lsm_work                },
    { "sqlite4_lsm_checkpoint", test_lsm_checkpoint          },
    { "sqlite4_lsm_flush",      test_lsm_flush               },
    { "sqlite4_lsm_info",       test_lsm_info                },
    { "sqlite4_lsm_config",     test_lsm_config              },

  };
  const int nCmd = (int)(sizeof(aCmd)/sizeof(aCmd[0]));
  int iCmd = 0;

  /* Commands are created in table order. */
  while( iCmd<nCmd ){
    struct SyscallCmd *pCmd = &aCmd[iCmd++];
    Tcl_CreateObjCommand(interp, pCmd->zName, pCmd->xCmd, 0, 0);
  }
  return TCL_OK;
}







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>






|
|
|
|
|
>








286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
    Tcl_SetResult(interp, (char *)sqlite4TestErrorName(rc), TCL_STATIC);
    return TCL_ERROR;
  }

  Tcl_ResetResult(interp);
  return TCL_OK;
}

/*
** Client-data attached to a Tcl cursor command (see test_lsm_cursor_cmd()).
** Wraps a single open lsm_cursor handle.
*/
typedef struct TclLsmCursor {
  lsm_cursor *csr;
} TclLsmCursor;

/*
** Client-data attached to a Tcl database command (see test_lsm_cmd()).
** Wraps a single lsm_db connection handle.
*/
typedef struct TclLsm {
  lsm_db *db;
} TclLsm;

/*
** Translate an LSM return code into a Tcl command result.
**
** If rc is LSM_OK, the interpreter result is left untouched and TCL_OK is
** returned. Otherwise the interpreter result is replaced by a message of
** the form "error in <zApi>() - <rc>" and TCL_ERROR is returned.
**
**   interp - interpreter whose result is set on error
**   zApi   - name of the LSM API function that produced rc
**   rc     - LSM return code to report
*/
static int test_lsm_error(Tcl_Interp *interp, const char *zApi, int rc){
  char zMsg[64];
  if( rc==LSM_OK ){
    return TCL_OK;
  }

  /* Bounded formatting: an over-long zApi is truncated instead of being
  ** allowed to write past the end of zMsg[]. */
  snprintf(zMsg, sizeof(zMsg), "error in %s() - %d", zApi, rc);
  Tcl_ResetResult(interp);

  /* The argument list of Tcl_AppendResult() must end with a null
  ** *pointer*; a bare int 0 is not portable in a variadic call. */
  Tcl_AppendResult(interp, zMsg, (char *)0);
  return TCL_ERROR;
}

/*
** Destructor for a TclLsmCursor. Passed to Tcl_CreateObjCommand() as the
** delete callback of a cursor command and also called directly on error
** paths. Closes the wrapped cursor and releases the wrapper itself. A NULL
** ctx is a no-op.
*/
static void test_lsm_cursor_del(void *ctx){
  TclLsmCursor *pWrapper = (TclLsmCursor *)ctx;
  if( pWrapper==0 ) return;
  lsm_csr_close(pWrapper->csr);
  ckfree((char *)pWrapper);
}

/*
** Destructor for a TclLsm. Passed to Tcl_CreateObjCommand() as the delete
** callback of a database command and also called directly on error paths.
** Closes the wrapped database handle and releases the wrapper itself. A
** NULL ctx is a no-op.
*/
static void test_lsm_del(void *ctx){
  TclLsm *pWrapper = (TclLsm *)ctx;
  if( pWrapper==0 ) return;
  lsm_close(pWrapper->db);
  ckfree((char *)pWrapper);
}

/*
** Usage: CSR sub-command ...
**
** Implementation of the Tcl command that wraps a single LSM cursor. The
** clientData pointer is the TclLsmCursor created by the "csr_open"
** sub-command of the database command. Supported sub-commands:
**
**   close                  Delete this Tcl command (which closes the cursor)
**   seek KEY SEEK-TYPE     SEEK-TYPE is one of eq, le, lefast or ge
**   first | last           Move to the first/last entry
**   next | prev            Step forwards/backwards
**   key | value            Return the current key/value as a string
**   valid                  Return a boolean - true if the cursor is valid
**
** Returns TCL_OK on success. On failure TCL_ERROR is returned and the
** interpreter result is set to an error message.
*/
static int test_lsm_cursor_cmd(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  struct Subcmd {
    const char *zCmd;             /* Sub-command name */
    int nArg;                     /* Exact number of trailing arguments */
    const char *zUsage;           /* Usage string for those arguments */
  } aCmd[] = {
    /* 0 */ {"close",      0, ""},
    /* 1 */ {"seek",       2, "KEY SEEK-TYPE"},
    /* 2 */ {"first",      0, ""},
    /* 3 */ {"last",       0, ""},
    /* 4 */ {"next",       0, ""},
    /* 5 */ {"prev",       0, ""},
    /* 6 */ {"key",        0, ""},
    /* 7 */ {"value",      0, ""},
    /* 8 */ {"valid",      0, ""},
    {0, 0, 0}
  };
  int iCmd;
  int rc;
  TclLsmCursor *pCsr = (TclLsmCursor *)clientData;

  /* objv[1] is read below, so a sub-command must be present. */
  if( objc<2 ){
    Tcl_WrongNumArgs(interp, 1, objv, "SUB-COMMAND ...");
    return TCL_ERROR;
  }

  rc = Tcl_GetIndexFromObjStruct(
      interp, objv[1], aCmd, sizeof(aCmd[0]), "sub-command", 0, &iCmd
  );
  if( rc!=TCL_OK ) return rc;
  if( aCmd[iCmd].nArg>=0 && objc!=(2 + aCmd[iCmd].nArg) ){
    Tcl_WrongNumArgs(interp, 2, objv, aCmd[iCmd].zUsage);
    return TCL_ERROR;
  }

  /* The case labels below are indexes into aCmd[]. The assert() calls
  ** guard against the table and the switch drifting out of sync. */
  switch( iCmd ){

    case 0: assert( 0==strcmp(aCmd[0].zCmd, "close") ); {
      /* Deleting the command invokes test_lsm_cursor_del(). */
      Tcl_DeleteCommand(interp, Tcl_GetStringFromObj(objv[0], 0));
      return TCL_OK;
    }

    case 1: assert( 0==strcmp(aCmd[1].zCmd, "seek") ); {
      struct Seekbias {
        const char *zBias;
        int eBias;
      } aBias[] = {
        {"eq",     LSM_SEEK_EQ},
        {"le",     LSM_SEEK_LE},
        {"lefast", LSM_SEEK_LEFAST},
        {"ge",     LSM_SEEK_GE},
        {0, 0}
      };
      int iBias;
      const char *zKey; int nKey;
      zKey = Tcl_GetStringFromObj(objv[2], &nKey);

      rc = Tcl_GetIndexFromObjStruct(
          interp, objv[3], aBias, sizeof(aBias[0]), "bias", 0, &iBias
      );
      if( rc!=TCL_OK ) return rc;

      rc = lsm_csr_seek(pCsr->csr, zKey, nKey, aBias[iBias].eBias);
      return test_lsm_error(interp, "lsm_seek", rc);
    }

    case 2: 
    case 3: 
    case 4: 
    case 5: {
      const char *zApi;

      assert( 0==strcmp(aCmd[2].zCmd, "first") );
      assert( 0==strcmp(aCmd[3].zCmd, "last") );
      assert( 0==strcmp(aCmd[4].zCmd, "next") );
      assert( 0==strcmp(aCmd[5].zCmd, "prev") );

      switch( iCmd ){
        case 2: rc = lsm_csr_first(pCsr->csr); zApi = "lsm_csr_first"; break;
        case 3: rc = lsm_csr_last(pCsr->csr);  zApi = "lsm_csr_last";  break;
        case 4: rc = lsm_csr_next(pCsr->csr);  zApi = "lsm_csr_next";  break;
        default:
          /* Only iCmd==5 can reach here; using "default" guarantees that
          ** rc and zApi are always assigned. */
          assert( iCmd==5 );
          rc = lsm_csr_prev(pCsr->csr);  zApi = "lsm_csr_prev";
          break;
      }

      return test_lsm_error(interp, zApi, rc);
    }

    case 6: assert( 0==strcmp(aCmd[6].zCmd, "key") ); {
      const void *pKey; int nKey;
      rc = lsm_csr_key(pCsr->csr, &pKey, &nKey);
      /* On failure pKey/nKey may not have been set - report the error
      ** instead of building a result object from them. */
      if( rc!=LSM_OK ) return test_lsm_error(interp, "lsm_csr_key", rc);

      Tcl_SetObjResult(interp, Tcl_NewStringObj((const char *)pKey, nKey));
      return TCL_OK;
    }

    case 7: assert( 0==strcmp(aCmd[7].zCmd, "value") ); {
      const void *pVal; int nVal;
      rc = lsm_csr_value(pCsr->csr, &pVal, &nVal);
      /* As for "key": do not touch pVal/nVal after a failed call. */
      if( rc!=LSM_OK ) return test_lsm_error(interp, "lsm_csr_value", rc);

      Tcl_SetObjResult(interp, Tcl_NewStringObj((const char *)pVal, nVal));
      return TCL_OK;
    }

    case 8: assert( 0==strcmp(aCmd[8].zCmd, "valid") ); {
      int bValid = lsm_csr_valid(pCsr->csr);
      Tcl_SetObjResult(interp, Tcl_NewBooleanObj(bValid));
      return TCL_OK;
    }
  }

  /* Not reached unless aCmd[] gains an entry with no matching case. */
  Tcl_AppendResult(interp, "internal error", (char *)0);
  return TCL_ERROR;
}

/*
** Usage: DB sub-command ...
**
** Implementation of the Tcl command that wraps a single LSM database
** connection. The clientData pointer is the TclLsm created by the
** "lsm_open" command. Supported sub-commands:
**
**   close                           Delete this command (closes the db)
**   write KEY VALUE                 lsm_write()
**   delete KEY                      lsm_delete()
**   delete_range START-KEY END-KEY  lsm_delete_range()
**   begin|commit|rollback LEVEL     lsm_begin()/lsm_commit()/lsm_rollback()
**   csr_open CSR                    Open a cursor, create Tcl command CSR
**   work NPAGE ?SWITCHES?           lsm_work(). Switches: -optimize, -flush
**
** Returns TCL_OK on success. On failure TCL_ERROR is returned and the
** interpreter result is set to an error message.
*/
static int test_lsm_cmd(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  struct Subcmd {
    const char *zCmd;             /* Sub-command name */
    int nArg;                     /* Number of trailing args, or -1 for any */
    const char *zUsage;           /* Usage string for those arguments */
  } aCmd[] = {
    /* 0 */ {"close",        0, ""},
    /* 1 */ {"write",        2, "KEY VALUE"},
    /* 2 */ {"delete",       1, "KEY"},
    /* 3 */ {"delete_range", 2, "START-KEY END-KEY"},
    /* 4 */ {"begin",        1, "LEVEL"},
    /* 5 */ {"commit",       1, "LEVEL"},
    /* 6 */ {"rollback",     1, "LEVEL"},
    /* 7 */ {"csr_open",     1, "CSR"},
    /* 8 */ {"work",        -1, "NPAGE ?SWITCHES?"},
    {0, 0, 0}
  };
  int iCmd;
  int rc;
  TclLsm *p = (TclLsm *)clientData;

  if( objc<2 ){
    Tcl_WrongNumArgs(interp, 1, objv, "SUB-COMMAND ...");
    return TCL_ERROR;
  }

  rc = Tcl_GetIndexFromObjStruct(
      interp, objv[1], aCmd, sizeof(aCmd[0]), "sub-command", 0, &iCmd
  );
  if( rc!=TCL_OK ) return rc;
  if( aCmd[iCmd].nArg>=0 && objc!=(2 + aCmd[iCmd].nArg) ){
    Tcl_WrongNumArgs(interp, 2, objv, aCmd[iCmd].zUsage);
    return TCL_ERROR;
  }

  /* The case labels below are indexes into aCmd[]. The assert() calls
  ** guard against the table and the switch drifting out of sync. */
  switch( iCmd ){

    case 0: assert( 0==strcmp(aCmd[0].zCmd, "close") ); {
      /* Deleting the command invokes test_lsm_del(). */
      Tcl_DeleteCommand(interp, Tcl_GetStringFromObj(objv[0], 0));
      return TCL_OK;
    }

    case 1: assert( 0==strcmp(aCmd[1].zCmd, "write") ); {
      const char *zKey; int nKey;
      const char *zVal; int nVal;

      zKey = Tcl_GetStringFromObj(objv[2], &nKey);
      zVal = Tcl_GetStringFromObj(objv[3], &nVal);

      rc = lsm_write(p->db, zKey, nKey, zVal, nVal);
      return test_lsm_error(interp, "lsm_write", rc);
    }

    case 2: assert( 0==strcmp(aCmd[2].zCmd, "delete") ); {
      const char *zKey; int nKey;

      zKey = Tcl_GetStringFromObj(objv[2], &nKey);

      rc = lsm_delete(p->db, zKey, nKey);
      return test_lsm_error(interp, "lsm_delete", rc);
    }

    case 3: assert( 0==strcmp(aCmd[3].zCmd, "delete_range") ); {
      const char *zKey1; int nKey1;
      const char *zKey2; int nKey2;

      zKey1 = Tcl_GetStringFromObj(objv[2], &nKey1);
      zKey2 = Tcl_GetStringFromObj(objv[3], &nKey2);

      rc = lsm_delete_range(p->db, zKey1, nKey1, zKey2, nKey2);
      return test_lsm_error(interp, "lsm_delete_range", rc);
    }

    case 4: 
    case 5: 
    case 6: {
      const char *zApi;
      int iLevel;

      rc = Tcl_GetIntFromObj(interp, objv[2], &iLevel);
      if( rc!=TCL_OK ) return rc;

      assert( 0==strcmp(aCmd[4].zCmd, "begin") );
      assert( 0==strcmp(aCmd[5].zCmd, "commit") );
      assert( 0==strcmp(aCmd[6].zCmd, "rollback") );
      switch( iCmd ){
        case 4: rc = lsm_begin(p->db, iLevel); zApi = "lsm_begin"; break;
        case 5: rc = lsm_commit(p->db, iLevel); zApi = "lsm_commit"; break;
        default:
          /* Only iCmd==6 can reach here; using "default" guarantees that
          ** rc and zApi are always assigned. */
          assert( iCmd==6 );
          rc = lsm_rollback(p->db, iLevel); zApi = "lsm_rollback";
          break;
      }

      return test_lsm_error(interp, zApi, rc);
    }

    case 7: assert( 0==strcmp(aCmd[7].zCmd, "csr_open") ); {
      const char *zCsr = Tcl_GetString(objv[2]);
      TclLsmCursor *pCsr;

      pCsr = (TclLsmCursor *)ckalloc(sizeof(TclLsmCursor));
      /* ckalloc() does not zero memory. Make sure the cleanup path below
      ** never hands an indeterminate pointer to lsm_csr_close(). */
      pCsr->csr = 0;
      rc = lsm_csr_open(p->db, &pCsr->csr);
      if( rc!=LSM_OK ){
        test_lsm_cursor_del(pCsr);
        return test_lsm_error(interp, "lsm_csr_open", rc);
      }

      /* The new command owns pCsr; it is freed by test_lsm_cursor_del()
      ** when the command is deleted. */
      Tcl_CreateObjCommand(
          interp, zCsr, test_lsm_cursor_cmd, 
          (ClientData)pCsr, test_lsm_cursor_del
      );
      Tcl_SetObjResult(interp, objv[2]);
      return TCL_OK;
    }

    case 8: assert( 0==strcmp(aCmd[8].zCmd, "work") ); {
      int nWork;
      int nWrite = 0;
      int flags = 0;
      int i;

      /* "work" has nArg==-1, so the generic argument-count check above
      ** does not apply. NPAGE is mandatory: verify objv[2] exists before
      ** reading it. */
      if( objc<3 ){
        Tcl_WrongNumArgs(interp, 2, objv, aCmd[8].zUsage);
        return TCL_ERROR;
      }

      rc = Tcl_GetIntFromObj(interp, objv[2], &nWork);
      if( rc!=TCL_OK ) return rc;

      for(i=3; i<objc; i++){
        int iOpt;
        const char *azOpt[] = { "-optimize", "-flush", 0 };

        rc = Tcl_GetIndexFromObj(interp, objv[i], azOpt, "option", 0, &iOpt);
        if( rc!=TCL_OK ) return rc;

        if( iOpt==0 ) flags |= LSM_WORK_OPTIMIZE;
        if( iOpt==1 ) flags |= LSM_WORK_FLUSH;
      }

      rc = lsm_work(p->db, flags, nWork, &nWrite);
      if( rc!=LSM_OK ) return test_lsm_error(interp, "lsm_work", rc);
      Tcl_SetObjResult(interp, Tcl_NewIntObj(nWrite));
      return TCL_OK;
    }


    default:
      assert( 0 );
  }

  /* Not reached unless aCmd[] gains an entry with no matching case. The
  ** argument list of Tcl_AppendResult() must end with a null pointer. */
  Tcl_AppendResult(interp, "internal error", (char *)0);
  return TCL_ERROR;
}

/*
** Log callback registered with lsm_config_log(). Writes message z,
** followed by a newline, to stderr and flushes the stream. The context
** pointer and return code are ignored.
*/
static void xLog(void *pCtx, int rc, const char *z){
  (void)pCtx;                     /* unused */
  (void)rc;                       /* unused */

  fprintf(stderr, "%s\n", z);
  fflush(stderr);
}

/*
** Usage: lsm_open DB filename ?config?
**
** Create a new LSM handle, optionally configure it, open database file
** FILENAME and create a new Tcl command named DB (implemented by
** test_lsm_cmd()) that wraps the connection.
**
** If present, CONFIG is a list of alternating option names and integer
** values. Recognised option names are the zOpt entries of aConfig[] below.
**
** On success the interpreter result is set to DB and TCL_OK is returned.
** On failure the partially constructed handle is released, an error
** message is left in the interpreter and TCL_ERROR is returned.
*/
static int test_lsm_open(
  void * clientData,
  Tcl_Interp *interp,
  int objc,
  Tcl_Obj *CONST objv[]
){
  TclLsm *p;
  int rc;
  const char *zDb = 0;
  const char *zFile = 0;

  if( objc!=3 && objc!=4 ){
    Tcl_WrongNumArgs(interp, 1, objv, "DB FILENAME ?CONFIG?");
    return TCL_ERROR;
  }

  zDb = Tcl_GetString(objv[1]);
  zFile = Tcl_GetString(objv[2]);

  p = (TclLsm *)ckalloc(sizeof(TclLsm));
  /* ckalloc() does not zero memory. Make sure the cleanup path never
  ** hands an indeterminate pointer to lsm_close(). */
  p->db = 0;
  rc = lsm_new(0, &p->db);
  if( rc!=LSM_OK ){
    test_lsm_del((void *)p);
    return test_lsm_error(interp, "lsm_new", rc);
  }

  if( objc==4 ){
    struct Lsmconfig {
      const char *zOpt;           /* Option name as used in the Tcl list */
      int eOpt;                   /* Corresponding LSM_CONFIG_XXX value */
    } aConfig[] = {
      { "write_buffer",     LSM_CONFIG_WRITE_BUFFER },
      { "page_size",        LSM_CONFIG_PAGE_SIZE },
      { "block_size",       LSM_CONFIG_BLOCK_SIZE },
      { "safety",           LSM_CONFIG_SAFETY },
      { "autowork",         LSM_CONFIG_AUTOWORK },
      { "autocheckpoint",   LSM_CONFIG_AUTOCHECKPOINT },
      { "log_size",         LSM_CONFIG_LOG_SIZE },
      { "mmap",             LSM_CONFIG_MMAP },
      { "use_log",          LSM_CONFIG_USE_LOG },
      { "nmerge",           LSM_CONFIG_NMERGE },
      { "max_freelist",     LSM_CONFIG_MAX_FREELIST },
      { "multi_proc",       LSM_CONFIG_MULTIPLE_PROCESSES },
      { 0, 0 }
    };
    int nElem;
    int i;
    Tcl_Obj **apElem;

    /* Walk the list two elements at a time: option name, then value. */
    rc = Tcl_ListObjGetElements(interp, objv[3], &nElem, &apElem);
    for(i=0; rc==TCL_OK && i<nElem; i+=2){
      int iOpt;
      rc = Tcl_GetIndexFromObjStruct(
          interp, apElem[i], aConfig, sizeof(aConfig[0]), "option", 0, &iOpt
      );
      if( rc==TCL_OK ){
        if( i==(nElem-1) ){
          /* Odd number of list elements - the last option has no value. */
          Tcl_ResetResult(interp);
          Tcl_AppendResult(interp, "option \"", Tcl_GetString(apElem[i]), 
              "\" requires an argument", (char *)0
          );
          rc = TCL_ERROR;
        }else{
          int iVal;
          rc = Tcl_GetIntFromObj(interp, apElem[i+1], &iVal);
          if( rc==TCL_OK ){
            /* NOTE(review): the lsm_config() return code is discarded, so
            ** a rejected option value goes unreported. Left as-is because
            ** existing callers may depend on it - confirm. */
            lsm_config(p->db, aConfig[iOpt].eOpt, &iVal);
          }
        }
      }
    }
    if( rc!=TCL_OK ){ 
      test_lsm_del((void *)p);
      return rc;
    }
  }

  /* Route LSM log messages to stderr via xLog(). */
  lsm_config_log(p->db, xLog, 0);

  rc = lsm_open(p->db, zFile);
  if( rc!=LSM_OK ){
    test_lsm_del((void *)p);
    return test_lsm_error(interp, "lsm_open", rc);
  }

  /* The new command owns p; it is freed by test_lsm_del() when the
  ** command is deleted. */
  Tcl_CreateObjCommand(interp, zDb, test_lsm_cmd, (ClientData)p, test_lsm_del);
  Tcl_SetObjResult(interp, objv[1]);
  return TCL_OK;
}

/*
** Register the LSM test commands listed in aCmd[] - the sqlite4_lsm_*
** commands plus "lsm_open" - with the Tcl interpreter passed as the only
** argument. No client-data or delete callback is attached to any of them.
** Always returns TCL_OK.
*/
int SqlitetestLsm_Init(Tcl_Interp *interp){
  struct SyscallCmd {
    const char *zName;            /* Name of the Tcl command */
    Tcl_ObjCmdProc *xCmd;         /* C function implementing it */
  } aCmd[] = {
    { "sqlite4_lsm_work",       test_sqlite4_lsm_work                },
    { "sqlite4_lsm_checkpoint", test_sqlite4_lsm_checkpoint          },
    { "sqlite4_lsm_flush",      test_sqlite4_lsm_flush               },
    { "sqlite4_lsm_info",       test_sqlite4_lsm_info                },
    { "sqlite4_lsm_config",     test_sqlite4_lsm_config              },
    { "lsm_open",               test_lsm_open                        },
  };
  const int nCmd = (int)(sizeof(aCmd)/sizeof(aCmd[0]));
  int iCmd = 0;

  /* Commands are created in table order. */
  while( iCmd<nCmd ){
    struct SyscallCmd *pCmd = &aCmd[iCmd++];
    Tcl_CreateObjCommand(interp, pCmd->zName, pCmd->xCmd, 0, 0);
  }
  return TCL_OK;
}
Changes to tool/lsmperf.tcl.
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103

  set lRes [list]
  foreach {name sys} $lSys {
    set wt [list -w $nWrite -r $nRepeat -f $nFetch -system $sys]
    lappend lRes [exec_lsmtest_speed $nSec $wt]
    if {$sys != [lindex $lSys end]} {
      puts "Sleeping 20 seconds..."
      #after 20000
    }
  }

  # Set up the header part of the gnuplot script.
  #
  set xmax 0
  foreach res $lRes {







|







89
90
91
92
93
94
95
96
97
98
99
100
101
102
103

  set lRes [list]
  foreach {name sys} $lSys {
    set wt [list -w $nWrite -r $nRepeat -f $nFetch -system $sys]
    lappend lRes [exec_lsmtest_speed $nSec $wt]
    if {$sys != [lindex $lSys end]} {
      puts "Sleeping 20 seconds..."
      after 20000
    }
  }

  # Set up the header part of the gnuplot script.
  #
  set xmax 0
  foreach res $lRes {
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
  append script $data4

  append script "pause -1\n"
  exec_gnuplot_script $script $zPng
}

# Top-level benchmark invocation. The braced block lists alternating
# name / configuration-string entries (here "lsm-st" and "lsm-st2", the
# latter adding page_size=1024).
# NOTE(review): the meaning of the leading positional arguments
# (x.png 600 50000 50000 20) is fixed by do_write_test's signature, which
# is not visible in this excerpt -- confirm before changing them.
do_write_test x.png 600 50000 50000 20 {
 lsm-st     "mmap=1 multi_proc=0 safety=1 threads=1 autowork=1"
 lsm-st2    "page_size=1024 mmap=1 multi_proc=0 safety=1 threads=1 autowork=1"
}

# lsm-mt    "mmap=1 multi_proc=0 threads=2 autowork=0 autocheckpoint=8192000"
# lsm-mt     "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0"
# lsm-st     "mmap=1 multi_proc=0 safety=1 threads=1 autowork=1"
# lsm-mt     "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0"
# lsm-mt     "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0"







|
<







187
188
189
190
191
192
193
194

195
196
197
198
199
200
201
  append script $data4

  append script "pause -1\n"
  exec_gnuplot_script $script $zPng
}

# Top-level benchmark invocation. The braced block lists alternating
# name / configuration-string entries; here there is a single entry,
# "lsm-mt-1" (threads=3, autowork=0, block_size=1M).
# NOTE(review): the meaning of the leading positional arguments
# (x.png 600 50000 50000 20) is fixed by do_write_test's signature, which
# is not visible in this excerpt -- confirm before changing them.
do_write_test x.png 600 50000 50000 20 {
  lsm-mt-1 "mmap=1 multi_proc=0 safety=0 threads=3 autowork=0 block_size=1M"

}

# lsm-mt    "mmap=1 multi_proc=0 threads=2 autowork=0 autocheckpoint=8192000"
# lsm-mt     "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0"
# lsm-st     "mmap=1 multi_proc=0 safety=1 threads=1 autowork=1"
# lsm-mt     "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0"
# lsm-mt     "mmap=1 multi_proc=0 safety=1 threads=3 autowork=0"