SQLite

Changes On Branch mmap-experimental
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Changes In Branch mmap-experimental Excluding Merge-Ins

This is equivalent to a diff from 9f839ac0 to 638a39bb

2011-11-16
18:08
Merge the PCACHE2 changes into trunk. (check-in: 457513f2 user: drh tags: trunk)
2011-11-14
12:34
Further work on mmap(). Still does not work right - autovacuum tests are the first to fail. (Closed-Leaf check-in: 638a39bb user: drh tags: mmap-experimental)
01:55
Begin making experimental changes to use mmap() for reading content from a database. The code compiles, but crashes on the test suite. (check-in: 09be42d5 user: drh tags: mmap-experimental)
2011-11-13
21:44
Add a version number to the sqlite3_pcache_methods2 object. Other PCACHE2 documentation improvements. (Closed-Leaf check-in: 9f839ac0 user: drh tags: experimental-pcache)
2011-11-12
23:10
Attempt to modify btree.c so that it assumes that calls to sqlite3PagerWrite() will reallocate the page buffer. As there is not good way to test this assumption yet, probably a few spots were missed. (check-in: ceee03c7 user: drh tags: experimental-pcache)

Changes to src/btree.c.

723
724
725
726
727
728
729




















730
731
732
733
734
735
736
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  if( pCur->eState!=CURSOR_VALID || pCur->skipNext!=0 ){
    *pHasMoved = 1;
  }else{
    *pHasMoved = 0;
  }
  return SQLITE_OK;
}

/*
** Set up the correct data pointers for a MemPage
*/
static u8 *btreeGetData(MemPage *pPage){
  pPage->aData = sqlite3PagerGetData(pPage->pDbPage);
  pPage->aDataEnd = &pPage->aData[pPage->pBt->usableSize];
  pPage->aCellIdx = &pPage->aData[pPage->cellOffset];
  return pPage->aData;
}

/*
** Make a btree page is writable.
*/
static int btreeMakePageWriteable(MemPage *pPage){
  int rc;
  rc = sqlite3PagerWrite(pPage->pDbPage);
  btreeGetData(pPage);
  return rc;
}

#ifndef SQLITE_OMIT_AUTOVACUUM
/*
** Given a page number of a regular database page, return the page
** number for the pointer-map page that contains the entry for the
** input page number.
**
793
794
795
796
797
798
799

800
801
802
803
804
805
806
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827







+







  }
  assert( offset <= (int)pBt->usableSize-5 );
  pPtrmap = (u8 *)sqlite3PagerGetData(pDbPage);

  if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){
    TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent));
    *pRC= rc = sqlite3PagerWrite(pDbPage);
    pPtrmap = sqlite3PagerGetData(pDbPage);
    if( rc==SQLITE_OK ){
      pPtrmap[offset] = eType;
      put4byte(&pPtrmap[offset+1], parent);
    }
  }

ptrmap_exit:
1541
1542
1543
1544
1545
1546
1547
1548

1549
1550
1551
1552
1553
1554
1555
1562
1563
1564
1565
1566
1567
1568

1569
1570
1571
1572
1573
1574
1575
1576







-
+







/*
** Get a page from the pager.  Initialize the MemPage.pBt and
** MemPage.aData elements if needed.
**
** If the noContent flag is set, it means that we do not care about
** the content of the page at this time.  So do not go to the disk
** to fetch the content.  Just fill in the content with zeros for now.
** If in the future we call sqlite3PagerWrite() on this page, that
** If in the future we call btreeMakePageWriteable() on this page, that
** means we have started to be concerned about content and the disk
** read should occur at that point.
*/
static int btreeGetPage(
  BtShared *pBt,       /* The btree */
  Pgno pgno,           /* Number of the page to fetch */
  MemPage **ppPage,    /* Return the page in this parameter */
1631
1632
1633
1634
1635
1636
1637
1638

1639
1640
1641
1642
1643
1644
1645
1652
1653
1654
1655
1656
1657
1658

1659
1660
1661
1662
1663
1664
1665
1666







-
+







** call to btreeGetPage.
*/
static void releasePage(MemPage *pPage){
  if( pPage ){
    assert( pPage->aData );
    assert( pPage->pBt );
    assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage );
    assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData );
    /* assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); */
    assert( sqlite3_mutex_held(pPage->pBt->mutex) );
    sqlite3PagerUnref(pPage->pDbPage);
  }
}

/*
** During a rollback, when the pager reloads information into the cache
2306
2307
2308
2309
2310
2311
2312

2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323

2324

2325
2326

2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346

2347
2348

2349
2350
2351
2352
2353
2354

2355
2356
2357
2358
2359
2360
2361







+











+
-
+

-
+





-







*/
static int lockBtree(BtShared *pBt){
  int rc;              /* Result code from subfunctions */
  MemPage *pPage1;     /* Page 1 of the database file */
  int nPage;           /* Number of pages in the database */
  int nPageFile = 0;   /* Number of pages in the database file */
  int nPageHeader;     /* Number of pages in the database according to hdr */
  u8 *page1;           /* Content of page 1 */

  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( pBt->pPage1==0 );
  rc = sqlite3PagerSharedLock(pBt->pPager);
  if( rc!=SQLITE_OK ) return rc;
  rc = btreeGetPage(pBt, 1, &pPage1, 0);
  if( rc!=SQLITE_OK ) return rc;

  /* Do some checking to help insure the file we opened really is
  ** a valid database file. 
  */
  page1 = btreeGetData(pPage1);
  nPage = nPageHeader = get4byte(28+(u8*)pPage1->aData);
  nPage = nPageHeader = get4byte(&page1[28]);
  sqlite3PagerPagecount(pBt->pPager, &nPageFile);
  if( nPage==0 || memcmp(24+(u8*)pPage1->aData, 92+(u8*)pPage1->aData,4)!=0 ){
  if( nPage==0 || memcmp(24+page1, 92+page1, 4)!=0 ){
    nPage = nPageFile;
  }
  if( nPage>0 ){
    u32 pageSize;
    u32 usableSize;
    u8 *page1 = pPage1->aData;
    rc = SQLITE_NOTADB;
    if( memcmp(page1, zMagicHeader, 16)!=0 ){
      goto page1_init_failed;
    }

#ifdef SQLITE_OMIT_WAL
    if( page1[18]>1 ){
2478
2479
2480
2481
2482
2483
2484
2485

2486
2487

2488
2489
2490
2491
2492
2493
2494
2500
2501
2502
2503
2504
2505
2506

2507
2508

2509
2510
2511
2512
2513
2514
2515
2516







-
+

-
+








  assert( sqlite3_mutex_held(pBt->mutex) );
  if( pBt->nPage>0 ){
    return SQLITE_OK;
  }
  pP1 = pBt->pPage1;
  assert( pP1!=0 );
  rc = sqlite3PagerWrite(pP1->pDbPage);
  rc = btreeMakePageWriteable(pP1);
  if( rc ) return rc;
  data = pP1->aData;
  data = btreeGetData(pP1);
  memcpy(data, zMagicHeader, sizeof(zMagicHeader));
  assert( sizeof(zMagicHeader)==16 );
  data[16] = (u8)((pBt->pageSize>>8)&0xff);
  data[17] = (u8)((pBt->pageSize>>16)&0xff);
  data[18] = 1;
  data[19] = 1;
  assert( pBt->usableSize<=pBt->pageSize && pBt->usableSize+255>=pBt->pageSize);
2650
2651
2652
2653
2654
2655
2656

2657
2658

2659
2660
2661
2662
2663
2664
2665
2672
2673
2674
2675
2676
2677
2678
2679
2680

2681
2682
2683
2684
2685
2686
2687
2688







+

-
+








      /* If the db-size header field is incorrect (as it may be if an old
      ** client has been writing the database file), update it now. Doing
      ** this sooner rather than later means the database size can safely 
      ** re-read the database size from page 1 if a savepoint or transaction
      ** rollback occurs within the transaction.
      */
      btreeGetData(pPage1);
      if( pBt->nPage!=get4byte(&pPage1->aData[28]) ){
        rc = sqlite3PagerWrite(pPage1->pDbPage);
        rc = btreeMakePageWriteable(pPage1);
        if( rc==SQLITE_OK ){
          put4byte(&pPage1->aData[28], pBt->nPage);
        }
      }
    }
  }

2850
2851
2852
2853
2854
2855
2856
2857

2858
2859
2860
2861
2862
2863
2864
2873
2874
2875
2876
2877
2878
2879

2880
2881
2882
2883
2884
2885
2886
2887







-
+







  ** iPtrPage.
  */
  if( eType!=PTRMAP_ROOTPAGE ){
    rc = btreeGetPage(pBt, iPtrPage, &pPtrPage, 0);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    rc = sqlite3PagerWrite(pPtrPage->pDbPage);
    rc = btreeMakePageWriteable(pPtrPage);
    if( rc!=SQLITE_OK ){
      releasePage(pPtrPage);
      return rc;
    }
    rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType);
    releasePage(pPtrPage);
    if( rc==SQLITE_OK ){
2896
2897
2898
2899
2900
2901
2902

2903
2904
2905
2906
2907
2908
2909
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933







+







  assert( sqlite3_mutex_held(pBt->mutex) );
  assert( iLastPg>nFin );

  if( !PTRMAP_ISPAGE(pBt, iLastPg) && iLastPg!=PENDING_BYTE_PAGE(pBt) ){
    u8 eType;
    Pgno iPtrPage;

    btreeGetData(pBt->pPage1);
    nFreeList = get4byte(&pBt->pPage1->aData[36]);
    if( nFreeList==0 ){
      return SQLITE_DONE;
    }

    rc = ptrmapGet(pBt, iLastPg, &eType, &iPtrPage);
    if( rc!=SQLITE_OK ){
2952
2953
2954
2955
2956
2957
2958
2959

2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979

2980
2981
2982
2983
2984
2985
2986
2976
2977
2978
2979
2980
2981
2982

2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002

3003
3004
3005
3006
3007
3008
3009
3010







-
+



















-
+







          releasePage(pLastPg);
          return rc;
        }
        releasePage(pFreePg);
      }while( nFin!=0 && iFreePg>nFin );
      assert( iFreePg<iLastPg );
      
      rc = sqlite3PagerWrite(pLastPg->pDbPage);
      rc = btreeMakePageWriteable(pLastPg);
      if( rc==SQLITE_OK ){
        rc = relocatePage(pBt, pLastPg, eType, iPtrPage, iFreePg, nFin!=0);
      }
      releasePage(pLastPg);
      if( rc!=SQLITE_OK ){
        return rc;
      }
    }
  }

  if( nFin==0 ){
    iLastPg--;
    while( iLastPg==PENDING_BYTE_PAGE(pBt)||PTRMAP_ISPAGE(pBt, iLastPg) ){
      if( PTRMAP_ISPAGE(pBt, iLastPg) ){
        MemPage *pPg;
        rc = btreeGetPage(pBt, iLastPg, &pPg, 0);
        if( rc!=SQLITE_OK ){
          return rc;
        }
        rc = sqlite3PagerWrite(pPg->pDbPage);
        rc = btreeMakePageWriteable(pPg);
        releasePage(pPg);
        if( rc!=SQLITE_OK ){
          return rc;
        }
      }
      iLastPg--;
    }
3006
3007
3008
3009
3010
3011
3012
3013

3014
3015
3016
3017
3018
3019
3020
3030
3031
3032
3033
3034
3035
3036

3037
3038
3039
3040
3041
3042
3043
3044







-
+







  assert( pBt->inTransaction==TRANS_WRITE && p->inTrans==TRANS_WRITE );
  if( !pBt->autoVacuum ){
    rc = SQLITE_DONE;
  }else{
    invalidateAllOverflowCache(pBt);
    rc = incrVacuumStep(pBt, 0, btreePagecount(pBt));
    if( rc==SQLITE_OK ){
      rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
      rc = btreeMakePageWriteable(pBt->pPage1);
      put4byte(&pBt->pPage1->aData[28], pBt->nPage);
    }
  }
  sqlite3BtreeLeave(p);
  return rc;
}

3048
3049
3050
3051
3052
3053
3054

3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071

3072
3073
3074
3075
3076
3077
3078
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095

3096
3097
3098
3099
3100
3101
3102
3103







+
















-
+







      /* It is not possible to create a database for which the final page
      ** is either a pointer-map page or the pending-byte page. If one
      ** is encountered, this indicates corruption.
      */
      return SQLITE_CORRUPT_BKPT;
    }

    btreeGetData(pBt->pPage1);
    nFree = get4byte(&pBt->pPage1->aData[36]);
    nEntry = pBt->usableSize/5;
    nPtrmap = (nFree-nOrig+PTRMAP_PAGENO(pBt, nOrig)+nEntry)/nEntry;
    nFin = nOrig - nFree - nPtrmap;
    if( nOrig>PENDING_BYTE_PAGE(pBt) && nFin<PENDING_BYTE_PAGE(pBt) ){
      nFin--;
    }
    while( PTRMAP_ISPAGE(pBt, nFin) || nFin==PENDING_BYTE_PAGE(pBt) ){
      nFin--;
    }
    if( nFin>nOrig ) return SQLITE_CORRUPT_BKPT;

    for(iFree=nOrig; iFree>nFin && rc==SQLITE_OK; iFree--){
      rc = incrVacuumStep(pBt, nFin, iFree);
    }
    if( (rc==SQLITE_DONE || rc==SQLITE_OK) && nFree>0 ){
      rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
      rc = btreeMakePageWriteable(pBt->pPage1);
      put4byte(&pBt->pPage1->aData[32], 0);
      put4byte(&pBt->pPage1->aData[36], 0);
      put4byte(&pBt->pPage1->aData[28], nFin);
      sqlite3PagerTruncateImage(pBt->pPager, nFin);
      pBt->nPage = nFin;
    }
    if( rc!=SQLITE_OK ){
3407
3408
3409
3410
3411
3412
3413
3414

3415
3416
3417
3418
3419
3420
3421
3432
3433
3434
3435
3436
3437
3438

3439
3440
3441
3442
3443
3444
3445
3446







-
+







    assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK );
    assert( iSavepoint>=0 || (iSavepoint==-1 && op==SAVEPOINT_ROLLBACK) );
    sqlite3BtreeEnter(p);
    rc = sqlite3PagerSavepoint(pBt->pPager, op, iSavepoint);
    if( rc==SQLITE_OK ){
      if( iSavepoint<0 && pBt->initiallyEmpty ) pBt->nPage = 0;
      rc = newDatabase(pBt);
      pBt->nPage = get4byte(28 + pBt->pPage1->aData);
      pBt->nPage = get4byte(28 + btreeGetData(pBt->pPage1));

      /* The database size was written into the offset 28 of the header
      ** when the transaction started, so we know that the value at offset
      ** 28 is nonzero. */
      assert( pBt->nPage>0 );
    }
    sqlite3BtreeLeave(p);
3844
3845
3846
3847
3848
3849
3850
3851

3852
3853
3854
3855
3856
3857
3858
3869
3870
3871
3872
3873
3874
3875

3876
3877
3878
3879
3880
3881
3882
3883







-
+







  /* Check if data must be read/written to/from the btree page itself. */
  if( offset<pCur->info.nLocal ){
    int a = amt;
    if( a+offset>pCur->info.nLocal ){
      a = pCur->info.nLocal - offset;
    }
    if( eOp ){
      if( (rc = sqlite3PagerWrite(pPage->pDbPage))!=SQLITE_OK ) return rc;
      if( (rc = btreeMakePageWriteable(pPage))!=SQLITE_OK ) return rc;
      getCellInfo(pCur);
      aPayload = pCur->info.pCell + pCur->info.nHeader;
      memcpy(aPayload+offset, pBuf, a);
    }else{
      memcpy(pBuf, aPayload+offset, a);
    }
    offset = 0;
3946
3947
3948
3949
3950
3951
3952
3953

3954
3955
3956
3957
3958
3959
3960
3971
3972
3973
3974
3975
3976
3977

3978
3979
3980
3981
3982
3983
3984
3985







-
+







        ** output buffer, bypassing the page-cache altogether. This speeds
        ** up loading large records that span many overflow pages.
        */
        if( eOp==0                                             /* (1) */
         && offset==0                                          /* (2) */
         && pBt->inTransaction==TRANS_READ                     /* (4) */
         && (fd = sqlite3PagerFile(pBt->pPager))->pMethods     /* (3) */
         && pBt->pPage1->aData[19]==0x01                       /* (5) */
         && btreeGetData(pBt->pPage1)[19]==0x01                /* (5) */
        ){
          u8 aSave[4];
          u8 *aWrite = &pBuf[-4];
          memcpy(aSave, aWrite, 4);
          rc = sqlite3OsRead(fd, aWrite, a+4, pBt->pageSize * (nextPage-1));
          nextPage = get4byte(aWrite);
          memcpy(aWrite, aSave, 4);
4772
4773
4774
4775
4776
4777
4778
4779

4780
4781
4782
4783
4784
4785
4786
4797
4798
4799
4800
4801
4802
4803

4804
4805
4806
4807
4808
4809
4810
4811







-
+







  *pRes = 0;
  return rc;
}

/*
** Allocate a new page from the database file.
**
** The new page is marked as dirty.  (In other words, sqlite3PagerWrite()
** The new page is marked as dirty.  (In other words, btreeMakePageWriteable()
** has already been called on the new page.)  The new page has also
** been referenced and the calling routine is responsible for calling
** sqlite3PagerUnref() on the new page when it is done.
**
** SQLITE_OK is returned on success.  Any other return value indicates
** an error.  *ppPage and *pPgno are undefined in the event of an error.
** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned.
4808
4809
4810
4811
4812
4813
4814
4815

4816
4817
4818
4819
4820
4821
4822
4833
4834
4835
4836
4837
4838
4839

4840
4841
4842
4843
4844
4845
4846
4847







-
+







  MemPage *pTrunk = 0;
  MemPage *pPrevTrunk = 0;
  Pgno mxPage;     /* Total size of the database file */

  assert( sqlite3_mutex_held(pBt->mutex) );
  pPage1 = pBt->pPage1;
  mxPage = btreePagecount(pBt);
  n = get4byte(&pPage1->aData[36]);
  n = get4byte(&btreeGetData(pPage1)[36]);
  testcase( n==mxPage-1 );
  if( n>=mxPage ){
    return SQLITE_CORRUPT_BKPT;
  }
  if( n>0 ){
    /* There are pages on the freelist.  Reuse one of those pages. */
    Pgno iTrunk;
4839
4840
4841
4842
4843
4844
4845
4846

4847
4848
4849
4850
4851
4852
4853
4864
4865
4866
4867
4868
4869
4870

4871
4872
4873
4874
4875
4876
4877
4878







-
+







      *pPgno = nearby;
    }
#endif

    /* Decrement the free-list count by 1. Set iTrunk to the index of the
    ** first free-list trunk page. iPrevTrunk is initially 1.
    */
    rc = sqlite3PagerWrite(pPage1->pDbPage);
    rc = btreeMakePageWriteable(pPage1);
    if( rc ) return rc;
    put4byte(&pPage1->aData[36], n-1);

    /* The code within this loop is run only once if the 'searchList' variable
    ** is not true. Otherwise, it runs once for each trunk-page on the
    ** free-list until the page 'nearby' is located.
    */
4873
4874
4875
4876
4877
4878
4879
4880

4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901

4902
4903
4904
4905
4906
4907
4908
4909

4910
4911
4912
4913
4914
4915
4916
4898
4899
4900
4901
4902
4903
4904

4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925

4926
4927
4928
4929
4930
4931
4932
4933

4934
4935
4936
4937
4938
4939
4940
4941







-
+




















-
+







-
+








      k = get4byte(&pTrunk->aData[4]); /* # of leaves on this trunk page */
      if( k==0 && !searchList ){
        /* The trunk has no leaves and the list is not being searched. 
        ** So extract the trunk page itself and use it as the newly 
        ** allocated page */
        assert( pPrevTrunk==0 );
        rc = sqlite3PagerWrite(pTrunk->pDbPage);
        rc = btreeMakePageWriteable(pTrunk);
        if( rc ){
          goto end_allocate_page;
        }
        *pPgno = iTrunk;
        memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
        *ppPage = pTrunk;
        pTrunk = 0;
        TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
      }else if( k>(u32)(pBt->usableSize/4 - 2) ){
        /* Value of k is out of range.  Database corruption */
        rc = SQLITE_CORRUPT_BKPT;
        goto end_allocate_page;
#ifndef SQLITE_OMIT_AUTOVACUUM
      }else if( searchList && nearby==iTrunk ){
        /* The list is being searched and this trunk page is the page
        ** to allocate, regardless of whether it has leaves.
        */
        assert( *pPgno==iTrunk );
        *ppPage = pTrunk;
        searchList = 0;
        rc = sqlite3PagerWrite(pTrunk->pDbPage);
        rc = btreeMakePageWriteable(pTrunk);
        if( rc ){
          goto end_allocate_page;
        }
        if( k==0 ){
          if( !pPrevTrunk ){
            memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
          }else{
            rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
            rc = btreeMakePageWriteable(pPrevTrunk);
            if( rc!=SQLITE_OK ){
              goto end_allocate_page;
            }
            memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4);
          }
        }else{
          /* The trunk page is required by the caller but it contains 
4924
4925
4926
4927
4928
4929
4930
4931

4932
4933
4934
4935
4936
4937
4938
4939
4940
4941
4942
4943
4944

4945
4946
4947
4948
4949
4950
4951
4949
4950
4951
4952
4953
4954
4955

4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968

4969
4970
4971
4972
4973
4974
4975
4976







-
+












-
+







            goto end_allocate_page;
          }
          testcase( iNewTrunk==mxPage );
          rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0);
          if( rc!=SQLITE_OK ){
            goto end_allocate_page;
          }
          rc = sqlite3PagerWrite(pNewTrunk->pDbPage);
          rc = btreeMakePageWriteable(pNewTrunk);
          if( rc!=SQLITE_OK ){
            releasePage(pNewTrunk);
            goto end_allocate_page;
          }
          memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4);
          put4byte(&pNewTrunk->aData[4], k-1);
          memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4);
          releasePage(pNewTrunk);
          if( !pPrevTrunk ){
            assert( sqlite3PagerIswriteable(pPage1->pDbPage) );
            put4byte(&pPage1->aData[32], iNewTrunk);
          }else{
            rc = sqlite3PagerWrite(pPrevTrunk->pDbPage);
            rc = btreeMakePageWriteable(pPrevTrunk);
            if( rc ){
              goto end_allocate_page;
            }
            put4byte(&pPrevTrunk->aData[0], iNewTrunk);
          }
        }
        pTrunk = 0;
4981
4982
4983
4984
4985
4986
4987
4988

4989

4990
4991
4992
4993
4994
4995
4996
4997

4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011

5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027

5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041

5042
5043
5044
5045
5046
5047
5048
5006
5007
5008
5009
5010
5011
5012

5013
5014
5015
5016
5017
5018
5019
5020
5021
5022

5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036

5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052

5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066

5067
5068
5069
5070
5071
5072
5073
5074







-
+

+







-
+













-
+















-
+













-
+







        testcase( iPage==mxPage );
        if( !searchList || iPage==nearby ){
          int noContent;
          *pPgno = iPage;
          TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
                 ": %d more free pages\n",
                 *pPgno, closest+1, k, pTrunk->pgno, n-1));
          rc = sqlite3PagerWrite(pTrunk->pDbPage);
          rc = btreeMakePageWriteable(pTrunk);
          if( rc ) goto end_allocate_page;
          aData = pTrunk->aData;
          if( closest<k-1 ){
            memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
          }
          put4byte(&aData[4], k-1);
          noContent = !btreeGetHasContent(pBt, *pPgno);
          rc = btreeGetPage(pBt, *pPgno, ppPage, noContent);
          if( rc==SQLITE_OK ){
            rc = sqlite3PagerWrite((*ppPage)->pDbPage);
            rc = btreeMakePageWriteable(*ppPage);
            if( rc!=SQLITE_OK ){
              releasePage(*ppPage);
            }
          }
          searchList = 0;
        }
      }
      releasePage(pPrevTrunk);
      pPrevTrunk = 0;
    }while( searchList );
  }else{
    /* There are no pages on the freelist, so create a new page at the
    ** end of the file */
    rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
    rc = btreeMakePageWriteable(pBt->pPage1);
    if( rc ) return rc;
    pBt->nPage++;
    if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++;

#ifndef SQLITE_OMIT_AUTOVACUUM
    if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, pBt->nPage) ){
      /* If *pPgno refers to a pointer-map page, allocate two new pages
      ** at the end of the file instead of one. The first allocated page
      ** becomes a new pointer-map page, the second is used by the caller.
      */
      MemPage *pPg = 0;
      TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage));
      assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) );
      rc = btreeGetPage(pBt, pBt->nPage, &pPg, 1);
      if( rc==SQLITE_OK ){
        rc = sqlite3PagerWrite(pPg->pDbPage);
        rc = btreeMakePageWriteable(pPg);
        releasePage(pPg);
      }
      if( rc ) return rc;
      pBt->nPage++;
      if( pBt->nPage==PENDING_BYTE_PAGE(pBt) ){ pBt->nPage++; }
    }
#endif
    put4byte(28 + (u8*)pBt->pPage1->aData, pBt->nPage);
    *pPgno = pBt->nPage;

    assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
    rc = btreeGetPage(pBt, *pPgno, ppPage, 1);
    if( rc ) return rc;
    rc = sqlite3PagerWrite((*ppPage)->pDbPage);
    rc = btreeMakePageWriteable(*ppPage);
    if( rc!=SQLITE_OK ){
      releasePage(*ppPage);
    }
    TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
  }

  assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
5091
5092
5093
5094
5095
5096
5097
5098

5099
5100
5101
5102
5103
5104
5105
5106
5107
5108

5109
5110
5111
5112
5113
5114
5115
5117
5118
5119
5120
5121
5122
5123

5124
5125
5126
5127
5128
5129
5130
5131
5132
5133

5134
5135
5136
5137
5138
5139
5140
5141







-
+









-
+







    pPage = pMemPage;
    sqlite3PagerRef(pPage->pDbPage);
  }else{
    pPage = btreePageLookup(pBt, iPage);
  }

  /* Increment the free page count on pPage1 */
  rc = sqlite3PagerWrite(pPage1->pDbPage);
  rc = btreeMakePageWriteable(pPage1);
  if( rc ) goto freepage_out;
  nFree = get4byte(&pPage1->aData[36]);
  put4byte(&pPage1->aData[36], nFree+1);

  if( pBt->secureDelete ){
    /* If the secure_delete option is enabled, then
    ** always fully overwrite deleted information with zeros.
    */
    if( (!pPage && ((rc = btreeGetPage(pBt, iPage, &pPage, 0))!=0) )
     ||            ((rc = sqlite3PagerWrite(pPage->pDbPage))!=0)
     ||            ((rc = btreeMakePageWriteable(pPage))!=0)
    ){
      goto freepage_out;
    }
    memset(pPage->aData, 0, pPage->pBt->pageSize);
  }

  /* If the database supports auto-vacuum, write an entry in the pointer-map
5153
5154
5155
5156
5157
5158
5159
5160

5161
5162
5163
5164
5165
5166
5167
5179
5180
5181
5182
5183
5184
5185

5186
5187
5188
5189
5190
5191
5192
5193







-
+







      ** usableSize/4 - 8 entries will be reported as corrupt.  In order
      ** to maintain backwards compatibility with older versions of SQLite,
      ** we will continue to restrict the number of entries to usableSize/4 - 8
      ** for now.  At some point in the future (once everyone has upgraded
      ** to 3.6.0 or later) we should consider fixing the conditional above
      ** to read "usableSize/4-2" instead of "usableSize/4-8".
      */
      rc = sqlite3PagerWrite(pTrunk->pDbPage);
      rc = btreeMakePageWriteable(pTrunk);
      if( rc==SQLITE_OK ){
        put4byte(&pTrunk->aData[4], nLeaf+1);
        put4byte(&pTrunk->aData[8+nLeaf*4], iPage);
        if( pPage && !pBt->secureDelete ){
          sqlite3PagerDontWrite(pPage->pDbPage);
        }
        rc = btreeSetHasContent(pBt, iPage);
5176
5177
5178
5179
5180
5181
5182
5183

5184
5185
5186
5187
5188
5189
5190
5202
5203
5204
5205
5206
5207
5208

5209
5210
5211
5212
5213
5214
5215
5216







-
+







  ** Possibly because the free-list is empty, or possibly because the 
  ** first trunk in the free-list is full. Either way, the page being freed
  ** will become the new first trunk page in the free-list.
  */
  if( pPage==0 && SQLITE_OK!=(rc = btreeGetPage(pBt, iPage, &pPage, 0)) ){
    goto freepage_out;
  }
  rc = sqlite3PagerWrite(pPage->pDbPage);
  rc = btreeMakePageWriteable(pPage);
  if( rc!=SQLITE_OK ){
    goto freepage_out;
  }
  put4byte(pPage->aData, iTrunk);
  put4byte(&pPage->aData[4], 0);
  put4byte(&pPage1->aData[32], iPage);
  TRACE(("FREE-PAGE: %d new trunk page replacing %d\n", pPage->pgno, iTrunk));
5536
5537
5538
5539
5540
5541
5542
5543

5544
5545
5546
5547
5548
5549
5550
5562
5563
5564
5565
5566
5567
5568

5569
5570
5571
5572
5573
5574
5575
5576







-
+







      put4byte(pCell, iChild);
    }
    j = pPage->nOverflow++;
    assert( j<(int)(sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0])) );
    pPage->aOvfl[j].pCell = pCell;
    pPage->aOvfl[j].idx = (u16)i;
  }else{
    int rc = sqlite3PagerWrite(pPage->pDbPage);
    int rc = btreeMakePageWriteable(pPage);
    if( rc!=SQLITE_OK ){
      *pRC = rc;
      return;
    }
    assert( sqlite3PagerIswriteable(pPage->pDbPage) );
    data = pPage->aData;
    cellOffset = pPage->cellOffset;
6205
6206
6207
6208
6209
6210
6211
6212

6213
6214
6215
6216
6217
6218
6219
6231
6232
6233
6234
6235
6236
6237

6238
6239
6240
6241
6242
6243
6244
6245







-
+







  }
  pageFlags = apOld[0]->aData[0];
  for(i=0; i<k; i++){
    MemPage *pNew;
    if( i<nOld ){
      pNew = apNew[i] = apOld[i];
      apOld[i] = 0;
      rc = sqlite3PagerWrite(pNew->pDbPage);
      rc = btreeMakePageWriteable(pNew);
      nNew++;
      if( rc ) goto balance_cleanup;
    }else{
      assert( i>0 );
      rc = allocateBtreePage(pBt, &pNew, &pgno, pgno, 0);
      if( rc ) goto balance_cleanup;
      apNew[i] = pNew;
6534
6535
6536
6537
6538
6539
6540
6541

6542
6543
6544
6545
6546
6547
6548
6560
6561
6562
6563
6564
6565
6566

6567
6568
6569
6570
6571
6572
6573
6574







-
+







  assert( pRoot->nOverflow>0 );
  assert( sqlite3_mutex_held(pBt->mutex) );

  /* Make pRoot, the root page of the b-tree, writable. Allocate a new 
  ** page that will become the new right-child of pPage. Copy the contents
  ** of the node stored on pRoot into the new child page.
  */
  rc = sqlite3PagerWrite(pRoot->pDbPage);
  rc = btreeMakePageWriteable(pRoot);
  if( rc==SQLITE_OK ){
    rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0);
    copyNodeContent(pRoot, pChild, &rc);
    if( ISAUTOVACUUM ){
      ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc);
    }
  }
6612
6613
6614
6615
6616
6617
6618
6619

6620
6621
6622
6623
6624
6625
6626
6638
6639
6640
6641
6642
6643
6644

6645
6646
6647
6648
6649
6650
6651
6652







-
+







      }
    }else if( pPage->nOverflow==0 && pPage->nFree<=nMin ){
      break;
    }else{
      MemPage * const pParent = pCur->apPage[iPage-1];
      int const iIdx = pCur->aiIdx[iPage-1];

      rc = sqlite3PagerWrite(pParent->pDbPage);
      rc = btreeMakePageWriteable(pParent);
      if( rc==SQLITE_OK ){
#ifndef SQLITE_OMIT_QUICKBALANCE
        if( pPage->hasData
         && pPage->nOverflow==1
         && pPage->aOvfl[0].idx==pPage->nCell
         && pParent->pgno!=1
         && pParent->nCell==iIdx
6789
6790
6791
6792
6793
6794
6795
6796

6797
6798
6799
6800
6801
6802
6803
6815
6816
6817
6818
6819
6820
6821

6822
6823
6824
6825
6826
6827
6828
6829







-
+







  if( rc ) goto end_insert;
  assert( szNew==cellSizePtr(pPage, newCell) );
  assert( szNew <= MX_CELL_SIZE(pBt) );
  idx = pCur->aiIdx[pCur->iPage];
  if( loc==0 ){
    u16 szOld;
    assert( idx<pPage->nCell );
    rc = sqlite3PagerWrite(pPage->pDbPage);
    rc = btreeMakePageWriteable(pPage);
    if( rc ){
      goto end_insert;
    }
    oldCell = findCell(pPage, idx);
    if( !pPage->leaf ){
      memcpy(newCell, oldCell, 4);
    }
6905
6906
6907
6908
6909
6910
6911
6912

6913
6914
6915
6916
6917
6918
6919
6931
6932
6933
6934
6935
6936
6937

6938
6939
6940
6941
6942
6943
6944
6945







-
+







  /* Save the positions of any other cursors open on this table before
  ** making any modifications. Make the page containing the entry to be 
  ** deleted writable. Then free any overflow pages associated with the 
  ** entry and finally remove the cell itself from within the page.  
  */
  rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
  if( rc ) return rc;
  rc = sqlite3PagerWrite(pPage->pDbPage);
  rc = btreeMakePageWriteable(pPage);
  if( rc ) return rc;
  rc = clearCell(pPage, pCell);
  dropCell(pPage, iCellIdx, cellSizePtr(pPage, pCell), &rc);
  if( rc ) return rc;

  /* If the cell deleted was not located on a leaf page, then the cursor
  ** is currently pointing to the largest entry in the sub-tree headed
6929
6930
6931
6932
6933
6934
6935
6936

6937
6938
6939
6940
6941
6942
6943
6955
6956
6957
6958
6959
6960
6961

6962
6963
6964
6965
6966
6967
6968
6969







-
+







    pCell = findCell(pLeaf, pLeaf->nCell-1);
    nCell = cellSizePtr(pLeaf, pCell);
    assert( MX_CELL_SIZE(pBt) >= nCell );

    allocateTempSpace(pBt);
    pTmp = pBt->pTmpSpace;

    rc = sqlite3PagerWrite(pLeaf->pDbPage);
    rc = btreeMakePageWriteable(pLeaf);
    insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc);
    dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc);
    if( rc ) return rc;
  }

  /* Balance the tree. If the entry deleted was located on a leaf page,
  ** then the cursor still points to that page. In this case the first
7066
7067
7068
7069
7070
7071
7072
7073

7074
7075
7076
7077
7078
7079
7080
7092
7093
7094
7095
7096
7097
7098

7099
7100
7101
7102
7103
7104
7105
7106







-
+







      if( rc!=SQLITE_OK ){
        return rc;
      }
      rc = btreeGetPage(pBt, pgnoRoot, &pRoot, 0);
      if( rc!=SQLITE_OK ){
        return rc;
      }
      rc = sqlite3PagerWrite(pRoot->pDbPage);
      rc = btreeMakePageWriteable(pRoot);
      if( rc!=SQLITE_OK ){
        releasePage(pRoot);
        return rc;
      }
    }else{
      pRoot = pPageMove;
    } 
7158
7159
7160
7161
7162
7163
7164
7165

7166
7167
7168
7169
7170
7171
7172
7184
7185
7186
7187
7188
7189
7190

7191
7192
7193
7194
7195
7196
7197
7198







-
+







    if( rc ) goto cleardatabasepage_out;
  }else if( pnChange ){
    assert( pPage->intKey );
    *pnChange += pPage->nCell;
  }
  if( freePageFlag ){
    freePage(pPage, &rc);
  }else if( (rc = sqlite3PagerWrite(pPage->pDbPage))==0 ){
  }else if( (rc = btreeMakePageWriteable(pPage))==0 ){
    zeroPage(pPage, pPage->aData[0] | PTF_LEAF);
  }

cleardatabasepage_out:
  releasePage(pPage);
  return rc;
}
7353
7354
7355
7356
7357
7358
7359
7360

7361
7362
7363
7364
7365
7366
7367
7379
7380
7381
7382
7383
7384
7385

7386
7387
7388
7389
7390
7391
7392
7393







-
+








  sqlite3BtreeEnter(p);
  assert( p->inTrans>TRANS_NONE );
  assert( SQLITE_OK==querySharedCacheTableLock(p, MASTER_ROOT, READ_LOCK) );
  assert( pBt->pPage1 );
  assert( idx>=0 && idx<=15 );

  *pMeta = get4byte(&pBt->pPage1->aData[36 + idx*4]);
  *pMeta = get4byte(&btreeGetData(pBt->pPage1)[36 + idx*4]);

  /* If auto-vacuum is disabled in this build and this is an auto-vacuum
  ** database, mark the database as read-only.  */
#ifdef SQLITE_OMIT_AUTOVACUUM
  if( idx==BTREE_LARGEST_ROOT_PAGE && *pMeta>0 ) pBt->readOnly = 1;
#endif

7375
7376
7377
7378
7379
7380
7381
7382

7383
7384
7385
7386
7387
7388
7389
7401
7402
7403
7404
7405
7406
7407

7408
7409
7410
7411
7412
7413
7414
7415







-
+







int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
  BtShared *pBt = p->pBt;
  int rc;
  assert( idx>=1 && idx<=15 );
  sqlite3BtreeEnter(p);
  assert( p->inTrans==TRANS_WRITE );
  assert( pBt->pPage1!=0 );
  rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
  rc = btreeMakePageWriteable(pBt->pPage1);
  if( rc==SQLITE_OK ){
    put4byte(&pBt->pPage1->aData[36 + idx*4], iMeta);
#ifndef SQLITE_OMIT_AUTOVACUUM
    if( idx==BTREE_INCR_VACUUM ){
      assert( pBt->autoVacuum || iMeta==0 );
      assert( iMeta==0 || iMeta==1 );
      pBt->incrVacuum = (u8)iMeta;
7913
7914
7915
7916
7917
7918
7919

7920
7921
7922
7923
7924
7925
7926
7939
7940
7941
7942
7943
7944
7945
7946
7947
7948
7949
7950
7951
7952
7953







+







    sCheck.anRef[i] = 1;
  }
  sqlite3StrAccumInit(&sCheck.errMsg, zErr, sizeof(zErr), 20000);
  sCheck.errMsg.useMalloc = 2;

  /* Check the integrity of the freelist
  */
  btreeGetData(pBt->pPage1);
  checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
            get4byte(&pBt->pPage1->aData[36]), "Main freelist: ");

  /* Check all the tables.
  */
  for(i=0; (int)i<nRoot && sCheck.mxErr; i++){
    if( aRoot[i]==0 ) continue;
8200
8201
8202
8203
8204
8205
8206
8207

8208
8209
8210
8211

8212
8213

8214
8215
8216
8217
8218
8219
8220
8221
8222
8223
8227
8228
8229
8230
8231
8232
8233

8234
8235
8236
8237

8238
8239

8240
8241
8242
8243
8244
8245
8246
8247
8248
8249
8250







-
+



-
+

-
+










  /* If setting the version fields to 1, do not automatically open the
  ** WAL connection, even if the version fields are currently set to 2.
  */
  pBt->doNotUseWAL = (u8)(iVersion==1);

  rc = sqlite3BtreeBeginTrans(pBtree, 0);
  if( rc==SQLITE_OK ){
    u8 *aData = pBt->pPage1->aData;
    u8 *aData = btreeGetData(pBt->pPage1);
    if( aData[18]!=(u8)iVersion || aData[19]!=(u8)iVersion ){
      rc = sqlite3BtreeBeginTrans(pBtree, 2);
      if( rc==SQLITE_OK ){
        rc = sqlite3PagerWrite(pBt->pPage1->pDbPage);
        rc = btreeMakePageWriteable(pBt->pPage1);
        if( rc==SQLITE_OK ){
          aData = pBt->pPage1->aData;
          aData = btreeGetData(pBt->pPage1);
          aData[18] = (u8)iVersion;
          aData[19] = (u8)iVersion;
        }
      }
    }
  }

  pBt->doNotUseWAL = 0;
  return rc;
}

Changes to src/os.c.

194
195
196
197
198
199
200














































201
202
203
204
205
206
207
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  }else{
    double r;
    rc = pVfs->xCurrentTime(pVfs, &r);
    *pTimeOut = (sqlite3_int64)(r*86400000.0);
  }
  return rc;
}

/* Attempt to map all or part of a file into memory.  VFSes are not
** required to implement this.  The VFS might be an older version (less then
** 3) that does not have an xMap pointer.  Or the xMap pointer might be NULL.
*/
int sqlite3OsMap(
  sqlite3_file *pFile,    /* The file to be mapped into memory */
  sqlite3_int64 ofst,     /* Index of the first byte to map */
  sqlite3_int64 len,      /* Number of bytes to be mapped */
  int mmapFlags,          /* Map control flags */
  void **ppMemObj,        /* Write a mapping object here */
  void **ppMem            /* Write the start of the mapped file here */
){
  int rc;

  /* The current implementation only does read-only mmap.  This could change
  ** in the future. */
  assert( mmapFlags==SQLITE_OPEN_READONLY );

  /* The current implementation currently only maps the whole file.  This
  ** could change in the future. */
  assert( ofst==0 );

  if( pFile->pMethods==0 || pFile->pMethods->iVersion<3
     || pFile->pMethods->xMap==0 ){
    *ppMemObj = 0;
    *ppMem = 0;
    rc = SQLITE_CANTOPEN;
  }else{
    rc = pFile->pMethods->xMap(pFile, ofst, len, mmapFlags, ppMemObj, ppMem);
  }
  return rc;
}

/* Undo a mapping.
**
** The pMemObj parameter will have been obtained by a prior call to
** sqlite3OsMap().  So if pMemObj is not NULL, we know that the current
** VFS does support xMap and xUnmap.
*/
int sqlite3OsUnmap(sqlite3_file *pFile, void *pMemObj){
  int rc = SQLITE_OK;
  if( pMemObj ) rc = pFile->pMethods->xUnmap(pFile, pMemObj);
  return rc;
}


int sqlite3OsOpenMalloc(
  sqlite3_vfs *pVfs, 
  const char *zFile, 
  sqlite3_file **ppFile, 
  int flags,
  int *pOutFlags

Changes to src/os.h.

247
248
249
250
251
252
253


254
255
256
257
258
259
260
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262







+
+







#define SQLITE_FCNTL_DB_UNCHANGED 0xca093fa0
int sqlite3OsSectorSize(sqlite3_file *id);
int sqlite3OsDeviceCharacteristics(sqlite3_file *id);
int sqlite3OsShmMap(sqlite3_file *,int,int,int,void volatile **);
int sqlite3OsShmLock(sqlite3_file *id, int, int, int);
void sqlite3OsShmBarrier(sqlite3_file *id);
int sqlite3OsShmUnmap(sqlite3_file *id, int);
int sqlite3OsMap(sqlite3_file*,sqlite3_int64,sqlite3_int64,int,void**,void**);
int sqlite3OsUnmap(sqlite3_file*,void*);

/* 
** Functions for accessing sqlite3_vfs methods 
*/
int sqlite3OsOpen(sqlite3_vfs *, const char *, sqlite3_file*, int, int *);
int sqlite3OsDelete(sqlite3_vfs *, const char *, int);
int sqlite3OsAccess(sqlite3_vfs *, const char *, int, int *pResOut);

Changes to src/os_unix.c.

115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
115
116
117
118
119
120
121

122

123
124
125
126
127
128
129







-

-







#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <sys/time.h>
#include <errno.h>
#ifndef SQLITE_OMIT_WAL
#include <sys/mman.h>
#endif

#if SQLITE_ENABLE_LOCKING_STYLE
# include <sys/ioctl.h>
# if OS_VXWORKS
#  include <semaphore.h>
#  include <limits.h>
# else
408
409
410
411
412
413
414








415
416
417
418
419
420
421
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427







+
+
+
+
+
+
+
+







#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)

  { "mkdir",        (sqlite3_syscall_ptr)mkdir,           0 },
#define osMkdir     ((int(*)(const char*,mode_t))aSyscall[18].pCurrent)

  { "rmdir",        (sqlite3_syscall_ptr)rmdir,           0 },
#define osRmdir     ((int(*)(const char*))aSyscall[19].pCurrent)

  { "mmap",         (sqlite3_syscall_ptr)mmap,            0 },
#define osMmap  ((int(*)(void*,size_t,int,int,int,off_t))aSyscall[20].pCurrent)

  { "munmap",       (sqlite3_syscall_ptr)munmap,          0 },
#define osMunmap    ((int(*)(void*,size_t))aSyscall[21].pCurrent)



}; /* End of the overrideable system calls */

/*
** This is the xSetSystemCall() method of sqlite3_vfs for all of the
** "unix" VFSes.  Return SQLITE_OK opon successfully updating the
** system call pointer, or SQLITE_NOTFOUND if there is no configurable
4224
4225
4226
4227
4228
4229
4230

























































4231
4232
4233
4234
4235
4236
4237
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








#else
# define unixShmMap     0
# define unixShmLock    0
# define unixShmBarrier 0
# define unixShmUnmap   0
#endif /* #ifndef SQLITE_OMIT_WAL */

/*
** An object used to record enough information about a file mapping to
** undo that mapping.
*/
struct unixMapping {
  sqlite3_int64 len;
  void *p;
};

/*
** Try to map some or all of a file into memory
*/
static int unixMap(
  sqlite3_file *pFile,   /* File to be mapped */
  sqlite3_int64 ofst,    /* Offset of start of section to be mapped */
  sqlite3_int64 len,     /* Length of the section to be mapped */
  int mmapFlags,         /* Flags controlling the mapping */
  void **ppMapObj,       /* Write here an object to undo the mapping */
  void **ppMem           /* Write here a pointer to the mapped file */
){
  struct unixMapping *pNew;
  unixFile *pUFile = (unixFile*)pFile;

  assert( mmapFlags==SQLITE_OPEN_READONLY );
  sqlite3BeginBenignMalloc();
  pNew = sqlite3_malloc( sizeof(*pNew) );
  sqlite3EndBenignMalloc();
  if( pNew==0 ){
    *ppMapObj = 0;
    *ppMem = 0;
    return SQLITE_CANTOPEN;
  }
  pNew->len = len;
  pNew->p = *ppMem = mmap(0, len, PROT_READ, MAP_SHARED, pUFile->h, 0);
  if( pNew->p==0 ){
    sqlite3_free(pNew);
    return SQLITE_CANTOPEN;
  }else{
    *ppMapObj = pNew;
    return SQLITE_OK;
  }
}

/*
** Undo a prior memory mapping.
*/
static int unixUnmap(
  sqlite3_file *pFile,
  void *pMapObj
){
  struct unixMapping *pMap = (struct unixMapping*)pMapObj;
  assert( pMap!=0 );
  munmap(pMap->p, pMap->len);
  sqlite3_free(pMap);
  return SQLITE_OK;
}

/*
** Here ends the implementation of all sqlite3_file methods.
**
********************** End sqlite3_file Methods *******************************
******************************************************************************/

4265
4266
4267
4268
4269
4270
4271
4272

4273
4274

4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287

4288
4289
4290



4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311


4312
4313
4314
4315
4316
4317
4318
4319
4320


4321
4322
4323
4324
4325
4326
4327
4328
4329


4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340


4341
4342
4343
4344
4345
4346
4347
4348
4349
4350
4351
4352


4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364


4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389


4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402


4403
4404
4405
4406
4407
4408
4409
4328
4329
4330
4331
4332
4333
4334

4335
4336

4337
4338
4339
4340
4341
4342
4343
4344
4345
4346
4347
4348
4349

4350
4351
4352

4353
4354
4355
4356
4357
4358
4359
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371

4372
4373
4374

4375
4376
4377
4378
4379
4380

4381
4382
4383

4384
4385
4386
4387
4388
4389

4390
4391
4392

4393
4394
4395
4396
4397
4398
4399
4400

4401
4402
4403

4404
4405
4406
4407
4408
4409
4410
4411
4412

4413
4414
4415

4416
4417
4418
4419
4420
4421
4422
4423
4424

4425
4426
4427

4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449

4450
4451
4452

4453
4454
4455
4456
4457
4458
4459
4460
4461
4462

4463
4464
4465

4466
4467
4468
4469
4470
4471
4472
4473
4474







-
+

-
+












-
+


-
+
+
+
















-



-
+
+




-



-
+
+




-



-
+
+






-



-
+
+







-



-
+
+







-



-
+
+




















-



-
+
+








-



-
+
+







**
**   *  A constant sqlite3_io_methods object call METHOD that has locking
**      methods CLOSE, LOCK, UNLOCK, CKRESLOCK.
**
**   *  An I/O method finder function called FINDER that returns a pointer
**      to the METHOD object in the previous bullet.
*/
#define IOMETHODS(FINDER, METHOD, VERSION, CLOSE, LOCK, UNLOCK, CKLOCK)      \
#define IOMETHODS(FINDER, METHOD, CLOSE, LOCK, UNLOCK, CKLOCK, SHMMAP)       \
static const sqlite3_io_methods METHOD = {                                   \
   VERSION,                    /* iVersion */                                \
   3,                          /* iVersion */                                \
   CLOSE,                      /* xClose */                                  \
   unixRead,                   /* xRead */                                   \
   unixWrite,                  /* xWrite */                                  \
   unixTruncate,               /* xTruncate */                               \
   unixSync,                   /* xSync */                                   \
   unixFileSize,               /* xFileSize */                               \
   LOCK,                       /* xLock */                                   \
   UNLOCK,                     /* xUnlock */                                 \
   CKLOCK,                     /* xCheckReservedLock */                      \
   unixFileControl,            /* xFileControl */                            \
   unixSectorSize,             /* xSectorSize */                             \
   unixDeviceCharacteristics,  /* xDeviceCapabilities */                     \
   unixShmMap,                 /* xShmMap */                                 \
   SHMMAP,                     /* xShmMap */                                 \
   unixShmLock,                /* xShmLock */                                \
   unixShmBarrier,             /* xShmBarrier */                             \
   unixShmUnmap                /* xShmUnmap */                               \
   unixShmUnmap,               /* xShmUnmap */                               \
   unixMap,                    /* xMap */                                    \
   unixUnmap                   /* xUnmap */                                  \
};                                                                           \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){   \
  UNUSED_PARAMETER(z); UNUSED_PARAMETER(p);                                  \
  return &METHOD;                                                            \
}                                                                            \
static const sqlite3_io_methods *(*const FINDER)(const char*,unixFile *p)    \
    = FINDER##Impl;

/*
** Here are all of the sqlite3_io_methods objects for each of the
** locking strategies.  Functions that return pointers to these methods
** are also created.
*/
IOMETHODS(
  posixIoFinder,            /* Finder function name */
  posixIoMethods,           /* sqlite3_io_methods object name */
  2,                        /* shared memory is enabled */
  unixClose,                /* xClose method */
  unixLock,                 /* xLock method */
  unixUnlock,               /* xUnlock method */
  unixCheckReservedLock     /* xCheckReservedLock method */
  unixCheckReservedLock,    /* xCheckReservedLock method */
  unixShmMap                /* Shared memory enabled */
)
IOMETHODS(
  nolockIoFinder,           /* Finder function name */
  nolockIoMethods,          /* sqlite3_io_methods object name */
  1,                        /* shared memory is disabled */
  nolockClose,              /* xClose method */
  nolockLock,               /* xLock method */
  nolockUnlock,             /* xUnlock method */
  nolockCheckReservedLock   /* xCheckReservedLock method */
  nolockCheckReservedLock,  /* xCheckReservedLock method */
  0                         /* Shared memory disabled */
)
IOMETHODS(
  dotlockIoFinder,          /* Finder function name */
  dotlockIoMethods,         /* sqlite3_io_methods object name */
  1,                        /* shared memory is disabled */
  dotlockClose,             /* xClose method */
  dotlockLock,              /* xLock method */
  dotlockUnlock,            /* xUnlock method */
  dotlockCheckReservedLock  /* xCheckReservedLock method */
  dotlockCheckReservedLock, /* xCheckReservedLock method */
  0                         /* Shared memory disabled */
)

#if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS
IOMETHODS(
  flockIoFinder,            /* Finder function name */
  flockIoMethods,           /* sqlite3_io_methods object name */
  1,                        /* shared memory is disabled */
  flockClose,               /* xClose method */
  flockLock,                /* xLock method */
  flockUnlock,              /* xUnlock method */
  flockCheckReservedLock    /* xCheckReservedLock method */
  flockCheckReservedLock,   /* xCheckReservedLock method */
  0                         /* Shared memory disabled */
)
#endif

#if OS_VXWORKS
IOMETHODS(
  semIoFinder,              /* Finder function name */
  semIoMethods,             /* sqlite3_io_methods object name */
  1,                        /* shared memory is disabled */
  semClose,                 /* xClose method */
  semLock,                  /* xLock method */
  semUnlock,                /* xUnlock method */
  semCheckReservedLock      /* xCheckReservedLock method */
  semCheckReservedLock,     /* xCheckReservedLock method */
  0                         /* Shared memory disabled */
)
#endif

#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
IOMETHODS(
  afpIoFinder,              /* Finder function name */
  afpIoMethods,             /* sqlite3_io_methods object name */
  1,                        /* shared memory is disabled */
  afpClose,                 /* xClose method */
  afpLock,                  /* xLock method */
  afpUnlock,                /* xUnlock method */
  afpCheckReservedLock      /* xCheckReservedLock method */
  afpCheckReservedLock,     /* xCheckReservedLock method */
  0                         /* Shared memory disabled */
)
#endif

/*
** The proxy locking method is a "super-method" in the sense that it
** opens secondary file descriptors for the conch and lock files and
** it uses proxy, dot-file, AFP, and flock() locking methods on those
** secondary files.  For this reason, the division that implements
** proxy locking is located much further down in the file.  But we need
** to go ahead and define the sqlite3_io_methods and finder function
** for proxy locking here.  So we forward declare the I/O methods.
*/
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
static int proxyClose(sqlite3_file*);
static int proxyLock(sqlite3_file*, int);
static int proxyUnlock(sqlite3_file*, int);
static int proxyCheckReservedLock(sqlite3_file*, int*);
IOMETHODS(
  proxyIoFinder,            /* Finder function name */
  proxyIoMethods,           /* sqlite3_io_methods object name */
  1,                        /* shared memory is disabled */
  proxyClose,               /* xClose method */
  proxyLock,                /* xLock method */
  proxyUnlock,              /* xUnlock method */
  proxyCheckReservedLock    /* xCheckReservedLock method */
  proxyCheckReservedLock,   /* xCheckReservedLock method */
  0                         /* Shared memory disabled */
)
#endif

/* nfs lockd on OSX 10.3+ doesn't clear write locks when a read lock is set */
#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
IOMETHODS(
  nfsIoFinder,               /* Finder function name */
  nfsIoMethods,              /* sqlite3_io_methods object name */
  1,                         /* shared memory is disabled */
  unixClose,                 /* xClose method */
  unixLock,                  /* xLock method */
  nfsUnlock,                 /* xUnlock method */
  unixCheckReservedLock      /* xCheckReservedLock method */
  unixCheckReservedLock,     /* xCheckReservedLock method */
  0                          /* Shared memory disabled */
)
#endif

#if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE
/* 
** This "finder" function attempts to determine the best locking strategy 
** for the database file "filePath".  It then returns the sqlite3_io_methods
6767
6768
6769
6770
6771
6772
6773
6774

6775
6776
6777
6778
6779
6780
6781
6832
6833
6834
6835
6836
6837
6838

6839
6840
6841
6842
6843
6844
6845
6846







-
+







    UNIXVFS("unix-proxy",    proxyIoFinder ),
#endif
  };
  unsigned int i;          /* Loop counter */

  /* Double-check that the aSyscall[] array has been constructed
  ** correctly.  See ticket [bb3a86e890c8e96ab] */
  assert( ArraySize(aSyscall)==20 );
  assert( ArraySize(aSyscall)==22 );

  /* Register all VFSes defined in the aVfs[] array */
  for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
    sqlite3_vfs_register(&aVfs[i], i==0);
  }
  return SQLITE_OK; 
}

Changes to src/pager.c.

651
652
653
654
655
656
657



658
659
660
661
662
663
664
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667







+
+
+







  sqlite3_file *sjfd;         /* File descriptor for sub-journal */
  i64 journalOff;             /* Current write offset in the journal file */
  i64 journalHdr;             /* Byte offset to previous journal header */
  sqlite3_backup *pBackup;    /* Pointer to list of ongoing backup processes */
  PagerSavepoint *aSavepoint; /* Array of active savepoints */
  int nSavepoint;             /* Number of elements in aSavepoint[] */
  char dbFileVers[16];        /* Changes whenever database file changes */
  u8 *aFileContent;           /* File mapped into memory */
  sqlite3_int64 nFileContent; /* Bytes of memory mapped into aFileContent */
  void *pMapObject;           /* Used to unmap the file */
  /*
  ** End of the routinely-changing class members
  ***************************************************************************/

  u16 nExtra;                 /* Add this many bytes to each in-memory page */
  i16 nReserve;               /* Number of unused bytes at end of each page */
  u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
1719
1720
1721
1722
1723
1724
1725

1726
1727
1728
1729
1730
1731
1732
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736







+







** contents of the pager cache are discarded before switching back to 
** the OPEN state. Regardless of whether the pager is in exclusive-mode
** or not, any journal file left in the file-system will be treated
** as a hot-journal and rolled back the next time a read-transaction
** is opened (by this or by any other connection).
*/
static void pager_unlock(Pager *pPager){
  PgHdr *pPg;

  assert( pPager->eState==PAGER_READER 
       || pPager->eState==PAGER_OPEN 
       || pPager->eState==PAGER_ERROR 
  );

  sqlite3BitvecDestroy(pPager->pInJournal);
1789
1790
1791
1792
1793
1794
1795














1796
1797
1798
1799
1800
1801
1802
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820







+
+
+
+
+
+
+
+
+
+
+
+
+
+







    pPager->eState = PAGER_OPEN;
    pPager->errCode = SQLITE_OK;
  }

  pPager->journalOff = 0;
  pPager->journalHdr = 0;
  pPager->setMaster = 0;

  pPg = 0;
  sqlite3PcacheFetch(pPager->pPCache, 1, 0, &pPg);
  if( pPg ){
    /* assert( sqlite3PcachePagecount(pPager->pPCache)==1 ); */
    pPg->pData = pPg->pBuf;
    sqlite3PcacheRelease(pPg);
  }else{
    /*assert( sqlite3PcachePagecount(pPager->pPCache)==0 );*/
  }
  sqlite3OsUnmap(pPager->fd, pPager->pMapObject);
  pPager->pMapObject = 0;
  pPager->aFileContent = 0;
  pPager->nFileContent = 0;
}

/*
** This function is called whenever an IOERR or FULL error that requires
** the pager to transition into the ERROR state may ahve occurred.
** The first argument is a pointer to the pager structure, the second 
** the error-code about to be returned by a pager API function. The 
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805

2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818








2819
2820
2821
2822
2823
2824
2825
2814
2815
2816
2817
2818
2819
2820



2821



2822
2823
2824
2825
2826
2827




2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842







-
-
-
+
-
-
-






-
-
-
-
+
+
+
+
+
+
+
+







  Pgno pgno = pPg->pgno;       /* Page number to read */
  int rc = SQLITE_OK;          /* Return code */
  int isInWal = 0;             /* True if page is in log file */
  int pgsz = pPager->pageSize; /* Number of bytes to read */

  assert( pPager->eState>=PAGER_READER && !MEMDB );
  assert( isOpen(pPager->fd) );

  if( NEVER(!isOpen(pPager->fd)) ){
    assert( pPager->tempFile );
  assert( pPg->pBuf==pPg->pData );
    memset(pPg->pData, 0, pPager->pageSize);
    return SQLITE_OK;
  }

  if( pagerUseWal(pPager) ){
    /* Try to pull the page from the write-ahead log. */
    rc = sqlite3WalRead(pPager->pWal, pgno, &isInWal, pgsz, pPg->pData);
  }
  if( rc==SQLITE_OK && !isInWal ){
    i64 iOffset = (pgno-1)*(i64)pPager->pageSize;
    rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset);
    if( rc==SQLITE_IOERR_SHORT_READ ){
      rc = SQLITE_OK;
    i64 iOffset = (pgno-1)*(i64)pgsz;
    if( iOffset+pgsz <= pPager->nFileContent ){
      pPg->pData = &pPager->aFileContent[iOffset];
    }else{
      rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset);
      if( rc==SQLITE_IOERR_SHORT_READ ){
        rc = SQLITE_OK;
      }
    }
  }

  if( pgno==1 ){
    if( rc ){
      /* If the read is unsuccessful, set the dbFileVers[] to something
      ** that will never be a valid file version.  dbFileVers[] is a copy
4888
4889
4890
4891
4892
4893
4894







4895
4896
4897
4898
4899
4900
4901
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925







+
+
+
+
+
+
+







    rc = pagerBeginReadTransaction(pPager);
  }

  if( pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){
    rc = pagerPagecount(pPager, &pPager->dbSize);
  }

  assert( pPager->aFileContent==0 );
  pPager->nFileContent = pPager->dbSize*(sqlite3_int64)pPager->pageSize;
  sqlite3OsMap(pPager->fd, 0, pPager->nFileContent,
               SQLITE_OPEN_READONLY, (void**)&pPager->pMapObject,
               (void**)&pPager->aFileContent);
  if( pPager->aFileContent==0 ) pPager->nFileContent = 0;

 failed:
  if( rc!=SQLITE_OK ){
    assert( !MEMDB );
    pager_unlock(pPager);
    assert( pPager->eState==PAGER_OPEN );
  }else{
    pPager->eState = PAGER_READER;
5272
5273
5274
5275
5276
5277
5278










5279
5280
5281
5282
5283
5284
5285
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319







+
+
+
+
+
+
+
+
+
+







    assert( rc!=SQLITE_OK || pPager->eState==PAGER_WRITER_LOCKED );
    assert( assert_pager_state(pPager) );
  }

  PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager)));
  return rc;
}

/*
** Make a copy of page content into malloced space.
*/
void makePageWriteable(Pager *pPager, PgHdr *pPg){
  if( pPg->pData!=pPg->pBuf ){
    memcpy(pPg->pBuf, pPg->pData, pPager->pageSize);
    pPg->pData = pPg->pBuf;
  }
}

/*
** Mark a single data page as writeable. The page is written into the 
** main journal or sub-journal as required. If the page is written into
** one of the journals, the corresponding bit is set in the 
** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs
** of any open savepoints as appropriate.
5320
5321
5322
5323
5324
5325
5326




5327
5328
5329
5330
5331
5332
5333
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
5366
5367
5368
5369
5370
5371







+
+
+
+







  */
  if( pPager->eState==PAGER_WRITER_LOCKED ){
    rc = pager_open_journal(pPager);
    if( rc!=SQLITE_OK ) return rc;
  }
  assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
  assert( assert_pager_state(pPager) );

  /* Make sure page content is held in malloced memory */
  makePageWriteable(pPager, pPg);
  pData = pPg->pData;

  /* Mark the page as dirty.  If the page has already been written
  ** to the journal then we can return right away.
  */
  sqlite3PcacheMakeDirty(pPg);
  if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){
    assert( !pagerUseWal(pPager) );
5521
5522
5523
5524
5525
5526
5527
5528
5529
5530
5531
5532
5533
5534
5535
5536
5537
5538
5539
5559
5560
5561
5562
5563
5564
5565

5566
5567
5568

5569
5570
5571
5572
5573
5574
5575







-



-







}

/*
** Return TRUE if the page given in the argument was previously passed
** to sqlite3PagerWrite().  In other words, return TRUE if it is ok
** to change the content of the page.
*/
#ifndef NDEBUG
int sqlite3PagerIswriteable(DbPage *pPg){
  return pPg->flags&PGHDR_DIRTY;
}
#endif

/*
** A call to this routine tells the pager that it is not necessary to
** write the information on page pPg back to the disk, even though
** that page might be marked as dirty.  This happens, for example, when
** the page has been added as a leaf of the freelist and so its
** content no longer matters.
6415
6416
6417
6418
6419
6420
6421

6422
6423
6424
6425
6426
6427
6428
6451
6452
6453
6454
6455
6456
6457
6458
6459
6460
6461
6462
6463
6464
6465







+







      sqlite3PcacheMove(pPgOld, pPager->dbSize+1);
    }else{
      sqlite3PcacheDrop(pPgOld);
    }
  }

  origPgno = pPg->pgno;
  makePageWriteable(pPager, pPg);
  sqlite3PcacheMove(pPg, pgno);
  sqlite3PcacheMakeDirty(pPg);

  /* For an in-memory database, make sure the original page continues
  ** to exist, in case the transaction needs to roll back.  Use pPgOld
  ** as the original page since it has already been allocated.
  */
6464
6465
6466
6467
6468
6469
6470
6471

6472
6473

6474
6475
6476
6477
6478
6479
6480
6501
6502
6503
6504
6505
6506
6507

6508
6509

6510
6511
6512
6513
6514
6515
6516
6517







-
+

-
+







  return SQLITE_OK;
}
#endif

/*
** Return a pointer to the data for the specified page.
*/
void *sqlite3PagerGetData(DbPage *pPg){
u8 *sqlite3PagerGetData(DbPage *pPg){
  assert( pPg->nRef>0 || pPg->pPager->memDb );
  return pPg->pData;
  return (u8*)pPg->pData;
}

/*
** Return a pointer to the Pager.nExtra bytes of "extra" space 
** allocated along with the specified page.
*/
void *sqlite3PagerGetExtra(DbPage *pPg){

Changes to src/pager.h.

119
120
121
122
123
124
125
126

127
128
129
130
131
132
133
119
120
121
122
123
124
125

126
127
128
129
130
131
132
133







-
+







void sqlite3PagerUnref(DbPage*);

/* Operations on page references. */
int sqlite3PagerWrite(DbPage*);
void sqlite3PagerDontWrite(DbPage*);
int sqlite3PagerMovepage(Pager*,DbPage*,Pgno,int);
int sqlite3PagerPageRefcount(DbPage*);
void *sqlite3PagerGetData(DbPage *); 
u8 *sqlite3PagerGetData(DbPage *); 
void *sqlite3PagerGetExtra(DbPage *); 

/* Functions used to manage pager transactions and savepoints. */
void sqlite3PagerPagecount(Pager*, int*);
int sqlite3PagerBegin(Pager*, int exFlag, int);
int sqlite3PagerCommitPhaseOne(Pager*,const char *zMaster, int);
int sqlite3PagerExclusiveLock(Pager*);
164
165
166
167
168
169
170
171
172

173
174
175
176
177
178
179
180
181
182
183
164
165
166
167
168
169
170

171
172
173
174
175
176
177
178
179
180
181
182
183







-

+











#if defined(SQLITE_HAS_CODEC) && !defined(SQLITE_OMIT_WAL)
void *sqlite3PagerCodec(DbPage *);
#endif

/* Functions to support testing and debugging. */
#if !defined(NDEBUG) || defined(SQLITE_TEST)
  Pgno sqlite3PagerPagenumber(DbPage*);
  int sqlite3PagerIswriteable(DbPage*);
#endif
int sqlite3PagerIswriteable(DbPage*);
#ifdef SQLITE_TEST
  int *sqlite3PagerStats(Pager*);
  void sqlite3PagerRefdump(Pager*);
  void disable_simulated_io_errors(void);
  void enable_simulated_io_errors(void);
#else
# define disable_simulated_io_errors()
# define enable_simulated_io_errors()
#endif

#endif /* _PAGER_H_ */

Changes to src/pcache.c.

127
128
129
130
131
132
133
134

135
136
137
138
139
140
141
127
128
129
130
131
132
133

134
135
136
137
138
139
140
141







-
+







*/
static void pcacheUnpin(PgHdr *p){
  PCache *pCache = p->pCache;
  if( pCache->bPurgeable ){
    if( p->pgno==1 ){
      pCache->pPage1 = 0;
    }
    sqlite3GlobalConfig.pcache2.xUnpin(pCache->pCache, p->pPage, 0);
    sqlite3GlobalConfig.pcache2.xUnpin(pCache->pCache, p->pPage, 1);
  }
}

/*************************************************** General Interfaces ******
**
** Initialize and shutdown the page cache subsystem. Neither of these 
** functions are threadsafe.
283
284
285
286
287
288
289
290

291
292
293
294
295
296
297
298

299
300
301
302
303
304
305
283
284
285
286
287
288
289

290
291
292
293
294
295
296
297

298
299
300
301
302
303
304
305







-
+







-
+








  if( pPage ){
    pPgHdr = (PgHdr *)pPage->pExtra;

    if( !pPgHdr->pPage ){
      memset(pPgHdr, 0, sizeof(PgHdr));
      pPgHdr->pPage = pPage;
      pPgHdr->pData = pPage->pBuf;
      pPgHdr->pData = pPgHdr->pBuf = pPage->pBuf;
      pPgHdr->pExtra = (void *)&pPgHdr[1];
      memset(pPgHdr->pExtra, 0, pCache->szExtra);
      pPgHdr->pCache = pCache;
      pPgHdr->pgno = pgno;
    }
    assert( pPgHdr->pCache==pCache );
    assert( pPgHdr->pgno==pgno );
    assert( pPgHdr->pData==pPage->pBuf );
    assert( pPgHdr->pBuf==pPage->pBuf );
    assert( pPgHdr->pExtra==(void *)&pPgHdr[1] );

    if( 0==pPgHdr->nRef ){
      pCache->nRef++;
    }
    pPgHdr->nRef++;
    if( pgno==1 ){

Changes to src/pcache.h.

20
21
22
23
24
25
26
27


28
29
30
31
32
33
34
20
21
22
23
24
25
26

27
28
29
30
31
32
33
34
35







-
+
+








/*
** Every page in the cache is controlled by an instance of the following
** structure.
*/
struct PgHdr {
  sqlite3_pcache_page *pPage;    /* Pcache object page handle */
  void *pData;                   /* Page data */
  void *pData;                   /* Page data to actually use */
  void *pBuf;                    /* Malloced buffer to hold pData */
  void *pExtra;                  /* Extra content */
  PgHdr *pDirty;                 /* Transient list of dirty pages */
  Pgno pgno;                     /* Page number for this page */
  Pager *pPager;                 /* The pager this page is part of */
#ifdef SQLITE_CHECK_PAGES
  u32 pageHash;                  /* Hash of page content */
#endif

Changes to src/sqlite.h.in.

685
686
687
688
689
690
691




692
693
694
695
696
697
698
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702







+
+
+
+







  int (*xDeviceCharacteristics)(sqlite3_file*);
  /* Methods above are valid for version 1 */
  int (*xShmMap)(sqlite3_file*, int iPg, int pgsz, int, void volatile**);
  int (*xShmLock)(sqlite3_file*, int offset, int n, int flags);
  void (*xShmBarrier)(sqlite3_file*);
  int (*xShmUnmap)(sqlite3_file*, int deleteFlag);
  /* Methods above are valid for version 2 */
  int (*xMap)(sqlite3_file*, sqlite3_int64 ofst, sqlite3_int64 len,
              int mmapFlags, void **ppMemObj, void **ppMem);
  int (*xUnmap)(sqlite3_file*, void *pMemObj);
  /* Methods above are valid for version 3 */
  /* Additional methods may be added in future releases */
};

/*
** CAPI3REF: Standard File Control Opcodes
**
** These integer constants are opcodes for the xFileControl method

Changes to test/syscall.test.

56
57
58
59
60
61
62

63
64
65
66
67
68
69
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70







+







#-------------------------------------------------------------------------
# Tests for the xNextSystemCall method.
#
foreach s {
    open close access getcwd stat fstat ftruncate
    fcntl read pread write pwrite fchmod fallocate
    pread64 pwrite64 unlink openDirectory mkdir rmdir
    mmap munmap
} {
  if {[test_syscall exists $s]} {lappend syscall_list $s}
}
do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list]

#-------------------------------------------------------------------------
# This test verifies that if a call to open() fails and errno is set to