/ Check-in [72de0073]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Change the semantics of xShmGet() such that it will never increase the size of shared memory. xShmSize() must be used to grow the size of shared memory. A shared memory segment size cannot be shrunk (except by dropping it).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 72de00731245277b2209103ec0a76e3d4f56530e
User & Date: drh 2010-05-26 15:06:38
Context
2010-05-26
17:31
Make sure the wal-index mapping is always large enough to cover the entire active area of the wal-index. check-in: 42705bab user: drh tags: trunk
15:06
Change the semantics of xShmGet() such that it will never increase the size of shared memory. xShmSize() must be used to grow the size of shared memory. A shared memory segment size cannot be shrunk (except by dropping it). check-in: 72de0073 user: drh tags: trunk
2010-05-25
15:53
Updated header comments in wal.c. No functional code changes. check-in: 687632a6 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

  3598   3598     }
  3599   3599     unixLeaveMutex();
  3600   3600   
  3601   3601     return SQLITE_OK;
  3602   3602   }
  3603   3603   
  3604   3604   /*
  3605         -** Query and/or changes the size of the underlying storage for
  3606         -** a shared-memory segment.  The reqSize parameter is the new size
  3607         -** of the underlying storage, or -1 to do just a query.  The size
  3608         -** of the underlying storage (after resizing if resizing occurs) is
  3609         -** written into pNewSize.
         3605  +** Changes the size of the underlying storage for a shared-memory segment.
  3610   3606   **
  3611         -** This routine does not (necessarily) change the size of the mapping 
  3612         -** of the underlying storage into memory.  Use xShmGet() to change
  3613         -** the mapping size.
         3607  +** The reqSize parameter is the new requested size of the shared memory.
         3608  +** This implementation is free to increase the shared memory size to
         3609  +** any amount greater than or equal to reqSize.  If the shared memory is
         3610  +** already as big as or bigger than reqSize, this routine is a no-op.
  3614   3611   **
  3615   3612   ** The reqSize parameter is the minimum size requested.  The implementation
  3616   3613   ** is free to expand the storage to some larger amount if it chooses.
  3617   3614   */
  3618   3615   static int unixShmSize(
  3619   3616     sqlite3_file *fd,         /* The open database file holding SHM */
  3620   3617     int reqSize,              /* Requested size.  -1 for query only */
................................................................................
  3625   3622     unixShmNode *pShmNode = p->pShmNode;
  3626   3623     int rc = SQLITE_OK;
  3627   3624     struct stat sStat;
  3628   3625   
  3629   3626     assert( pShmNode==pDbFd->pInode->pShmNode );
  3630   3627     assert( pShmNode->pInode==pDbFd->pInode );
  3631   3628   
  3632         -  /* On a query, this loop runs once.  When reqSize>=0, the loop potentially
  3633         -  ** runs twice, except if the actual size is already greater than or equal
  3634         -  ** to the requested size, reqSize is set to -1 on the first iteration and
  3635         -  ** the loop only runs once.
  3636         -  */
  3637   3629     while( 1 ){
  3638   3630       if( fstat(pShmNode->h, &sStat)==0 ){
  3639   3631         *pNewSize = (int)sStat.st_size;
  3640         -      if( reqSize>=0 && reqSize<=(int)sStat.st_size ) break;
         3632  +      if( reqSize<=(int)sStat.st_size ) break;
  3641   3633       }else{
  3642   3634         *pNewSize = 0;
  3643   3635         rc = SQLITE_IOERR;
  3644   3636         break;
  3645   3637       }
  3646         -    if( reqSize<0 ) break;
  3647         -    reqSize = (reqSize + SQLITE_UNIX_SHM_INCR - 1)/SQLITE_UNIX_SHM_INCR;
  3648         -    reqSize *= SQLITE_UNIX_SHM_INCR;
  3649   3638       rc = ftruncate(pShmNode->h, reqSize);
  3650   3639       reqSize = -1;
  3651   3640     }
  3652   3641     return rc;
  3653   3642   }
  3654   3643   
  3655   3644   
  3656   3645   /*
  3657         -** Map the shared storage into memory.  The minimum size of the
  3658         -** mapping should be reqMapSize if reqMapSize is positive.  If
  3659         -** reqMapSize is zero or negative, the implementation can choose
  3660         -** whatever mapping size is convenient.
         3646  +** Map the shared storage into memory. 
         3647  +**
         3648  +** If reqMapSize is positive, then an attempt is made to make the
         3649  +** mapping at least reqMapSize bytes in size.  However, the mapping
         3650  +** will never be larger than the size of the underlying shared memory
         3651  +** as set by prior calls to xShmSize().  
  3661   3652   **
  3662   3653   ** *ppBuf is made to point to the memory which is a mapping of the
  3663   3654   ** underlying storage.  A mutex is acquired to prevent other threads
  3664   3655   ** from running while *ppBuf is in use in order to prevent other threads
  3665   3656   ** remapping *ppBuf out from under this thread.  The unixShmRelease()
  3666   3657   ** call will release the mutex.  However, if the lock state is CHECKPOINT,
  3667   3658   ** the mutex is not acquired because CHECKPOINT will never remap the
................................................................................
  3670   3661   **
  3671   3662   ** RECOVER needs to be atomic.  The same mutex that prevents *ppBuf from
  3672   3663   ** being remapped also prevents more than one thread from being in
  3673   3664   ** RECOVER at a time.  But, RECOVER sometimes wants to remap itself.
  3674   3665   ** To prevent RECOVER from losing its lock while remapping, the
  3675   3666   ** mutex is not released by unixShmRelease() when in RECOVER.
  3676   3667   **
  3677         -** *pNewMapSize is set to the size of the mapping.
         3668  +** *pNewMapSize is set to the size of the mapping.  Usually *pNewMapSize
         3669  +** will be reqMapSize or larger, though it could be smaller if the
         3670  +** underlying shared memory has never been enlarged to reqMapSize bytes
         3671  +** by prior calls to xShmSize().
  3678   3672   **
  3679         -** *ppBuf and *pNewMapSize might be NULL and zero if no space has
         3673  +** *ppBuf might be NULL if no space has
  3680   3674   ** yet been allocated to the underlying storage.
  3681   3675   */
  3682   3676   static int unixShmGet(
  3683   3677     sqlite3_file *fd,        /* Database file holding shared memory */
  3684   3678     int reqMapSize,          /* Requested size of mapping. -1 means don't care */
  3685   3679     int *pNewMapSize,        /* Write new size of mapping here */
  3686   3680     void volatile **ppBuf    /* Write mapping buffer origin here */
................................................................................
  3697   3691       assert( sqlite3_mutex_notheld(pShmNode->mutex) );
  3698   3692       sqlite3_mutex_enter(pShmNode->mutexBuf);
  3699   3693       p->hasMutexBuf = 1;
  3700   3694     }
  3701   3695     sqlite3_mutex_enter(pShmNode->mutex);
  3702   3696     if( pShmNode->szMap==0 || reqMapSize>pShmNode->szMap ){
  3703   3697       int actualSize;
  3704         -    if( unixShmSize(fd, -1, &actualSize)==SQLITE_OK
  3705         -     && reqMapSize<actualSize
  3706         -    ){
  3707         -      reqMapSize = actualSize;
         3698  +    if( unixShmSize(fd, -1, &actualSize)!=SQLITE_OK ){
         3699  +      actualSize = 0;
  3708   3700       }
  3709         -    if( pShmNode->pMMapBuf ){
         3701  +    reqMapSize = actualSize;
         3702  +    if( pShmNode->pMMapBuf || reqMapSize<=0 ){
  3710   3703         munmap(pShmNode->pMMapBuf, pShmNode->szMap);
  3711   3704       }
  3712         -    pShmNode->pMMapBuf = mmap(0, reqMapSize, PROT_READ|PROT_WRITE, MAP_SHARED,
  3713         -                           pShmNode->h, 0);
  3714         -    pShmNode->szMap = pShmNode->pMMapBuf ? reqMapSize : 0;
         3705  +    if( reqMapSize>0 ){
         3706  +      pShmNode->pMMapBuf = mmap(0, reqMapSize, PROT_READ|PROT_WRITE, MAP_SHARED,
         3707  +                             pShmNode->h, 0);
         3708  +      pShmNode->szMap = pShmNode->pMMapBuf ? reqMapSize : 0;
         3709  +    }else{
         3710  +      pShmNode->pMMapBuf = 0;
         3711  +      pShmNode->szMap = 0;
         3712  +    }
  3715   3713     }
  3716   3714     *pNewMapSize = pShmNode->szMap;
  3717   3715     *ppBuf = pShmNode->pMMapBuf;
  3718   3716     sqlite3_mutex_leave(pShmNode->mutex);
  3719   3717     return rc;
  3720   3718   }
  3721   3719   

Changes to src/os_win.c.

  1754   1754     }
  1755   1755     winShmLeaveMutex();
  1756   1756   
  1757   1757     return SQLITE_OK;
  1758   1758   }
  1759   1759   
  1760   1760   /*
  1761         -** Query and/or changes the size of the underlying storage for
  1762         -** a shared-memory segment.  The reqSize parameter is the new size
  1763         -** of the underlying storage, or -1 to do just a query.  The size
  1764         -** of the underlying storage (after resizing if resizing occurs) is
  1765         -** written into pNewSize.
         1761  +** Increase the size of the underlying storage for a shared-memory segment.
  1766   1762   **
  1767         -** This routine does not (necessarily) change the size of the mapping 
  1768         -** of the underlying storage into memory.  Use xShmGet() to change
  1769         -** the mapping size.
         1763  +** The reqSize parameter is the new requested minimum size of the underlying
         1764  +** shared memory.  This routine may choose to make the shared memory larger
         1765  +** than this value (for example to round the shared memory size up to an
         1766  +** operating-system dependent page size.)
  1770   1767   **
  1771         -** The reqSize parameter is the minimum size requested.  The implementation
  1772         -** is free to expand the storage to some larger amount if it chooses.
         1768  +** This routine will only grow the size of shared memory.  A request for
         1769  +** a smaller size is a no-op.
  1773   1770   */
  1774   1771   static int winShmSize(
  1775   1772     sqlite3_file *fd,         /* Database holding the shared memory */
  1776   1773     int reqSize,              /* Requested size.  -1 for query only */
  1777   1774     int *pNewSize             /* Write new size here */
  1778   1775   ){
  1779   1776     winFile *pDbFd = (winFile*)fd;

Changes to src/wal.c.

   240    240   /* A block of WALINDEX_LOCK_RESERVED bytes beginning at
   241    241   ** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems
   242    242   ** only support mandatory file-locks, we do not read or write data
   243    243   ** from the region of the file on which locks are applied.
   244    244   */
   245    245   #define WALINDEX_LOCK_OFFSET   (sizeof(WalIndexHdr)*2)
   246    246   #define WALINDEX_LOCK_RESERVED 8
          247  +#define WALINDEX_HDR_SIZE      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)
   247    248   
   248    249   /* Size of header before each frame in wal */
   249    250   #define WAL_FRAME_HDRSIZE 24
   250    251   
   251    252   /* Size of write ahead log header */
   252    253   #define WAL_HDRSIZE 24
   253    254   
................................................................................
   558    559   /*
   559    560   ** Release our reference to the wal-index memory map, if we are holding
   560    561   ** it.
   561    562   */
   562    563   static void walIndexUnmap(Wal *pWal){
   563    564     if( pWal->pWiData ){
   564    565       sqlite3OsShmRelease(pWal->pDbFd);
   565         -    pWal->pWiData = 0;
   566    566     }
          567  +  pWal->pWiData = 0;
          568  +  pWal->szWIndex = -1;
   567    569   }
   568    570   
   569    571   /*
   570    572   ** Map the wal-index file into memory if it isn't already. 
   571    573   **
   572         -** The reqSize parameter is the minimum required size of the mapping.
   573         -** A value of -1 means "don't care".
          574  +** The reqSize parameter is the requested size of the mapping.  The
          575  +** mapping will be at least this big if the underlying storage is
          576  +** that big.  But the mapping will never grow larger than the underlying
          577  +** storage.  Use walIndexRemap() to enlarge the storage space.
   574    578   */
   575    579   static int walIndexMap(Wal *pWal, int reqSize){
   576    580     int rc = SQLITE_OK;
   577    581     if( pWal->pWiData==0 || reqSize>pWal->szWIndex ){
   578    582       walIndexUnmap(pWal);
   579    583       rc = sqlite3OsShmGet(pWal->pDbFd, reqSize, &pWal->szWIndex,
   580    584                                (void volatile**)(char volatile*)&pWal->pWiData);
   581         -    if( rc==SQLITE_OK && pWal->pWiData==0 ){
   582         -      /* Make sure pWal->pWiData is not NULL while we are holding the
   583         -      ** lock on the mapping. */
   584         -      assert( pWal->szWIndex==0 );
   585         -      pWal->pWiData = &pWal->iCallback;
   586         -    }
   587    585       if( rc!=SQLITE_OK ){
   588    586         walIndexUnmap(pWal);
   589    587       }
   590    588     }
   591    589     return rc;
   592    590   }
   593    591   
   594    592   /*
          593  +** Enlarge the wal-index to be at least enlargeTo bytes in size and
   595    594   ** Remap the wal-index so that the mapping covers the full size
   596    595   ** of the underlying file.
   597    596   **
   598    597   ** If enlargeTo is non-negative, then increase the size of the underlying
   599    598   ** storage to be at least as big as enlargeTo before remapping.
   600    599   */
   601    600   static int walIndexRemap(Wal *pWal, int enlargeTo){
   602    601     int rc;
   603    602     int sz;
          603  +  assert( pWal->lockState>=SQLITE_SHM_WRITE );
   604    604     rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &sz);
   605    605     if( rc==SQLITE_OK && sz>pWal->szWIndex ){
   606    606       walIndexUnmap(pWal);
   607    607       rc = walIndexMap(pWal, sz);
   608    608     }
          609  +  assert( pWal->szWIndex>=enlargeTo || rc!=SQLITE_OK );
   609    610     return rc;
   610    611   }
   611         -
   612         -/*
   613         -** Increment by which to increase the wal-index file size.
   614         -*/
   615         -#define WALINDEX_MMAP_INCREMENT (64*1024)
   616         -
   617    612   
   618    613   /*
   619    614   ** Compute a hash on a page number.  The resulting hash value must land
   620    615   ** between 0 and (HASHTABLE_NSLOT-1).
   621    616   */
   622    617   static int walHash(u32 iPage){
   623    618     assert( iPage>0 );
................................................................................
   734    729   */
   735    730   static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
   736    731     int rc;                         /* Return code */
   737    732     int nMapping;                   /* Required mapping size in bytes */
   738    733     
   739    734     /* Make sure the wal-index is mapped. Enlarge the mapping if required. */
   740    735     nMapping = walMappingSize(iFrame);
   741         -  rc = walIndexMap(pWal, -1);
          736  +  rc = walIndexMap(pWal, nMapping);
   742    737     while( rc==SQLITE_OK && nMapping>pWal->szWIndex ){
   743         -    int nByte = pWal->szWIndex + WALINDEX_MMAP_INCREMENT;
   744         -    rc = walIndexRemap(pWal, nByte);
          738  +    rc = walIndexRemap(pWal, nMapping);
   745    739     }
   746    740   
   747    741     /* Assuming the wal-index file was successfully mapped, find the hash 
   748    742     ** table and section of the page number array that pertain to frame 
   749    743     ** iFrame of the WAL. Then populate the page number array and the hash 
   750    744     ** table entry.
   751    745     */
................................................................................
   903    897       }
   904    898   
   905    899       sqlite3_free(aFrame);
   906    900     }
   907    901   
   908    902   finished:
   909    903     if( rc==SQLITE_OK && pWal->hdr.mxFrame==0 ){
   910         -    rc = walIndexRemap(pWal, WALINDEX_MMAP_INCREMENT);
          904  +    rc = walIndexRemap(pWal, walMappingSize(1));
   911    905     }
   912    906     if( rc==SQLITE_OK ){
   913    907       pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
   914    908       pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
   915    909       walIndexWriteHdr(pWal);
   916    910     }
   917    911     return rc;
................................................................................
   979    973     if( !pRet ){
   980    974       return SQLITE_NOMEM;
   981    975     }
   982    976   
   983    977     pRet->pVfs = pVfs;
   984    978     pRet->pWalFd = (sqlite3_file *)&pRet[1];
   985    979     pRet->pDbFd = pDbFd;
          980  +  pRet->szWIndex = -1;
   986    981     sqlite3_randomness(8, &pRet->hdr.aSalt);
   987    982     pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd;
   988    983     sqlite3_snprintf(nWal, zWal, "%s-wal", zDbName);
   989    984     rc = sqlite3OsShmOpen(pDbFd);
   990    985   
   991    986     /* Open file handle on the write-ahead log file. */
   992    987     if( rc==SQLITE_OK ){
................................................................................
  1305   1300   ** is read successfully and the checksum verified, return zero.
  1306   1301   */
  1307   1302   int walIndexTryHdr(Wal *pWal, int *pChanged){
  1308   1303     u32 aCksum[2];               /* Checksum on the header content */
  1309   1304     WalIndexHdr h1, h2;          /* Two copies of the header content */
  1310   1305     WalIndexHdr *aHdr;           /* Header in shared memory */
  1311   1306   
  1312         -  assert( pWal->pWiData );
  1313         -  if( pWal->szWIndex==0 ){
  1314         -    /* The wal-index is of size 0 bytes. This is handled in the same way
  1315         -    ** as an invalid header. The caller will run recovery to construct
  1316         -    ** a valid wal-index file before accessing the database.
  1317         -    */
         1307  +  if( pWal->szWIndex < WALINDEX_HDR_SIZE ){
         1308  +    /* If the wal-index is not large enough to hold the header, then assume
         1309  +    ** the header is invalid. */
  1318   1310       return 1;
  1319   1311     }
         1312  +  assert( pWal->pWiData );
  1320   1313   
  1321   1314     /* Read the header. The caller may or may not have an exclusive 
  1322   1315     ** (WRITE, PENDING, CHECKPOINT or RECOVER) lock on the wal-index
  1323   1316     ** file, meaning it is possible that an inconsistent snapshot is read
  1324   1317     ** from the file. If this happens, return non-zero.
  1325   1318     **
  1326   1319     ** There are two copies of the header at the beginning of the wal-index.
................................................................................
  1374   1367   */
  1375   1368   static int walIndexReadHdr(Wal *pWal, int *pChanged){
  1376   1369     int rc;                         /* Return code */
  1377   1370     int lockState;                  /* pWal->lockState before running recovery */
  1378   1371   
  1379   1372     assert( pWal->lockState>=SQLITE_SHM_READ );
  1380   1373     assert( pChanged );
  1381         -  rc = walIndexMap(pWal, -1);
         1374  +  rc = walIndexMap(pWal, walMappingSize(1));
  1382   1375     if( rc!=SQLITE_OK ){
  1383   1376       return rc;
  1384   1377     }
  1385   1378   
  1386   1379     /* First attempt to read the wal-index header. This may fail for one
  1387   1380     ** of two reasons: (a) the wal-index does not yet exist or has been
  1388   1381     ** corrupted and needs to be constructed by running recovery, or (b)
................................................................................
  1624   1617       rc = walSetLock(pWal, SQLITE_SHM_WRITE);
  1625   1618   
  1626   1619       /* If this connection is not reading the most recent database snapshot,
  1627   1620       ** it is not possible to write to the database. In this case release
  1628   1621       ** the write locks and return SQLITE_BUSY.
  1629   1622       */
  1630   1623       if( rc==SQLITE_OK ){
  1631         -      rc = walIndexMap(pWal, sizeof(WalIndexHdr));
         1624  +      rc = walIndexMap(pWal, walMappingSize(1));
         1625  +      assert( pWal->szWIndex>=WALINDEX_HDR_SIZE || rc!=SQLITE_OK );
  1632   1626         if( rc==SQLITE_OK
  1633   1627          && memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))
  1634   1628         ){
  1635   1629           rc = SQLITE_BUSY;
  1636   1630         }
  1637   1631         walIndexUnmap(pWal);
  1638   1632         if( rc!=SQLITE_OK ){