/ Check-in [6f3a5c24]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add an experimental fix to avoid attempting to mmap memory from an offset that is not a multiple of the system page size on systems with page sizes larger than 32KB.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | shm-mapping-fix
Files: files | file ages | folders
SHA1: 6f3a5c24d254fc6faf607b505bdef4a7aafc21af
User & Date: dan 2014-03-20 08:59:47
Context
2014-03-20
09:42
Add a test to ensure os_unix.c works with 64KiB OS pages. Closed-Leaf check-in: e3d2be3b user: dan tags: shm-mapping-fix
08:59
Add an experimental fix to avoid attempting to mmap memory from an offset that is not a multiple of the system page size on systems with page sizes larger than 32KB. check-in: 6f3a5c24 user: dan tags: shm-mapping-fix
2014-03-19
20:01
Avoid some unnecessary calls to sqlite3VdbeRecordUnpack() that were being made when merging data from two or more temp files together in vdbesort.c check-in: 707ea170 user: dan tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

  4101   4101              pShmNode->sharedMask, pShmNode->exclMask));
  4102   4102     }
  4103   4103   #endif
  4104   4104   
  4105   4105     return rc;        
  4106   4106   }
  4107   4107   
         4108  +/*
         4109  +** Return the system page size.
         4110  +*/
         4111  +static int unixGetPagesize(void){
         4112  +#if defined(_BSD_SOURCE)
         4113  +  return getpagesize();
         4114  +#else
         4115  +  return (int)sysconf(_SC_PAGESIZE);
         4116  +#endif
         4117  +}
         4118  +
         4119  +/*
         4120  +** Return the minimum number of 32KB shm regions that should be mapped at
         4121  +** a time, assuming that each mapping must be an integer multiple of the
         4122  +** current system page-size.
         4123  +**
         4124  +** Usually, this is 1. The exception seems to be systems that are configured
         4125  +** to use 64KB pages - in this case each mapping must cover at least two
         4126  +** shm regions.
         4127  +*/
         4128  +static int unixShmRegionPerMap(void){
         4129  +  int shmsz = 32*1024;            /* SHM region size */
         4130  +  int pgsz = unixGetPagesize();   /* System page size */
         4131  +  assert( ((pgsz-1)&pgsz)==0 );   /* Page size must be a power of 2 */
         4132  +  if( pgsz<shmsz ) return 1;
         4133  +  return pgsz/shmsz;
         4134  +}
  4108   4135   
  4109   4136   /*
  4110   4137   ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
  4111   4138   **
  4112   4139   ** This is not a VFS shared-memory method; it is a utility function called
  4113   4140   ** by VFS shared-memory methods.
  4114   4141   */
  4115   4142   static void unixShmPurge(unixFile *pFd){
  4116   4143     unixShmNode *p = pFd->pInode->pShmNode;
  4117   4144     assert( unixMutexHeld() );
  4118   4145     if( p && p->nRef==0 ){
         4146  +    int nShmPerMap = unixShmRegionPerMap();
  4119   4147       int i;
  4120   4148       assert( p->pInode==pFd->pInode );
  4121   4149       sqlite3_mutex_free(p->mutex);
  4122         -    for(i=0; i<p->nRegion; i++){
         4150  +    for(i=0; i<p->nRegion; i+=nShmPerMap){
  4123   4151         if( p->h>=0 ){
  4124   4152           osMunmap(p->apRegion[i], p->szRegion);
  4125   4153         }else{
  4126   4154           sqlite3_free(p->apRegion[i]);
  4127   4155         }
  4128   4156       }
  4129   4157       sqlite3_free(p->apRegion);
................................................................................
  4322   4350     int bExtend,                    /* True to extend file if necessary */
  4323   4351     void volatile **pp              /* OUT: Mapped memory */
  4324   4352   ){
  4325   4353     unixFile *pDbFd = (unixFile*)fd;
  4326   4354     unixShm *p;
  4327   4355     unixShmNode *pShmNode;
  4328   4356     int rc = SQLITE_OK;
         4357  +  int nShmPerMap = unixShmRegionPerMap();
         4358  +  int nReqRegion;
  4329   4359   
  4330   4360     /* If the shared-memory file has not yet been opened, open it now. */
  4331   4361     if( pDbFd->pShm==0 ){
  4332   4362       rc = unixOpenSharedMemory(pDbFd);
  4333   4363       if( rc!=SQLITE_OK ) return rc;
  4334   4364     }
  4335   4365   
................................................................................
  4337   4367     pShmNode = p->pShmNode;
  4338   4368     sqlite3_mutex_enter(pShmNode->mutex);
  4339   4369     assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
  4340   4370     assert( pShmNode->pInode==pDbFd->pInode );
  4341   4371     assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
  4342   4372     assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
  4343   4373   
  4344         -  if( pShmNode->nRegion<=iRegion ){
         4374  +  /* Minimum number of regions required to be mapped. */
         4375  +  nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;
         4376  +
         4377  +  if( pShmNode->nRegion<nReqRegion ){
  4345   4378       char **apNew;                      /* New apRegion[] array */
  4346         -    int nByte = (iRegion+1)*szRegion;  /* Minimum required file size */
         4379  +    int nByte = nReqRegion*szRegion;   /* Minimum required file size */
  4347   4380       struct stat sStat;                 /* Used by fstat() */
  4348   4381   
  4349   4382       pShmNode->szRegion = szRegion;
  4350   4383   
  4351   4384       if( pShmNode->h>=0 ){
  4352   4385         /* The requested region is not mapped into this process's address space.
  4353   4386         ** Check to see if it has been allocated (i.e. if the wal-index file is
................................................................................
  4388   4421             }
  4389   4422           }
  4390   4423         }
  4391   4424       }
  4392   4425   
  4393   4426       /* Map the requested memory region into this process's address space. */
  4394   4427       apNew = (char **)sqlite3_realloc(
  4395         -        pShmNode->apRegion, (iRegion+1)*sizeof(char *)
         4428  +        pShmNode->apRegion, nReqRegion*sizeof(char *)
  4396   4429       );
  4397   4430       if( !apNew ){
  4398   4431         rc = SQLITE_IOERR_NOMEM;
  4399   4432         goto shmpage_out;
  4400   4433       }
  4401   4434       pShmNode->apRegion = apNew;
  4402         -    while(pShmNode->nRegion<=iRegion){
         4435  +    while( pShmNode->nRegion<nReqRegion ){
         4436  +      int nMap = szRegion*nShmPerMap;
         4437  +      int i;
  4403   4438         void *pMem;
  4404   4439         if( pShmNode->h>=0 ){
  4405         -        pMem = osMmap(0, szRegion,
         4440  +        pMem = osMmap(0, nMap,
  4406   4441               pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, 
  4407   4442               MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
  4408   4443           );
  4409   4444           if( pMem==MAP_FAILED ){
  4410   4445             rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
  4411   4446             goto shmpage_out;
  4412   4447           }
................................................................................
  4414   4449           pMem = sqlite3_malloc(szRegion);
  4415   4450           if( pMem==0 ){
  4416   4451             rc = SQLITE_NOMEM;
  4417   4452             goto shmpage_out;
  4418   4453           }
  4419   4454           memset(pMem, 0, szRegion);
  4420   4455         }
  4421         -      pShmNode->apRegion[pShmNode->nRegion] = pMem;
  4422         -      pShmNode->nRegion++;
         4456  +
         4457  +      for(i=0; i<nShmPerMap; i++){
         4458  +        pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i];
         4459  +      }
         4460  +      pShmNode->nRegion += nShmPerMap;
  4423   4461       }
  4424   4462     }
  4425   4463   
  4426   4464   shmpage_out:
  4427   4465     if( pShmNode->nRegion>iRegion ){
  4428   4466       *pp = pShmNode->apRegion[iRegion];
  4429   4467     }else{
................................................................................
  4629   4667       osMunmap(pFd->pMapRegion, pFd->mmapSizeActual);
  4630   4668       pFd->pMapRegion = 0;
  4631   4669       pFd->mmapSize = 0;
  4632   4670       pFd->mmapSizeActual = 0;
  4633   4671     }
  4634   4672   }
  4635   4673   
  4636         -/*
  4637         -** Return the system page size.
  4638         -*/
  4639         -static int unixGetPagesize(void){
  4640         -#if HAVE_MREMAP
  4641         -  return 512;
  4642         -#elif defined(_BSD_SOURCE)
  4643         -  return getpagesize();
  4644         -#else
  4645         -  return (int)sysconf(_SC_PAGESIZE);
  4646         -#endif
  4647         -}
  4648         -
  4649   4674   /*
  4650   4675   ** Attempt to set the size of the memory mapping maintained by file 
  4651   4676   ** descriptor pFd to nNew bytes. Any existing mapping is discarded.
  4652   4677   **
  4653   4678   ** If successful, this function sets the following variables:
  4654   4679   **
  4655   4680   **       unixFile.pMapRegion
................................................................................
  4678   4703     assert( nNew>0 );
  4679   4704     assert( pFd->mmapSizeActual>=pFd->mmapSize );
  4680   4705     assert( MAP_FAILED!=0 );
  4681   4706   
  4682   4707     if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
  4683   4708   
  4684   4709     if( pOrig ){
         4710  +#if HAVE_MREMAP
         4711  +    i64 nReuse = pFd->mmapSize;
         4712  +#else
  4685   4713       const int szSyspage = unixGetPagesize();
  4686   4714       i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
         4715  +#endif
  4687   4716       u8 *pReq = &pOrig[nReuse];
  4688   4717   
  4689   4718       /* Unmap any pages of the existing mapping that cannot be reused. */
  4690   4719       if( nReuse!=nOrig ){
  4691   4720         osMunmap(pReq, nOrig-nReuse);
  4692   4721       }
  4693   4722