/ Check-in [713b1b7d]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:In os_unix.c, do not open the directory containing the database file when the database file is opened. Instead, wait until time to fsync() the directory. And do not report an error if the open fails, since some systems (Ex: AIX and a chromium sandbox) are unable to open and fsync directories.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 713b1b7dc1296e9cee42aeaad8c85528155f721d
User & Date: drh 2011-08-08 23:48:40
Context
2011-08-09
18:14
Add command-line utilities "offsets.c" and "extract.c" for use in low-level analysis of database files. check-in: dfa22ed4 user: drh tags: trunk
2011-08-08
23:48
In os_unix.c, do not open the directory containing the database file when the database file is opened. Instead, wait until time to fsync() the directory. And do not report an error if the open fails, since some systems (Ex: AIX and a chromium sandbox) are unable to open and fsync directories. check-in: 713b1b7d user: drh tags: trunk
23:18
Allow the unlink() system call to be overridden in os_unix.c. check-in: 8d1b5c3a user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/os_unix.c.

   204    204   ** VFS implementations.
   205    205   */
   206    206   typedef struct unixFile unixFile;
   207    207   struct unixFile {
   208    208     sqlite3_io_methods const *pMethod;  /* Always the first entry */
   209    209     unixInodeInfo *pInode;              /* Info about locks on this inode */
   210    210     int h;                              /* The file descriptor */
   211         -  int dirfd;                          /* File descriptor for the directory */
   212    211     unsigned char eFileLock;            /* The type of lock held on this fd */
   213    212     unsigned char ctrlFlags;            /* Behavioral bits.  UNIXFILE_* flags */
   214    213     int lastErrno;                      /* The unix errno from last I/O error */
   215    214     void *lockingContext;               /* Locking style specific state */
   216    215     UnixUnusedFd *pUnused;              /* Pre-allocated UnixUnusedFd */
   217    216     const char *zPath;                  /* Name of the file */
   218    217     unixShm *pShm;                      /* Shared memory segment information */
................................................................................
   249    248   
   250    249   /*
   251    250   ** Allowed values for the unixFile.ctrlFlags bitmask:
   252    251   */
   253    252   #define UNIXFILE_EXCL        0x01     /* Connections from one process only */
   254    253   #define UNIXFILE_RDONLY      0x02     /* Connection is read only */
   255    254   #define UNIXFILE_PERSIST_WAL 0x04     /* Persistent WAL mode */
          255  +#define UNIXFILE_DIRSYNC     0x08     /* Directory sync needed */
   256    256   
   257    257   /*
   258    258   ** Include code that is common to all os_*.c files
   259    259   */
   260    260   #include "os_common.h"
   261    261   
   262    262   /*
................................................................................
  1749   1749   **
  1750   1750   ** It is *not* necessary to hold the mutex when this routine is called,
  1751   1751   ** even on VxWorks.  A mutex will be acquired on VxWorks by the
  1752   1752   ** vxworksReleaseFileId() routine.
  1753   1753   */
  1754   1754   static int closeUnixFile(sqlite3_file *id){
  1755   1755     unixFile *pFile = (unixFile*)id;
  1756         -  if( pFile->dirfd>=0 ){
  1757         -    robust_close(pFile, pFile->dirfd, __LINE__);
  1758         -    pFile->dirfd=-1;
  1759         -  }
  1760   1756     if( pFile->h>=0 ){
  1761   1757       robust_close(pFile, pFile->h, __LINE__);
  1762   1758       pFile->h = -1;
  1763   1759     }
  1764   1760   #if OS_VXWORKS
  1765   1761     if( pFile->pId ){
  1766   1762       if( pFile->isDelete ){
................................................................................
  3244   3240   #endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */
  3245   3241   
  3246   3242     if( OS_VXWORKS && rc!= -1 ){
  3247   3243       rc = 0;
  3248   3244     }
  3249   3245     return rc;
  3250   3246   }
         3247  +
         3248  +/*
         3249  +** Open a file descriptor to the directory containing file zFilename.
         3250  +** If successful, *pFd is set to the opened file descriptor and
         3251  +** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
         3252  +** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
         3253  +** value.
         3254  +**
         3255  +** If SQLITE_OK is returned, the caller is responsible for closing
         3256  +** the file descriptor *pFd using close().
         3257  +*/
         3258  +static int openDirectory(const char *zFilename, int *pFd){
         3259  +  int ii;
         3260  +  int fd = -1;
         3261  +  char zDirname[MAX_PATHNAME+1];
         3262  +
         3263  +  sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
         3264  +  for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--);
         3265  +  if( ii>0 ){
         3266  +    zDirname[ii] = '\0';
         3267  +    fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0);
         3268  +    if( fd>=0 ){
         3269  +#ifdef FD_CLOEXEC
         3270  +      osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
         3271  +#endif
         3272  +      OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));
         3273  +    }
         3274  +  }
         3275  +  *pFd = fd;
         3276  +  return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname));
         3277  +}
  3251   3278   
  3252   3279   /*
  3253   3280   ** Make sure all writes to a particular file are committed to disk.
  3254   3281   **
  3255   3282   ** If dataOnly==0 then both the file itself and its metadata (file
  3256   3283   ** size, access time, etc) are synced.  If dataOnly!=0 then only the
  3257   3284   ** file data is synced.
................................................................................
  3285   3312     OSTRACE(("SYNC    %-3d\n", pFile->h));
  3286   3313     rc = full_fsync(pFile->h, isFullsync, isDataOnly);
  3287   3314     SimulateIOError( rc=1 );
  3288   3315     if( rc ){
  3289   3316       pFile->lastErrno = errno;
  3290   3317       return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath);
  3291   3318     }
  3292         -  if( pFile->dirfd>=0 ){
  3293         -    OSTRACE(("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
         3319  +
         3320  +  /* Also fsync the directory containing the file if the DIRSYNC flag
         3321  +  ** is set.  This is a one-time occurrence.  Many systems (examples: AIX
         3322  +  ** or any process running inside a chromium sandbox) are unable to fsync a
         3323  +  ** directory, so ignore errors.
         3324  +  */
         3325  +  if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){
         3326  +    int dirfd;
         3327  +    OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath,
  3294   3328               HAVE_FULLFSYNC, isFullsync));
  3295         -#ifndef SQLITE_DISABLE_DIRSYNC
  3296         -    /* The directory sync is only attempted if full_fsync is
  3297         -    ** turned off or unavailable.  If a full_fsync occurred above,
  3298         -    ** then the directory sync is superfluous.
  3299         -    */
  3300         -    if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
  3301         -       /*
  3302         -       ** We have received multiple reports of fsync() returning
  3303         -       ** errors when applied to directories on certain file systems.
  3304         -       ** A failed directory sync is not a big deal.  So it seems
  3305         -       ** better to ignore the error.  Ticket #1657
  3306         -       */
  3307         -       /* pFile->lastErrno = errno; */
  3308         -       /* return SQLITE_IOERR; */
         3329  +    openDirectory(pFile->zPath, &dirfd);
         3330  +    if( dirfd>=0 ){
         3331  +      full_fsync(dirfd, 0, 0);
         3332  +      robust_close(pFile, dirfd, __LINE__);
  3309   3333       }
  3310         -#endif
  3311         -    /* Only need to sync once, so close the  directory when we are done */
  3312         -    robust_close(pFile, pFile->dirfd, __LINE__);
  3313         -    pFile->dirfd = -1;
         3334  +    pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC;
  3314   3335     }
  3315   3336     return rc;
  3316   3337   }
  3317   3338   
  3318   3339   /*
  3319   3340   ** Truncate an open file to a specified size
  3320   3341   */
................................................................................
  4474   4495   
  4475   4496   /*
  4476   4497   ** Initialize the contents of the unixFile structure pointed to by pId.
  4477   4498   */
  4478   4499   static int fillInUnixFile(
  4479   4500     sqlite3_vfs *pVfs,      /* Pointer to vfs object */
  4480   4501     int h,                  /* Open file descriptor of file being opened */
  4481         -  int dirfd,              /* Directory file descriptor */
         4502  +  int syncDir,            /* True to sync directory on first sync */
  4482   4503     sqlite3_file *pId,      /* Write to the unixFile structure here */
  4483   4504     const char *zFilename,  /* Name of the file being opened */
  4484   4505     int noLock,             /* Omit locking if true */
  4485   4506     int isDelete,           /* Delete on close if true */
  4486   4507     int isReadOnly          /* True if the file is opened read-only */
  4487   4508   ){
  4488   4509     const sqlite3_io_methods *pLockingStyle;
................................................................................
  4505   4526       || pVfs->pAppData==(void*)&autolockIoFinder );
  4506   4527   #else
  4507   4528     assert( zFilename==0 || zFilename[0]=='/' );
  4508   4529   #endif
  4509   4530   
  4510   4531     OSTRACE(("OPEN    %-3d %s\n", h, zFilename));
  4511   4532     pNew->h = h;
  4512         -  pNew->dirfd = dirfd;
  4513   4533     pNew->zPath = zFilename;
  4514   4534     if( memcmp(pVfs->zName,"unix-excl",10)==0 ){
  4515   4535       pNew->ctrlFlags = UNIXFILE_EXCL;
  4516   4536     }else{
  4517   4537       pNew->ctrlFlags = 0;
  4518   4538     }
  4519   4539     if( isReadOnly ){
  4520   4540       pNew->ctrlFlags |= UNIXFILE_RDONLY;
  4521   4541     }
         4542  +  if( syncDir ){
         4543  +    pNew->ctrlFlags |= UNIXFILE_DIRSYNC;
         4544  +  }
  4522   4545   
  4523   4546   #if OS_VXWORKS
  4524   4547     pNew->pId = vxworksFindFileId(zFilename);
  4525   4548     if( pNew->pId==0 ){
  4526   4549       noLock = 1;
  4527   4550       rc = SQLITE_NOMEM;
  4528   4551     }
................................................................................
  4647   4670       h = -1;
  4648   4671       osUnlink(zFilename);
  4649   4672       isDelete = 0;
  4650   4673     }
  4651   4674     pNew->isDelete = isDelete;
  4652   4675   #endif
  4653   4676     if( rc!=SQLITE_OK ){
  4654         -    if( dirfd>=0 ) robust_close(pNew, dirfd, __LINE__);
  4655   4677       if( h>=0 ) robust_close(pNew, h, __LINE__);
  4656   4678     }else{
  4657   4679       pNew->pMethod = pLockingStyle;
  4658   4680       OpenCounter(+1);
  4659   4681     }
  4660   4682     return rc;
  4661   4683   }
  4662   4684   
  4663         -/*
  4664         -** Open a file descriptor to the directory containing file zFilename.
  4665         -** If successful, *pFd is set to the opened file descriptor and
  4666         -** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
  4667         -** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
  4668         -** value.
  4669         -**
  4670         -** If SQLITE_OK is returned, the caller is responsible for closing
  4671         -** the file descriptor *pFd using close().
  4672         -*/
  4673         -static int openDirectory(const char *zFilename, int *pFd){
  4674         -  int ii;
  4675         -  int fd = -1;
  4676         -  char zDirname[MAX_PATHNAME+1];
  4677         -
  4678         -  sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
  4679         -  for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--);
  4680         -  if( ii>0 ){
  4681         -    zDirname[ii] = '\0';
  4682         -    fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0);
  4683         -    if( fd>=0 ){
  4684         -#ifdef FD_CLOEXEC
  4685         -      osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
  4686         -#endif
  4687         -      OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));
  4688         -    }
  4689         -  }
  4690         -  *pFd = fd;
  4691         -  return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname));
  4692         -}
  4693         -
  4694   4685   /*
  4695   4686   ** Return the name of a directory in which to put temporary files.
  4696   4687   ** If no suitable temporary file directory can be found, return NULL.
  4697   4688   */
  4698   4689   static const char *unixTempFileDir(void){
  4699   4690     static const char *azDirs[] = {
  4700   4691        0,
................................................................................
  4919   4910     const char *zPath,           /* Pathname of file to be opened */
  4920   4911     sqlite3_file *pFile,         /* The file descriptor to be filled in */
  4921   4912     int flags,                   /* Input flags to control the opening */
  4922   4913     int *pOutFlags               /* Output flags returned to SQLite core */
  4923   4914   ){
  4924   4915     unixFile *p = (unixFile *)pFile;
  4925   4916     int fd = -1;                   /* File descriptor returned by open() */
  4926         -  int dirfd = -1;                /* Directory file descriptor */
  4927   4917     int openFlags = 0;             /* Flags to pass to open() */
  4928   4918     int eType = flags&0xFFFFFF00;  /* Type of file to open */
  4929   4919     int noLock;                    /* True to omit locking primitives */
  4930   4920     int rc = SQLITE_OK;            /* Function Return Code */
  4931   4921   
  4932   4922     int isExclusive  = (flags & SQLITE_OPEN_EXCLUSIVE);
  4933   4923     int isDelete     = (flags & SQLITE_OPEN_DELETEONCLOSE);
................................................................................
  4938   4928     int isAutoProxy  = (flags & SQLITE_OPEN_AUTOPROXY);
  4939   4929   #endif
  4940   4930   
  4941   4931     /* If creating a master or main-file journal, this function will open
  4942   4932     ** a file-descriptor on the directory too. The first time unixSync()
  4943   4933     ** is called the directory file descriptor will be fsync()ed and close()d.
  4944   4934     */
  4945         -  int isOpenDirectory = (isCreate && (
         4935  +  int syncDir = (isCreate && (
  4946   4936           eType==SQLITE_OPEN_MASTER_JOURNAL 
  4947   4937        || eType==SQLITE_OPEN_MAIN_JOURNAL 
  4948   4938        || eType==SQLITE_OPEN_WAL
  4949   4939     ));
  4950   4940   
  4951   4941     /* If argument zPath is a NULL pointer, this function is required to open
  4952   4942     ** a temporary file. Use this buffer to store the file name in.
................................................................................
  4992   4982         if( !pUnused ){
  4993   4983           return SQLITE_NOMEM;
  4994   4984         }
  4995   4985       }
  4996   4986       p->pUnused = pUnused;
  4997   4987     }else if( !zName ){
  4998   4988       /* If zName is NULL, the upper layer is requesting a temp file. */
  4999         -    assert(isDelete && !isOpenDirectory);
         4989  +    assert(isDelete && !syncDir);
  5000   4990       rc = unixGetTempname(MAX_PATHNAME+1, zTmpname);
  5001   4991       if( rc!=SQLITE_OK ){
  5002   4992         return rc;
  5003   4993       }
  5004   4994       zName = zTmpname;
  5005   4995     }
  5006   4996   
................................................................................
  5057   5047     }
  5058   5048   #if SQLITE_ENABLE_LOCKING_STYLE
  5059   5049     else{
  5060   5050       p->openFlags = openFlags;
  5061   5051     }
  5062   5052   #endif
  5063   5053   
  5064         -  if( isOpenDirectory ){
  5065         -    rc = openDirectory(zPath, &dirfd);
  5066         -    if( rc!=SQLITE_OK ){
  5067         -      /* It is safe to close fd at this point, because it is guaranteed not
  5068         -      ** to be open on a database file. If it were open on a database file,
  5069         -      ** it would not be safe to close as this would release any locks held
  5070         -      ** on the file by this process.  */
  5071         -      assert( eType!=SQLITE_OPEN_MAIN_DB );
  5072         -      robust_close(p, fd, __LINE__);
  5073         -      goto open_finished;
  5074         -    }
  5075         -  }
  5076         -
  5077   5054   #ifdef FD_CLOEXEC
  5078   5055     osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
  5079   5056   #endif
  5080   5057   
  5081   5058     noLock = eType!=SQLITE_OPEN_MAIN_DB;
  5082   5059   
  5083   5060     
  5084   5061   #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
  5085   5062     struct statfs fsInfo;
  5086   5063     if( fstatfs(fd, &fsInfo) == -1 ){
  5087   5064       ((unixFile*)pFile)->lastErrno = errno;
  5088         -    if( dirfd>=0 ) robust_close(p, dirfd, __LINE__);
  5089   5065       robust_close(p, fd, __LINE__);
  5090   5066       return SQLITE_IOERR_ACCESS;
  5091   5067     }
  5092   5068     if (0 == strncmp("msdos", fsInfo.f_fstypename, 5)) {
  5093   5069       ((unixFile*)pFile)->fsFlags |= SQLITE_FSFLAGS_IS_MSDOS;
  5094   5070     }
  5095   5071   #endif
................................................................................
  5113   5089           ** with fd is a database file, and there are other connections open
  5114   5090           ** on that file that are currently holding advisory locks on it,
  5115   5091           ** then the call to close() will cancel those locks. In practice,
  5116   5092           ** we're assuming that statfs() doesn't fail very often. At least
  5117   5093           ** not while other file descriptors opened by the same process on
  5118   5094           ** the same file are working.  */
  5119   5095           p->lastErrno = errno;
  5120         -        if( dirfd>=0 ){
  5121         -          robust_close(p, dirfd, __LINE__);
  5122         -        }
  5123   5096           robust_close(p, fd, __LINE__);
  5124   5097           rc = SQLITE_IOERR_ACCESS;
  5125   5098           goto open_finished;
  5126   5099         }
  5127   5100         useProxy = !(fsInfo.f_flags&MNT_LOCAL);
  5128   5101       }
  5129   5102       if( useProxy ){
  5130         -      rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock,
         5103  +      rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock,
  5131   5104                             isDelete, isReadonly);
  5132   5105         if( rc==SQLITE_OK ){
  5133   5106           rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:");
  5134   5107           if( rc!=SQLITE_OK ){
  5135   5108             /* Use unixClose to clean up the resources added in fillInUnixFile 
  5136   5109             ** and clear all the structure's references.  Specifically, 
  5137   5110             ** pFile->pMethods will be NULL so sqlite3OsClose will be a no-op 
................................................................................
  5141   5114           }
  5142   5115         }
  5143   5116         goto open_finished;
  5144   5117       }
  5145   5118     }
  5146   5119   #endif
  5147   5120     
  5148         -  rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock,
         5121  +  rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock,
  5149   5122                         isDelete, isReadonly);
  5150   5123   open_finished:
  5151   5124     if( rc!=SQLITE_OK ){
  5152   5125       sqlite3_free(p->pUnused);
  5153   5126     }
  5154   5127     return rc;
  5155   5128   }
................................................................................
  5741   5714   */
  5742   5715   static int proxyCreateUnixFile(
  5743   5716       const char *path,        /* path for the new unixFile */
  5744   5717       unixFile **ppFile,       /* unixFile created and returned by ref */
  5745   5718       int islockfile           /* if non zero missing dirs will be created */
  5746   5719   ) {
  5747   5720     int fd = -1;
  5748         -  int dirfd = -1;
  5749   5721     unixFile *pNew;
  5750   5722     int rc = SQLITE_OK;
  5751   5723     int openFlags = O_RDWR | O_CREAT;
  5752   5724     sqlite3_vfs dummyVfs;
  5753   5725     int terrno = 0;
  5754   5726     UnixUnusedFd *pUnused = NULL;
  5755   5727   
................................................................................
  5806   5778     memset(&dummyVfs, 0, sizeof(dummyVfs));
  5807   5779     dummyVfs.pAppData = (void*)&autolockIoFinder;
  5808   5780     dummyVfs.zName = "dummy";
  5809   5781     pUnused->fd = fd;
  5810   5782     pUnused->flags = openFlags;
  5811   5783     pNew->pUnused = pUnused;
  5812   5784     
  5813         -  rc = fillInUnixFile(&dummyVfs, fd, dirfd, (sqlite3_file*)pNew, path, 0, 0, 0);
         5785  +  rc = fillInUnixFile(&dummyVfs, fd, 0, (sqlite3_file*)pNew, path, 0, 0, 0);
  5814   5786     if( rc==SQLITE_OK ){
  5815   5787       *ppFile = pNew;
  5816   5788       return SQLITE_OK;
  5817   5789     }
  5818   5790   end_create_proxy:    
  5819   5791     robust_close(pNew, fd, __LINE__);
  5820   5792     sqlite3_free(pNew);