/* ** 2001 September 16 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains code that is specific to particular operating ** systems. The purpose of this file is to provide a uniform abstraction ** on which the rest of SQLite can operate. */ #include "sqliteInt.h" #include "os.h" #ifndef OS_UNIX # ifndef OS_WIN # if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__) # define OS_WIN 1 # define OS_UNIX 0 # else # define OS_WIN 0 # define OS_UNIX 1 # endif # else # define OS_UNIX 0 # endif #endif #ifndef OS_WIN # define OS_WIN 0 #endif #if OS_UNIX # include # include # include # include #endif #if OS_WIN # include #endif #if OS_UNIX /* ** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996) ** section 6.5.2.2 lines 483 through 490 specify that when a process ** sets or clears a lock, that operation overrides any prior locks set ** by the same process. It does not explicitly say so, but this implies ** that it overrides locks set by the same process using a different ** file descriptor. Consider this test case: ** ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); ** ** Suppose ./file1 and ./file2 are really be the same file (because ** one is a hard or symbolic link to the other) then if you set ** an exclusive lock on fd1, then try to get an exclusive lock ** on fd2, it works. I would have expected the second lock to ** fail since there was already a lock on the file due to fd1. ** But not so. Since both locks came from the same process, the ** second overrides the first, even though they were on different ** file descriptors opened on different file names. ** ** Bummer. If you ask me, this is broken. Badly broken. It means ** that we cannot use POSIX locks to synchronize file access among ** competing threads of the same process. POSIX locks will work fine ** to synchronize access for threads in separate processes, but not ** threads within the same process. ** ** To work around the problem, SQLite has to manage file locks internally ** on its own. Whenever a new database is opened, we have to find the ** specific inode of the database file (the inode is determined by the ** st_dev and st_ino fields of the stat structure that fstat() fills in) ** and check for locks already existing on that inode. When locks are ** created or removed, we have to look at our own internal record of the ** locks to see if another thread has previously set a lock on that same ** inode. ** ** The OsFile structure for POSIX is no longer just an integer file ** descriptor. It is now a structure that holds the integer file ** descriptor and a pointer to a structure that describes the internal ** locks on the corresponding inode. There is one locking structure ** per inode, so if the same inode is opened twice, both OsFile structures ** point to the same locking structure. The locking structure keeps ** a reference count (so we will know when to delete it) and a "cnt" ** field that tells us its internal lock status. cnt==0 means the ** file is unlocked. cnt==-1 means the file has an exclusive lock. ** cnt>0 means there are cnt shared locks on the file. ** ** Any attempt to lock or unlock a file first checks the locking ** structure. The fcntl() system call is only invoked to set a ** POSIX lock if the internal lock structure transitions between ** a locked and an unlocked state. */ /* ** An instance of the following structure serves as the key used ** to locate a particular lockInfo structure given its inode. */ struct inodeKey { dev_t dev; /* Device number */ ino_t ino; /* Inode number */ }; /* ** An instance of the following structure is allocated for each inode. ** A single inode can have multiple file descriptors, so each OsFile ** structure contains a pointer to an instance of this object and this ** object keeps a count of the number of OsFiles pointing to it. */ struct lockInfo { struct inodeKey key; /* The lookup key */ int cnt; /* 0: unlocked. -1: write lock. 1...: read lock. */ int nRef; /* Number of pointers to this structure */ }; /* ** This hash table maps inodes (in the form of inodeKey structures) into ** pointers to lockInfo structures. */ static Hash lockHash = { SQLITE_HASH_BINARY, 0, 0, 0, 0, 0 }; /* ** Given a file descriptor, locate a lockInfo structure that describes ** that file descriptor. Create a new one if necessary. NULL might ** be returned if malloc() fails. */ static struct lockInfo *findLockInfo(int fd){ int rc; struct inodeKey key; struct stat statbuf; struct lockInfo *pInfo; rc = fstat(fd, &statbuf); if( rc!=0 ) return 0; memset(&key, 0, sizeof(key)); key.dev = statbuf.st_dev; key.ino = statbuf.st_ino; pInfo = (struct lockInfo*)sqliteHashFind(&lockHash, &key, sizeof(key)); if( pInfo==0 ){ struct lockInfo *pOld; pInfo = sqliteMalloc( sizeof(*pInfo) ); if( pInfo==0 ) return 0; pInfo->key = key; pInfo->nRef = 1; pInfo->cnt = 0; pOld = sqliteHashInsert(&lockHash, &pInfo->key, sizeof(key), pInfo); if( pOld!=0 ){ assert( pOld==pInfo ); sqliteFree(pInfo); pInfo = 0; } }else{ pInfo->nRef++; } return pInfo; } /* ** Release a lockInfo structure previously allocated by findLockInfo(). */ static void releaseLockInfo(struct lockInfo *pInfo){ pInfo->nRef--; if( pInfo->nRef==0 ){ sqliteHashInsert(&lockHash, &pInfo->key, sizeof(pInfo->key), 0); sqliteFree(pInfo); } } #endif /** POSIX advisory lock work-around **/ /* ** If we compile with the SQLITE_TEST macro set, then the following block ** of code will give us the ability to simulate a disk I/O error. This ** is used for testing the I/O recovery logic. */ #ifdef SQLITE_TEST int sqlite_io_error_pending = 0; #define SimulateIOError(A) \ if( sqlite_io_error_pending ) \ if( sqlite_io_error_pending-- == 1 ){ local_ioerr(); return A; } static void local_ioerr(){ sqlite_io_error_pending = 0; /* Really just a place to set a breakpoint */ } #else #define SimulateIOError(A) #endif /* ** Delete the named file */ int sqliteOsDelete(const char *zFilename){ #if OS_UNIX unlink(zFilename); #endif #if OS_WIN DeleteFile(zFilename); #endif return SQLITE_OK; } /* ** Return TRUE if the named file exists. */ int sqliteOsFileExists(const char *zFilename){ #if OS_UNIX return access(zFilename, 0)==0; #endif #if OS_WIN HANDLE h; h = CreateFile(zFilename, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, NULL ); if( h!=INVALID_HANDLE_VALUE ){ CloseHandle(h); return 1; } return 0; #endif } /* ** Attempt to open a file for both reading and writing. If that ** fails, try opening it read-only. If the file does not exist, ** try to create it. ** ** On success, a handle for the open file is written to *id ** and *pReadonly is set to 0 if the file was opened for reading and ** writing or 1 if the file was opened read-only. The function returns ** SQLITE_OK. ** ** On failure, the function returns SQLITE_CANTOPEN and leaves ** *pResulst and *pReadonly unchanged. */ int sqliteOsOpenReadWrite( const char *zFilename, OsFile *id, int *pReadonly ){ #if OS_UNIX id->fd = open(zFilename, O_RDWR|O_CREAT, 0644); if( id->fd<0 ){ id->fd = open(zFilename, O_RDONLY); if( id->fd<0 ){ return SQLITE_CANTOPEN; } *pReadonly = 1; }else{ *pReadonly = 0; } sqliteOsEnterMutex(); id->pLock = findLockInfo(id->fd); sqliteOsLeaveMutex(); if( id->pLock==0 ){ close(id->fd); return SQLITE_NOMEM; } id->locked = 0; return SQLITE_OK; #endif #if OS_WIN HANDLE h = CreateFile(zFilename, GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, NULL ); if( h==INVALID_HANDLE_VALUE ){ h = CreateFile(zFilename, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, NULL ); if( h==INVALID_HANDLE_VALUE ){ return SQLITE_CANTOPEN; } *pReadonly = 1; }else{ *pReadonly = 0; } id->h = h; id->locked = 0; return SQLITE_OK; #endif } /* ** Attempt to open a new file for exclusive access by this process. ** The file will be opened for both reading and writing. To avoid ** a potential security problem, we do not allow the file to have ** previously existed. Nor do we allow the file to be a symbolic ** link. ** ** On success, write the file handle into *id and return SQLITE_OK. ** ** On failure, return SQLITE_CANTOPEN. */ int sqliteOsOpenExclusive(const char *zFilename, OsFile *id){ #if OS_UNIX if( access(zFilename, 0)==0 ){ return SQLITE_CANTOPEN; } #ifndef O_NOFOLLOW # define O_NOFOLLOW 0 #endif id->fd = open(zFilename, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, 0600); if( id->fd<0 ){ return SQLITE_CANTOPEN; } sqliteOsEnterMutex(); id->pLock = findLockInfo(id->fd); sqliteOsLeaveMutex(); if( id->pLock==0 ){ close(id->fd); unlink(zFilename); return SQLITE_NOMEM; } id->locked = 0; return SQLITE_OK; #endif #if OS_WIN HANDLE h = CreateFile(zFilename, GENERIC_READ | GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, NULL ); if( h==INVALID_HANDLE_VALUE ){ return SQLITE_CANTOPEN; } id->h = h; id->locked = 0; return SQLITE_OK; #endif } /* ** Attempt to open a new file for read-only access. ** ** On success, write the file handle into *id and return SQLITE_OK. ** ** On failure, return SQLITE_CANTOPEN. */ int sqliteOsOpenReadOnly(const char *zFilename, OsFile *id){ #if OS_UNIX id->fd = open(zFilename, O_RDONLY); if( id->fd<0 ){ return SQLITE_CANTOPEN; } sqliteOsEnterMutex(); id->pLock = findLockInfo(id->fd); sqliteOsLeaveMutex(); if( id->pLock==0 ){ close(id->fd); return SQLITE_NOMEM; } id->locked = 0; return SQLITE_OK; #endif #if OS_WIN HANDLE h = CreateFile(zFilename, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, NULL ); if( h==INVALID_HANDLE_VALUE ){ return SQLITE_CANTOPEN; } id->h = h; id->locked = 0; return SQLITE_OK; #endif } /* ** Create a temporary file name in zBuf. zBuf must be big enough to ** hold at least SQLITE_TEMPNAME_SIZE characters. */ int sqliteOsTempFileName(char *zBuf){ #if OS_UNIX static const char *azDirs[] = { ".", "/var/tmp", "/usr/tmp", "/tmp", }; static char zChars[] = "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789"; int i, j; struct stat buf; const char *zDir = "."; for(i=0; ifd); sqliteOsEnterMutex(); releaseLockInfo(id->pLock); sqliteOsLeaveMutex(); return SQLITE_OK; #endif #if OS_WIN CloseHandle(id->h); return SQLITE_OK; #endif } /* ** Read data from a file into a buffer. Return SQLITE_OK if all ** bytes were read successfully and SQLITE_IOERR if anything goes ** wrong. */ int sqliteOsRead(OsFile *id, void *pBuf, int amt){ #if OS_UNIX int got; SimulateIOError(SQLITE_IOERR); got = read(id->fd, pBuf, amt); if( got<0 ) got = 0; return got==amt ? SQLITE_OK : SQLITE_IOERR; #endif #if OS_WIN DWORD got; SimulateIOError(SQLITE_IOERR); if( !ReadFile(id->h, pBuf, amt, &got, 0) ){ got = 0; } return got==amt ? SQLITE_OK : SQLITE_IOERR; #endif } /* ** Write data from a buffer into a file. Return SQLITE_OK on success ** or some other error code on failure. */ int sqliteOsWrite(OsFile *id, const void *pBuf, int amt){ #if OS_UNIX int wrote; SimulateIOError(SQLITE_IOERR); wrote = write(id->fd, pBuf, amt); if( wroteh, pBuf, amt, &wrote, 0) || (int)wrotefd, offset, SEEK_SET); return SQLITE_OK; #endif #if OS_WIN SetFilePointer(id->h, offset, 0, FILE_BEGIN); return SQLITE_OK; #endif } /* ** Make sure all writes to a particular file are committed to disk. */ int sqliteOsSync(OsFile *id){ SimulateIOError(SQLITE_IOERR); #if OS_UNIX return fsync(id->fd)==0 ? SQLITE_OK : SQLITE_IOERR; #endif #if OS_WIN return FlushFileBuffers(id->h) ? SQLITE_OK : SQLITE_IOERR; #endif } /* ** Truncate an open file to a specified size */ int sqliteOsTruncate(OsFile *id, int nByte){ SimulateIOError(SQLITE_IOERR); #if OS_UNIX return ftruncate(id->fd, nByte)==0 ? SQLITE_OK : SQLITE_IOERR; #endif #if OS_WIN SetFilePointer(id->h, nByte, 0, FILE_BEGIN); SetEndOfFile(id->h); return SQLITE_OK; #endif } /* ** Determine the current size of a file in bytes */ int sqliteOsFileSize(OsFile *id, int *pSize){ #if OS_UNIX struct stat buf; SimulateIOError(SQLITE_IOERR); if( fstat(id->fd, &buf)!=0 ){ return SQLITE_IOERR; } *pSize = buf.st_size; return SQLITE_OK; #endif #if OS_WIN SimulateIOError(SQLITE_IOERR); *pSize = GetFileSize(id->h, 0); return SQLITE_OK; #endif } /* ** Change the status of the lock on the file "id" to be a readlock. ** If the file was write locked, then this reduces the lock to a read. ** If the file was read locked, then this acquires a new read lock. ** ** Return SQLITE_OK on success and SQLITE_BUSY on failure. */ int sqliteOsReadLock(OsFile *id){ #if OS_UNIX int rc; sqliteOsEnterMutex(); if( id->pLock->cnt>0 ){ if( !id->locked ){ id->pLock->cnt++; id->locked = 1; } rc = SQLITE_OK; }else if( id->locked || id->pLock->cnt==0 ){ struct flock lock; lock.l_type = F_RDLCK; lock.l_whence = SEEK_SET; lock.l_start = lock.l_len = 0L; if( fcntl(id->fd, F_SETLK, &lock)!=0 ){ rc = SQLITE_BUSY; }else{ rc = SQLITE_OK; id->pLock->cnt = 1; id->locked = 1; } }else{ rc = SQLITE_BUSY; } sqliteOsLeaveMutex(); return rc; #endif #if OS_WIN int rc; if( id->locked ){ rc = SQLITE_OK; }else if( LockFile(id->h, 0, 0, 1024, 0) ){ rc = SQLITE_OK; id->locked = 1; }else{ rc = SQLITE_BUSY; } return rc; #endif } /* ** Change the lock status to be an exclusive or write lock. Return ** SQLITE_OK on success and SQLITE_BUSY on a failure. */ int sqliteOsWriteLock(OsFile *id){ #if OS_UNIX int rc; sqliteOsEnterMutex(); if( id->pLock->cnt==0 || (id->pLock->cnt==1 && id->locked==1) ){ struct flock lock; lock.l_type = F_WRLCK; lock.l_whence = SEEK_SET; lock.l_start = lock.l_len = 0L; if( fcntl(id->fd, F_SETLK, &lock)!=0 ){ rc = SQLITE_BUSY; }else{ rc = SQLITE_OK; id->pLock->cnt = -1; id->locked = 1; } }else{ rc = SQLITE_BUSY; } sqliteOsLeaveMutex(); return rc; #endif #if OS_WIN int rc; if( id->locked ){ rc = SQLITE_OK; }else if( LockFile(id->h, 0, 0, 1024, 0) ){ rc = SQLITE_OK; id->locked = 1; }else{ rc = SQLITE_BUSY; } return rc; #endif } /* ** Unlock the given file descriptor. If the file descriptor was ** not previously locked, then this routine is a no-op. */ int sqliteOsUnlock(OsFile *id){ #if OS_UNIX int rc; if( !id->locked ) return SQLITE_OK; sqliteOsEnterMutex(); assert( id->pLock->cnt!=0 ); if( id->pLock->cnt>1 ){ id->pLock->cnt--; rc = SQLITE_OK; }else{ struct flock lock; lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = lock.l_len = 0L; if( fcntl(id->fd, F_SETLK, &lock)!=0 ){ rc = SQLITE_BUSY; }else{ rc = SQLITE_OK; id->pLock->cnt = 0; } } sqliteOsLeaveMutex(); id->locked = 0; return rc; #endif #if OS_WIN int rc; if( !id->locked ){ rc = SQLITE_OK; }else if( UnlockFile(id->h, 0, 0, 1024, 0) ){ rc = SQLITE_OK; id->locked = 0; }else{ rc = SQLITE_BUSY; } return rc; #endif } /* ** Get information to seed the random number generator. */ int sqliteOsRandomSeed(char *zBuf){ static int once = 1; #if OS_UNIX int pid; time((time_t*)zBuf); pid = getpid(); memcpy(&zBuf[sizeof(time_t)], &pid, sizeof(pid)); #endif #if OS_WIN GetSystemTime((LPSYSTEMTIME)zBuf); #endif if( once ){ int seed; memcpy(&seed, zBuf, sizeof(seed)); srand(seed); once = 0; } return SQLITE_OK; } /* ** Sleep for a little while. Return the amount of time slept. */ int sqliteOsSleep(int ms){ #if OS_UNIX #if defined(HAVE_USLEEP) && HAVE_USLEEP usleep(ms*1000); return ms; #else sleep((ms+999)/1000); return 1000*((ms+999)/1000); #endif #endif #if OS_WIN Sleep(ms); return ms; #endif } /* ** Macros used to determine whether or not to use threads. The ** SQLITE_UNIX_THREADS macro is defined if we are synchronizing for ** Posix threads and SQLITE_W32_THREADS is defined if we are ** synchronizing using Win32 threads. */ #if OS_UNIX && defined(THREADSAFE) && THREADSAFE # include # define SQLITE_UNIX_THREADS 1 #endif #if OS_WIN && defined(THREADSAFE) && THREADSAFE # define SQLITE_W32_THREADS 1 #endif /* ** Static variables used for thread synchronization */ static int inMutex = 0; #ifdef SQLITE_UNIX_THREADS static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; #endif #ifdef SQLITE_W32_THREADS static CRITICAL_SECTION cs; #endif /* ** The following pair of routine implement mutual exclusion for ** multi-threaded processes. Only a single thread is allowed to ** executed code that is surrounded by EnterMutex() and LeaveMutex(). ** ** SQLite uses only a single Mutex. There is not much critical ** code and what little there is executes quickly and without blocking. */ void sqliteOsEnterMutex(){ #ifdef SQLITE_UNIX_THREADS pthread_mutex_lock(&mutex); #endif #ifdef SQLITE_W32_THREADS static int isInit = 0; while( !isInit ){ static long lock = 0; if( InterlockedIncrement(&lock)==1 ){ InitializeCriticalSection(&cs); isInit = 1; }else{ Sleep(1); } } EnterCriticalSection(&cs); #endif assert( !inMutex ); inMutex = 1; } void sqliteOsLeaveMutex(){ assert( inMutex ); inMutex = 0; #ifdef SQLITE_UNIX_THREADS pthread_mutex_unlock(&mutex); #endif #ifdef SQLITE_W32_THREADS LeaveCriticalSection(&cs); #endif }