SQLite

Check-in [b9d29ea385]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Retry selected system calls on unix when they fail with EINTR.
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: b9d29ea385bafcf87c7dd07822ce9ec3d3892bd1
User & Date: drh 2011-02-23 13:33:46.402
References
2011-02-23
14:33
Automatically retry system calls that fail with EINTR. This is a backport of the changes from [b9d29ea385bafc] and [af9ba2a6d2c379]. (Leaf check-in: 8609a15dfa user: drh tags: branch-3.7.4)
Context
2011-02-23
14:00
Fix a typo in the robust_flock() macro for systems without EINTR. (check-in: af9ba2a6d2 user: drh tags: trunk)
13:53
The robust_flock() fix that accidently included some unrelated, though harmless changes. I should follow my own checklist! (Closed-Leaf check-in: e701efbd1d user: drh tags: mistake)
13:33
Retry selected system calls on unix when they fail with EINTR. (check-in: b9d29ea385 user: drh tags: trunk)
2011-02-22
03:34
When a stale schema-cookie is seen, expire only the one statement that encountered the bad cookie, not every statement on the database connection. Ticket [b72787b1a7cea1f] (check-in: 1bca0a7e19 user: drh tags: trunk)
Changes
Side-by-Side Diff Ignore Whitespace Patch
Changes to src/os_unix.c.
392
393
394
395
396
397
398













399
400
401
402
403
404
405
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418







+
+
+
+
+
+
+
+
+
+
+
+
+







  errno = savedErrno;
  return s;
}
#define fcntl lockTrace
#endif /* SQLITE_LOCK_TRACE */


/*
** Retry ftruncate() calls that fail due to EINTR
*/
#ifdef EINTR
static int robust_ftruncate(int h, sqlite3_int64 sz){
  int rc;
  do{ rc = ftruncate(h,sz); }while( rc<0 && errno==EINTR );
  return rc;
}
#else
# define robust_ftruncate(a,b) ftruncate(a,b)
#endif 


/*
** This routine translates a standard POSIX errno code into something
** useful to the clients of the sqlite3 functions.  Specifically, it is
** intended to translate a variety of "try again" errors into SQLITE_BUSY
** and a variety of "please close the file descriptor NOW" errors into 
** SQLITE_IOERR
910
911
912
913
914
915
916
917

918
919
920
921
922
923
924
923
924
925
926
927
928
929

930
931
932
933
934
935
936
937







-
+







  ** prior to accessing the inode number.  The one byte written is
  ** an ASCII 'S' character which also happens to be the first byte
  ** in the header of every SQLite database.  In this way, if there
  ** is a race condition such that another thread has already populated
  ** the first page of the database, no damage is done.
  */
  if( statbuf.st_size==0 && (pFile->fsFlags & SQLITE_FSFLAGS_IS_MSDOS)!=0 ){
    rc = write(fd, "S", 1);
    do{ rc = write(fd, "S", 1); }while( rc<0 && errno==EINTR );
    if( rc!=1 ){
      pFile->lastErrno = errno;
      return SQLITE_IOERR;
    }
    rc = fstat(fd, &statbuf);
    if( rc!=0 ){
      pFile->lastErrno = errno;
1786
1787
1788
1789
1790
1791
1792














1793
1794
1795
1796
1797
1798
1799
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826







+
+
+
+
+
+
+
+
+
+
+
+
+
+







** only a single process can be reading the database at a time.
**
** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off or if
** compiling for VXWORKS.
*/
#if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS

/*
** Retry flock() calls that fail with EINTR
*/
#ifdef EINTR
static int robust_flock(int fd, int op){
  int rc;
  do{ rc = flock(fd,op); }while( rc<0 && errno==EINTR );
  return rc;
}
#else
# define robust_flock(a,b) fclose(a,b)
#endif
     

/*
** This routine checks if there is a RESERVED lock held on the specified
** file by this or any other process. If such a lock is held, set *pResOut
** to a non-zero value otherwise *pResOut is set to zero.  The return value
** is set to SQLITE_OK unless an I/O error occurs during lock checking.
*/
static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){
1809
1810
1811
1812
1813
1814
1815
1816

1817
1818
1819

1820
1821
1822
1823
1824
1825
1826
1836
1837
1838
1839
1840
1841
1842

1843
1844
1845

1846
1847
1848
1849
1850
1851
1852
1853







-
+


-
+







  if( pFile->eFileLock>SHARED_LOCK ){
    reserved = 1;
  }
  
  /* Otherwise see if some other process holds it. */
  if( !reserved ){
    /* attempt to get the lock */
    int lrc = flock(pFile->h, LOCK_EX | LOCK_NB);
    int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB);
    if( !lrc ){
      /* got the lock, unlock it */
      lrc = flock(pFile->h, LOCK_UN);
      lrc = robust_flock(pFile->h, LOCK_UN);
      if ( lrc ) {
        int tErrno = errno;
        /* unlock failed with an error */
        lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 
        if( IS_LOCK_ERROR(lrc) ){
          pFile->lastErrno = tErrno;
          rc = lrc;
1889
1890
1891
1892
1893
1894
1895
1896

1897
1898
1899
1900
1901
1902
1903
1916
1917
1918
1919
1920
1921
1922

1923
1924
1925
1926
1927
1928
1929
1930







-
+







  if (pFile->eFileLock > NO_LOCK) {
    pFile->eFileLock = eFileLock;
    return SQLITE_OK;
  }
  
  /* grab an exclusive lock */
  
  if (flock(pFile->h, LOCK_EX | LOCK_NB)) {
  if (robust_flock(pFile->h, LOCK_EX | LOCK_NB)) {
    int tErrno = errno;
    /* didn't get, must be busy */
    rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
    if( IS_LOCK_ERROR(rc) ){
      pFile->lastErrno = tErrno;
    }
  } else {
1938
1939
1940
1941
1942
1943
1944
1945

1946
1947
1948
1949
1950
1951
1952
1965
1966
1967
1968
1969
1970
1971

1972
1973
1974
1975
1976
1977
1978
1979







-
+







  /* shared can just be set because we always have an exclusive */
  if (eFileLock==SHARED_LOCK) {
    pFile->eFileLock = eFileLock;
    return SQLITE_OK;
  }
  
  /* no, really, unlock. */
  int rc = flock(pFile->h, LOCK_UN);
  int rc = robust_flock(pFile->h, LOCK_UN);
  if (rc) {
    int r, tErrno = errno;
    r = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
    if( IS_LOCK_ERROR(r) ){
      pFile->lastErrno = tErrno;
    }
#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS
2675
2676
2677
2678
2679
2680
2681
2682

2683
2684
2685

2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698

2699
2700
2701
2702
2703
2704
2705
2702
2703
2704
2705
2706
2707
2708

2709
2710
2711

2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724

2725
2726
2727
2728
2729
2730
2731
2732







-
+


-
+












-
+







static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
  int got;
#if (!defined(USE_PREAD) && !defined(USE_PREAD64))
  i64 newOffset;
#endif
  TIMER_START;
#if defined(USE_PREAD)
  got = pread(id->h, pBuf, cnt, offset);
  do{ got = pread(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR );
  SimulateIOError( got = -1 );
#elif defined(USE_PREAD64)
  got = pread64(id->h, pBuf, cnt, offset);
  do{ got = pread64(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR );
  SimulateIOError( got = -1 );
#else
  newOffset = lseek(id->h, offset, SEEK_SET);
  SimulateIOError( newOffset-- );
  if( newOffset!=offset ){
    if( newOffset == -1 ){
      ((unixFile*)id)->lastErrno = errno;
    }else{
      ((unixFile*)id)->lastErrno = 0;			
    }
    return -1;
  }
  got = read(id->h, pBuf, cnt);
  do{ got = read(id->h, pBuf, cnt); }while( got<0 && errno==EINTR );
#endif
  TIMER_END;
  if( got<0 ){
    ((unixFile*)id)->lastErrno = errno;
  }
  OSTRACE(("READ    %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED));
  return got;
2753
2754
2755
2756
2757
2758
2759
2760

2761
2762

2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773

2774
2775
2776
2777
2778
2779
2780
2780
2781
2782
2783
2784
2785
2786

2787
2788

2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799

2800
2801
2802
2803
2804
2805
2806
2807







-
+

-
+










-
+







static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
  int got;
#if (!defined(USE_PREAD) && !defined(USE_PREAD64))
  i64 newOffset;
#endif
  TIMER_START;
#if defined(USE_PREAD)
  got = pwrite(id->h, pBuf, cnt, offset);
  do{ got = pwrite(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR );
#elif defined(USE_PREAD64)
  got = pwrite64(id->h, pBuf, cnt, offset);
  do{ got = pwrite64(id->h, pBuf, cnt, offset); }while( got<0 && errno==EINTR );
#else
  newOffset = lseek(id->h, offset, SEEK_SET);
  if( newOffset!=offset ){
    if( newOffset == -1 ){
      ((unixFile*)id)->lastErrno = errno;
    }else{
      ((unixFile*)id)->lastErrno = 0;			
    }
    return -1;
  }
  got = write(id->h, pBuf, cnt);
  do{ got = write(id->h, pBuf, cnt); }while( got<0 && errno==EINTR );
#endif
  TIMER_END;
  if( got<0 ){
    ((unixFile*)id)->lastErrno = errno;
  }

  OSTRACE(("WRITE   %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED));
3057
3058
3059
3060
3061
3062
3063
3064

3065
3066
3067
3068
3069
3070
3071
3084
3085
3086
3087
3088
3089
3090

3091
3092
3093
3094
3095
3096
3097
3098







-
+







  ** actual file size after the operation may be larger than the requested
  ** size).
  */
  if( pFile->szChunk ){
    nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk;
  }

  rc = ftruncate(pFile->h, (off_t)nByte);
  rc = robust_ftruncate(pFile->h, (off_t)nByte);
  if( rc ){
    pFile->lastErrno = errno;
    return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
  }else{
#ifndef NDEBUG
    /* If we are doing a normal write to a database file (as opposed to
    ** doing a hot-journal rollback or a write to some file other than a
3132
3133
3134
3135
3136
3137
3138


3139
3140



3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153

3154
3155
3156
3157
3158
3159
3160
3159
3160
3161
3162
3163
3164
3165
3166
3167


3168
3169
3170

3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181

3182
3183
3184
3185
3186
3187
3188
3189







+
+
-
-
+
+
+
-











-
+







    struct stat buf;              /* Used to hold return values of fstat() */
   
    if( fstat(pFile->h, &buf) ) return SQLITE_IOERR_FSTAT;

    nSize = ((nByte+pFile->szChunk-1) / pFile->szChunk) * pFile->szChunk;
    if( nSize>(i64)buf.st_size ){
#if defined(HAVE_POSIX_FALLOCATE) && HAVE_POSIX_FALLOCATE
      int rc;
      do{
      if( posix_fallocate(pFile->h, buf.st_size, nSize-buf.st_size) ){
        return SQLITE_IOERR_WRITE;
        rc = posix_fallocate(pFile-.h, buf.st_size, nSize-buf.st_size;
      }while( rc<0 && errno=EINTR );
      if( rc ) return SQLITE_IOERR_WRITE;
      }
#else
      /* If the OS does not have posix_fallocate(), fake it. First use
      ** ftruncate() to set the file size, then write a single byte to
      ** the last byte in each block within the extended region. This
      ** is the same technique used by glibc to implement posix_fallocate()
      ** on systems that do not have a real fallocate() system call.
      */
      int nBlk = buf.st_blksize;  /* File-system block size */
      i64 iWrite;                 /* Next offset to write to */
      int nWrite;                 /* Return value from seekAndWrite() */

      if( ftruncate(pFile->h, nSize) ){
      if( robust_ftruncate(pFile->h, nSize) ){
        pFile->lastErrno = errno;
        return unixLogError(SQLITE_IOERR_TRUNCATE, "ftruncate", pFile->zPath);
      }
      iWrite = ((buf.st_size + 2*nBlk - 1)/nBlk)*nBlk-1;
      do {
        nWrite = seekAndWrite(pFile, iWrite, "", 1);
        iWrite += nBlk;
3506
3507
3508
3509
3510
3511
3512
3513

3514
3515
3516
3517
3518
3519
3520
3535
3536
3537
3538
3539
3540
3541

3542
3543
3544
3545
3546
3547
3548
3549







-
+







    }

    /* Check to see if another process is holding the dead-man switch.
    ** If not, truncate the file to zero length. 
    */
    rc = SQLITE_OK;
    if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){
      if( ftruncate(pShmNode->h, 0) ){
      if( robust_ftruncate(pShmNode->h, 0) ){
        rc = unixLogError(SQLITE_IOERR_SHMOPEN, "ftruncate", zShmFilename);
      }
    }
    if( rc==SQLITE_OK ){
      rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1);
    }
    if( rc ) goto shm_open_err;
3612
3613
3614
3615
3616
3617
3618
3619

3620
3621
3622
3623
3624
3625
3626
3641
3642
3643
3644
3645
3646
3647

3648
3649
3650
3651
3652
3653
3654
3655







-
+







      /* The requested memory region does not exist. If bExtend is set to
      ** false, exit early. *pp will be set to NULL and SQLITE_OK returned.
      **
      ** Alternatively, if bExtend is true, use ftruncate() to allocate
      ** the requested memory region.
      */
      if( !bExtend ) goto shmpage_out;
      if( ftruncate(pShmNode->h, nByte) ){
      if( robust_ftruncate(pShmNode->h, nByte) ){
        rc = unixLogError(SQLITE_IOERR_SHMSIZE,"ftruncate",pShmNode->zFilename);
        goto shmpage_out;
      }
    }

    /* Map the requested memory region into this processes address space. */
    apNew = (char **)sqlite3_realloc(
5009
5010
5011
5012
5013
5014
5015
5016

5017
5018
5019
5020
5021
5022
5023
5038
5039
5040
5041
5042
5043
5044

5045
5046
5047
5048
5049
5050
5051
5052







-
+







      time(&t);
      memcpy(zBuf, &t, sizeof(t));
      pid = getpid();
      memcpy(&zBuf[sizeof(t)], &pid, sizeof(pid));
      assert( sizeof(t)+sizeof(pid)<=(size_t)nBuf );
      nBuf = sizeof(t) + sizeof(pid);
    }else{
      nBuf = read(fd, zBuf, nBuf);
      do{ nBuf = read(fd, zBuf, nBuf); }while( nBuf<0 && errno==EINTR );
      close(fd);
    }
  }
#endif
  return nBuf;
}

5770
5771
5772
5773
5774
5775
5776
5777

5778
5779
5780
5781
5782
5783
5784

5785
5786
5787
5788
5789
5790
5791
5792

5793



5794
5795
5796
5797
5798
5799
5800
5799
5800
5801
5802
5803
5804
5805

5806
5807
5808
5809
5810
5811
5812
5813
5814
5815
5816
5817
5818
5819
5820
5821
5822
5823

5824
5825
5826
5827
5828
5829
5830
5831
5832
5833







-
+







+








+
-
+
+
+







        memcpy(&writeBuffer[PROXY_HEADERLEN], myHostID, PROXY_HOSTIDLEN);
        if( pCtx->lockProxyPath!=NULL ){
          strlcpy(&writeBuffer[PROXY_PATHINDEX], pCtx->lockProxyPath, MAXPATHLEN);
        }else{
          strlcpy(&writeBuffer[PROXY_PATHINDEX], tempLockPath, MAXPATHLEN);
        }
        writeSize = PROXY_PATHINDEX + strlen(&writeBuffer[PROXY_PATHINDEX]);
        ftruncate(conchFile->h, writeSize);
        robust_ftruncate(conchFile->h, writeSize);
        rc = unixWrite((sqlite3_file *)conchFile, writeBuffer, writeSize, 0);
        fsync(conchFile->h);
        /* If we created a new conch file (not just updated the contents of a 
         ** valid conch file), try to match the permissions of the database 
         */
        if( rc==SQLITE_OK && createConch ){
          struct stat buf;
          int rc;
          int err = fstat(pFile->h, &buf);
          if( err==0 ){
            mode_t cmode = buf.st_mode&(S_IRUSR|S_IWUSR | S_IRGRP|S_IWGRP |
                                        S_IROTH|S_IWOTH);
            /* try to match the database file R/W permissions, ignore failure */
#ifndef SQLITE_PROXY_DEBUG
            fchmod(conchFile->h, cmode);
#else
            do{
            if( fchmod(conchFile->h, cmode)!=0 ){
              rc = fchmod(conchFile->h, cmode);
            }while( rc==(-1) && errno==EINTR );
            if( rc!=0 ){
              int code = errno;
              fprintf(stderr, "fchmod %o FAILED with %d %s\n",
                      cmode, code, strerror(code));
            } else {
              fprintf(stderr, "fchmod %o SUCCEDED\n",cmode);
            }
          }else{