/ Check-in [ebc10e46]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Improve the performance and reduce the size of the sqlite3VdbeSerialGet() routine by avoiding the use of the stack.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: ebc10e46c15017d7cd232b5f4f3ef67ef740d87f
User & Date: drh 2014-08-22 15:19:59
Context
2014-08-22
15:40
Performance improvement in the printf() logic by avoiding unnecessary stack pointer movement. check-in: f7f2160d user: drh tags: trunk
15:19
Improve the performance and reduce the size of the sqlite3VdbeSerialGet() routine by avoiding the use of the stack. check-in: ebc10e46 user: drh tags: trunk
14:56
Handle the 4-byte integer case in the stackless routine. Closed-Leaf check-in: 3f55484e user: drh tags: experimental
13:22
Change a while-loop into a do-loop in sqlite3VdbeSerialPut() for a small size reduction and performance improvement. check-in: 750bb0a0 user: drh tags: trunk
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/sqliteInt.h.

   149    149   # define SQLITE_INT_TO_PTR(X)  ((void*)(intptr_t)(X))
   150    150   # define SQLITE_PTR_TO_INT(X)  ((int)(intptr_t)(X))
   151    151   #else                          /* Generates a warning - but it always works */
   152    152   # define SQLITE_INT_TO_PTR(X)  ((void*)(X))
   153    153   # define SQLITE_PTR_TO_INT(X)  ((int)(X))
   154    154   #endif
   155    155   
          156  +/*
          157  +** A macro to hint to the compiler that a function should not be
          158  +** inlined.
          159  +*/
          160  +#if defined(__GNUC__)
          161  +#  define SQLITE_NOINLINE  __attribute__((noinline))
          162  +#elif defined(_MSC_VER)
          163  +#  define SQLITE_NOINLINE  __declspec(noinline)
          164  +#else
          165  +#  define SQLITE_NOINLINE
          166  +#endif
          167  +
   156    168   /*
   157    169   ** The SQLITE_THREADSAFE macro must be defined as 0, 1, or 2.
   158    170   ** 0 means mutexes are permanently disabled and the library is never
   159    171   ** threadsafe.  1 means the library is serialized which is the highest
   160    172   ** level of threadsafety.  2 means the library is multithreaded - multiple
   161    173   ** threads can use SQLite as long as no two threads try to use the same
   162    174   ** database connection at the same time.

Changes to src/vdbeaux.c.

  2957   2957   /* Input "x" is a sequence of unsigned characters that represent a
  2958   2958   ** big-endian integer.  Return the equivalent native integer
  2959   2959   */
  2960   2960   #define ONE_BYTE_INT(x)    ((i8)(x)[0])
  2961   2961   #define TWO_BYTE_INT(x)    (256*(i8)((x)[0])|(x)[1])
  2962   2962   #define THREE_BYTE_INT(x)  (65536*(i8)((x)[0])|((x)[1]<<8)|(x)[2])
  2963   2963   #define FOUR_BYTE_UINT(x)  (((u32)(x)[0]<<24)|((x)[1]<<16)|((x)[2]<<8)|(x)[3])
         2964  +#define FOUR_BYTE_INT(x) (16777216*(i8)((x)[0])|((x)[1]<<16)|((x)[2]<<8)|(x)[3])
  2964   2965   
  2965   2966   /*
  2966   2967   ** Deserialize the data blob pointed to by buf as serial type serial_type
  2967   2968   ** and store the result in pMem.  Return the number of bytes read.
         2969  +**
         2970  +** This function is implemented as two separate routines for performance.
         2971  +** The few cases that require local variables are broken out into a separate
         2972  +** routine so that in most cases the overhead of moving the stack pointer
         2973  +** is avoided.
  2968   2974   */ 
         2975  +static u32 SQLITE_NOINLINE serialGet(
         2976  +  const unsigned char *buf,     /* Buffer to deserialize from */
         2977  +  u32 serial_type,              /* Serial type to deserialize */
         2978  +  Mem *pMem                     /* Memory cell to write value into */
         2979  +){
         2980  +  u64 x = FOUR_BYTE_UINT(buf);
         2981  +  u32 y = FOUR_BYTE_UINT(buf+4);
         2982  +  x = (x<<32) + y;
         2983  +  if( serial_type==6 ){
         2984  +    pMem->u.i = *(i64*)&x;
         2985  +    pMem->flags = MEM_Int;
         2986  +    testcase( pMem->u.i<0 );
         2987  +  }else{
         2988  +#if !defined(NDEBUG) && !defined(SQLITE_OMIT_FLOATING_POINT)
         2989  +    /* Verify that integers and floating point values use the same
         2990  +    ** byte order.  Or, that if SQLITE_MIXED_ENDIAN_64BIT_FLOAT is
         2991  +    ** defined that 64-bit floating point values really are mixed
         2992  +    ** endian.
         2993  +    */
         2994  +    static const u64 t1 = ((u64)0x3ff00000)<<32;
         2995  +    static const double r1 = 1.0;
         2996  +    u64 t2 = t1;
         2997  +    swapMixedEndianFloat(t2);
         2998  +    assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 );
         2999  +#endif
         3000  +    assert( sizeof(x)==8 && sizeof(pMem->r)==8 );
         3001  +    swapMixedEndianFloat(x);
         3002  +    memcpy(&pMem->r, &x, sizeof(x));
         3003  +    pMem->flags = sqlite3IsNaN(pMem->r) ? MEM_Null : MEM_Real;
         3004  +  }
         3005  +  return 8;
         3006  +}
  2969   3007   u32 sqlite3VdbeSerialGet(
  2970   3008     const unsigned char *buf,     /* Buffer to deserialize from */
  2971   3009     u32 serial_type,              /* Serial type to deserialize */
  2972   3010     Mem *pMem                     /* Memory cell to write value into */
  2973   3011   ){
  2974         -  u64 x;
  2975         -  u32 y;
  2976   3012     switch( serial_type ){
  2977   3013       case 10:   /* Reserved for future use */
  2978   3014       case 11:   /* Reserved for future use */
  2979   3015       case 0: {  /* NULL */
  2980   3016         pMem->flags = MEM_Null;
  2981   3017         break;
  2982   3018       }
................................................................................
  2995   3031       case 3: { /* 3-byte signed integer */
  2996   3032         pMem->u.i = THREE_BYTE_INT(buf);
  2997   3033         pMem->flags = MEM_Int;
  2998   3034         testcase( pMem->u.i<0 );
  2999   3035         return 3;
  3000   3036       }
  3001   3037       case 4: { /* 4-byte signed integer */
  3002         -      y = FOUR_BYTE_UINT(buf);
  3003         -      pMem->u.i = (i64)*(int*)&y;
         3038  +      pMem->u.i = FOUR_BYTE_INT(buf);
  3004   3039         pMem->flags = MEM_Int;
  3005   3040         testcase( pMem->u.i<0 );
  3006   3041         return 4;
  3007   3042       }
  3008   3043       case 5: { /* 6-byte signed integer */
  3009   3044         pMem->u.i = FOUR_BYTE_UINT(buf+2) + (((i64)1)<<32)*TWO_BYTE_INT(buf);
  3010   3045         pMem->flags = MEM_Int;
  3011   3046         testcase( pMem->u.i<0 );
  3012   3047         return 6;
  3013   3048       }
  3014   3049       case 6:   /* 8-byte signed integer */
  3015   3050       case 7: { /* IEEE floating point */
  3016         -#if !defined(NDEBUG) && !defined(SQLITE_OMIT_FLOATING_POINT)
  3017         -      /* Verify that integers and floating point values use the same
  3018         -      ** byte order.  Or, that if SQLITE_MIXED_ENDIAN_64BIT_FLOAT is
  3019         -      ** defined that 64-bit floating point values really are mixed
  3020         -      ** endian.
  3021         -      */
  3022         -      static const u64 t1 = ((u64)0x3ff00000)<<32;
  3023         -      static const double r1 = 1.0;
  3024         -      u64 t2 = t1;
  3025         -      swapMixedEndianFloat(t2);
  3026         -      assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 );
  3027         -#endif
  3028         -      x = FOUR_BYTE_UINT(buf);
  3029         -      y = FOUR_BYTE_UINT(buf+4);
  3030         -      x = (x<<32) | y;
  3031         -      if( serial_type==6 ){
  3032         -        pMem->u.i = *(i64*)&x;
  3033         -        pMem->flags = MEM_Int;
  3034         -        testcase( pMem->u.i<0 );
  3035         -      }else{
  3036         -        assert( sizeof(x)==8 && sizeof(pMem->r)==8 );
  3037         -        swapMixedEndianFloat(x);
  3038         -        memcpy(&pMem->r, &x, sizeof(x));
  3039         -        pMem->flags = sqlite3IsNaN(pMem->r) ? MEM_Null : MEM_Real;
  3040         -      }
  3041         -      return 8;
         3051  +      /* These use local variables, so do them in a separate routine
         3052  +      ** to avoid having to move the frame pointer in the common case */
         3053  +      return serialGet(buf,serial_type,pMem);
  3042   3054       }
  3043   3055       case 8:    /* Integer 0 */
  3044   3056       case 9: {  /* Integer 1 */
  3045   3057         pMem->u.i = serial_type-8;
  3046   3058         pMem->flags = MEM_Int;
  3047   3059         return 0;
  3048   3060       }
  3049   3061       default: {
  3050   3062         static const u16 aFlag[] = { MEM_Blob|MEM_Ephem, MEM_Str|MEM_Ephem };
  3051         -      u32 len = (serial_type-12)/2;
  3052   3063         pMem->z = (char *)buf;
  3053         -      pMem->n = len;
         3064  +      pMem->n = (serial_type-12)/2;
  3054   3065         pMem->xDel = 0;
  3055   3066         pMem->flags = aFlag[serial_type&1];
  3056         -      return len;
         3067  +      return pMem->n;
  3057   3068       }
  3058   3069     }
  3059   3070     return 0;
  3060   3071   }
  3061         -
  3062   3072   /*
  3063   3073   ** This routine is used to allocate sufficient space for an UnpackedRecord
  3064   3074   ** structure large enough to be used with sqlite3VdbeRecordUnpack() if
  3065   3075   ** the first argument is a pointer to KeyInfo structure pKeyInfo.
  3066   3076   **
  3067   3077   ** The space is either allocated using sqlite3DbMallocRaw() or from within
  3068   3078   ** the unaligned buffer passed via the second and third arguments (presumably