Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | Add the sqlite4_translate() API, for translating between utf-8 and utf-16 text encodings. Also the sqlite4_buffer object. |
---|---|
Downloads: | Tarball | ZIP archive |
Timelines: | family | ancestors | descendants | both | trunk |
Files: | files | file ages | folders |
SHA1: |
f56bc22c9ee8e63fd7f1e7a953507d64 |
User & Date: | dan 2013-06-10 19:52:02.933 |
Context
2013-06-11
| ||
15:18 | Add tests and minor fixes for the sqlite4_translate() API. check-in: 211c1baef7 user: dan tags: trunk | |
2013-06-10
| ||
19:52 | Add the sqlite4_translate() API, for translating between utf-8 and utf-16 text encodings. Also the sqlite4_buffer object. check-in: f56bc22c9e user: dan tags: trunk | |
2013-06-07
| ||
19:29 | Allow collation sequence comparison functions to return errors. check-in: 596c1f3869 user: dan tags: trunk | |
Changes
Changes to src/mem.c.
︙ | ︙ | |||
726 727 728 729 730 731 732 | pMM = 0; break; } } va_end(ap); return pMM; } | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 | pMM = 0; break; } } va_end(ap); return pMM; } /************************************************************************* ** sqlite4_buffer implementation. */ /* ** Zero the buffer object *pBuf and record pMM as the memory manager that ** the other sqlite4_buffer_xxx() routines use for its allocation. */ void sqlite4_buffer_init(sqlite4_buffer *pBuf, sqlite4_mm *pMM){ memset(pBuf, 0, sizeof(*pBuf)); pBuf->pMM = pMM; } /* ** Set the size of the buffer to nReq bytes. The allocation is enlarged ** with sqlite4_mm_realloc() only if sqlite4_mm_msize() reports that it is ** currently smaller than nReq bytes; it is never shrunk. Return SQLITE4_OK ** on success, or SQLITE4_NOMEM if the realloc fails, in which case neither ** pBuf->p nor pBuf->n is modified. */ int sqlite4_buffer_resize(sqlite4_buffer *pBuf, sqlite4_size_t nReq){ sqlite4_size_t nCurrent; /* Current buffer size */ nCurrent = sqlite4_mm_msize(pBuf->pMM, pBuf->p); if( nCurrent<nReq ){ void *pNew = sqlite4_mm_realloc(pBuf->pMM, pBuf->p, nReq); if( pNew==0 ) return SQLITE4_NOMEM; pBuf->p = pNew; } pBuf->n = nReq; return SQLITE4_OK; } /* ** Append the n bytes at p to the end of the buffer, growing it by n bytes. ** Return the result of sqlite4_buffer_resize(); nothing is copied unless ** that result is SQLITE4_OK. */ int sqlite4_buffer_append( sqlite4_buffer *pBuf, const void *p, sqlite4_size_t n ){ int rc; /* Return code */ sqlite4_size_t nOrig = pBuf->n; /* Initial buffer size in bytes */ rc = sqlite4_buffer_resize(pBuf, nOrig+n); if( rc==SQLITE4_OK ){ memcpy(&((u8 *)pBuf->p)[nOrig], p, n); } return rc; } /* ** Free the allocation held by the buffer and re-initialize the object, ** keeping the same memory manager. Afterwards pBuf->p is NULL and ** pBuf->n is zero. */ void sqlite4_buffer_clear(sqlite4_buffer *pBuf){ sqlite4_mm_free(pBuf->pMM, pBuf->p); sqlite4_buffer_init(pBuf, pBuf->pMM); } |
Changes to src/sqlite.h.in.
︙ | ︙ | |||
219 220 221 222 223 224 225 226 227 228 229 230 231 232 | ** Enable or disable benign failure mode. Benign failure mode can be ** nested. In benign failure mode, OOM errors do not necessarily propagate ** back out to the application but can be dealt with internally. Memory ** allocations that occur in benign failure mode are considered "optional". */ void sqlite4_mm_benign_failures(sqlite4_mm*, int bEnable); /* ** CAPIREF: Run-time Environment Object ** ** An instance of the following object defines the run-time environment ** for an SQLite4 database connection. This object defines the interface ** to appropriate mutex routines, memory allocation routines, a | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 | ** Enable or disable benign failure mode. Benign failure mode can be ** nested. In benign failure mode, OOM errors do not necessarily propagate ** back out to the application but can be dealt with internally. Memory ** allocations that occur in benign failure mode are considered "optional". */ void sqlite4_mm_benign_failures(sqlite4_mm*, int bEnable); /* ** CAPIREF: Buffer Object */ typedef struct sqlite4_buffer sqlite4_buffer; struct sqlite4_buffer { sqlite4_mm *pMM; /* Memory manager used to allocate and free p */ void *p; /* Buffer content, or NULL if nothing is allocated */ sqlite4_size_t n; /* Size of the buffer content in bytes */ }; void sqlite4_buffer_init(sqlite4_buffer *, sqlite4_mm *); void sqlite4_buffer_clear(sqlite4_buffer *); int sqlite4_buffer_resize(sqlite4_buffer *, sqlite4_size_t); int sqlite4_buffer_append(sqlite4_buffer *, const void *, sqlite4_size_t); /* ** CAPIREF: Translate Text Encodings ** ** This API function is used to translate between utf-8 and utf-16 text ** encodings. ** ** The translated output is stored in the sqlite4_buffer object indicated ** by the first argument. 
If no error occurs, the value returned is a copy ** of the sqlite4_buffer.p value (a pointer to the blob of memory containing ** the actual translated data). Or, if an out-of-memory error occurs, the ** buffer is zeroed and a NULL pointer returned. ** ** The input data is specified by the second and third arguments. The second ** is a pointer to the buffer containing the input text. If the third ** parameter is non-negative, it is the size of the input buffer in bytes. ** Otherwise, if the third parameter is a negative value, the input buffer ** is read up until the first nul-terminator character (codepoint 0). ** ** The required translation is specified by the fourth argument, which must ** be passed one of the SQLITE4_TRANSLATE_* constants. */ #define SQLITE4_TRANSLATE_UTF8_UTF16 1 /* utf-8 to native-endian utf-16 */ #define SQLITE4_TRANSLATE_UTF16_UTF8 2 /* native-endian utf-16 to utf-8 */ #define SQLITE4_TRANSLATE_UTF8_UTF16LE 3 /* utf-8 to little-endian utf-16 */ #define SQLITE4_TRANSLATE_UTF16LE_UTF8 4 /* little-endian utf-16 to utf-8 */ #define SQLITE4_TRANSLATE_UTF8_UTF16BE 5 /* utf-8 to big-endian utf-16 */ #define SQLITE4_TRANSLATE_UTF16BE_UTF8 6 /* big-endian utf-16 to utf-8 */ void *sqlite4_translate(sqlite4_buffer *, const void *p, int n, int eTrans); /* ** CAPIREF: Run-time Environment Object ** ** An instance of the following object defines the run-time environment ** for an SQLite4 database connection. This object defines the interface ** to appropriate mutex routines, memory allocation routines, a |
︙ | ︙ |
Changes to src/utf.c.
︙ | ︙ | |||
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 | } /* ** If the TRANSLATE_TRACE macro is defined, the value of each Mem is ** printed on stderr on the way into and out of sqlite4VdbeMemTranslate(). */ /* #define TRANSLATE_TRACE 1 */ #ifndef SQLITE4_OMIT_UTF16 /* ** This routine transforms the internal text encoding used by pMem to ** desiredEnc. It is an error if the string is already of the desired ** encoding, or if *pMem does not contain a string value. */ int sqlite4VdbeMemTranslate(Mem *pMem, u8 desiredEnc){ | > > > < < < < > | | < < < < < < < < < < < < < < < < < < < < < < | < < < < | < < | < < < | < | | | < < < < | < | | < < < < < | < < < | | > > > | < < | < < | < < < < | | > | > | < < | | | | | > | | | | 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 | } /* ** If the TRANSLATE_TRACE macro is defined, the value of each Mem is ** printed on stderr on the way into and out of sqlite4VdbeMemTranslate(). */ /* #define TRANSLATE_TRACE 1 */ #ifndef SQLITE4_OMIT_UTF16 /* ** This routine transforms the internal text encoding used by pMem to ** desiredEnc. It is an error if the string is already of the desired ** encoding, or if *pMem does not contain a string value. 
*/ int sqlite4VdbeMemTranslate(Mem *pMem, u8 desiredEnc){ unsigned char *zIn; /* Input iterator */ unsigned char *zTerm; /* End of input */ assert( pMem->db==0 || sqlite4_mutex_held(pMem->db->mutex) ); assert( pMem->flags&MEM_Str ); assert( pMem->enc!=desiredEnc ); assert( pMem->enc!=0 ); assert( pMem->n>=0 ); #if defined(TRANSLATE_TRACE) && defined(SQLITE4_DEBUG) { char zBuf[100]; sqlite4VdbeMemPrettyPrint(pMem, zBuf); fprintf(stderr, "INPUT: %s\n", zBuf); } #endif if( pMem->enc!=SQLITE4_UTF8 && desiredEnc!=SQLITE4_UTF8 ){ /* If the translation is between UTF-16 little and big endian, then ** all that is required is to swap the byte order. */ u8 temp; int rc; rc = sqlite4VdbeMemMakeWriteable(pMem); if( rc!=SQLITE4_OK ){ assert( rc==SQLITE4_NOMEM ); return SQLITE4_NOMEM; } zIn = (u8*)pMem->z; zTerm = &zIn[pMem->n&~1]; while( zIn<zTerm ){ temp = *zIn; *zIn = *(zIn+1); zIn++; *zIn++ = temp; } pMem->enc = desiredEnc; }else{ int eTrans; sqlite4_buffer buf; sqlite4_mm *pMM = pMem->db->pEnv->pMM; switch( pMem->enc ){ case SQLITE4_UTF8: if( desiredEnc==SQLITE4_UTF16BE ){ eTrans = SQLITE4_TRANSLATE_UTF8_UTF16BE; }else{ eTrans = SQLITE4_TRANSLATE_UTF8_UTF16LE; } break; case SQLITE4_UTF16BE: eTrans = SQLITE4_TRANSLATE_UTF16BE_UTF8; break; default: assert( pMem->enc==SQLITE4_UTF16LE ); eTrans = SQLITE4_TRANSLATE_UTF16LE_UTF8; break; } sqlite4_buffer_init(&buf, pMM); if( 0==sqlite4_translate(&buf, pMem->z, pMem->n, eTrans) ){ return SQLITE4_NOMEM; } sqlite4VdbeMemRelease(pMem); pMem->flags &= ~(MEM_Static|MEM_Dyn|MEM_Ephem); pMem->enc = desiredEnc; pMem->flags |= (MEM_Term|MEM_Dyn); pMem->z = (char*)buf.p; pMem->n = (int)buf.n; pMem->zMalloc = pMem->z; } #if defined(TRANSLATE_TRACE) && defined(SQLITE4_DEBUG) { char zBuf[100]; sqlite4VdbeMemPrettyPrint(pMem, zBuf); fprintf(stderr, "OUTPUT: %s\n", zBuf); } #endif |
︙ | ︙ | |||
378 379 380 381 382 383 384 | */ int sqlite4VdbeMemHandleBom(Mem *pMem){ int rc = SQLITE4_OK; u8 bom = 0; assert( pMem->n>=0 ); if( pMem->n>1 ){ | | | | 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 | */ int sqlite4VdbeMemHandleBom(Mem *pMem){ int rc = SQLITE4_OK; u8 bom = 0; assert( pMem->n>=0 ); if( pMem->n>1 ){ u8 b1 = *(u8*)pMem->z; u8 b2 = *(((u8*)pMem->z) + 1); if( b1==0xFE && b2==0xFF ){ bom = SQLITE4_UTF16BE; } if( b1==0xFF && b2==0xFE ){ bom = SQLITE4_UTF16LE; } } |
︙ | ︙ | |||
530 531 532 533 534 535 536 537 538 539 540 541 542 543 | while( n<nChar ){ READ_UTF16LE(z, 1, c); n++; } } return (int)(z-(unsigned char const *)zIn); } #if defined(SQLITE4_TEST) /* ** This routine is called from the TCL test function "translate_selftest". ** It checks that the primitives for serializing and deserializing ** characters in each encoding are inverses of each other. */ | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 | while( n<nChar ){ READ_UTF16LE(z, 1, c); n++; } } return (int)(z-(unsigned char const *)zIn); } /* ** Helper function for sqlite4_translate(). 
*/ static void translateEncoding( int eTrans, /* SQLITE4_TRANSLATE_XXX constant */ const void *pIn, int nIn, /* Input buffer */ void *pOut, /* Pointer to output buffer */ sqlite4_size_t *pnOut /* OUT: Bytes written to output buffer */ ){ u8 *zIn = (u8*)pIn; /* Input pointer */ u8 *zTerm = &zIn[nIn]; /* One byte past the end of the input */ u8 *z = (u8*)pOut; /* Output pointer */ int e16; /* utf-16 variant */ unsigned int c; /* Value handed from the READ_xxx macro to the WRITE_xxx macro */ /* Work out which utf-16 variant (native, big-endian or little-endian) ** the requested translation refers to. */ if( eTrans==SQLITE4_TRANSLATE_UTF8_UTF16 || eTrans==SQLITE4_TRANSLATE_UTF16_UTF8 ){ e16 = SQLITE4_UTF16NATIVE; }else if( eTrans==SQLITE4_TRANSLATE_UTF8_UTF16BE || eTrans==SQLITE4_TRANSLATE_UTF16BE_UTF8 ){ e16 = SQLITE4_UTF16BE; }else{ e16 = SQLITE4_UTF16LE; } /* All utf-8 -> utf-16 constants are odd, so bit 0 of eTrans selects ** the direction of the translation below. */ assert( (SQLITE4_TRANSLATE_UTF8_UTF16 & 0x01) && (SQLITE4_TRANSLATE_UTF8_UTF16LE & 0x01) && (SQLITE4_TRANSLATE_UTF8_UTF16BE & 0x01) ); if( eTrans & 0x01 ){ if( e16==SQLITE4_UTF16LE ){ /* UTF-8 -> UTF-16 Little-endian */ while( zIn<zTerm ){ /* c = sqlite4Utf8Read(zIn, zTerm, (const u8**)&zIn); */ READ_UTF8(zIn, zTerm, c); WRITE_UTF16LE(z, c); } }else{ assert( e16==SQLITE4_UTF16BE ); /* UTF-8 -> UTF-16 Big-endian */ while( zIn<zTerm ){ /* c = sqlite4Utf8Read(zIn, zTerm, (const u8**)&zIn); */ READ_UTF8(zIn, zTerm, c); WRITE_UTF16BE(z, c); } } }else{ if( e16==SQLITE4_UTF16LE ){ /* UTF-16 Little-endian -> UTF-8 */ while( zIn<zTerm ){ READ_UTF16LE(zIn, zIn<zTerm, c); WRITE_UTF8(z, c); } }else{ /* UTF-16 Big-endian -> UTF-8 */ while( zIn<zTerm ){ READ_UTF16BE(zIn, zIn<zTerm, c); WRITE_UTF8(z, c); } } } /* Report the number of bytes written, not counting the terminator. Then ** append a nul-terminator: two zero bytes for utf-16 output, one zero ** byte for utf-8 output. No bounds check is made here, so the caller ** must supply an output buffer large enough for the text plus the ** terminator. */ *pnOut = (sqlite4_size_t)(z - (u8*)pOut); if( eTrans & 0x01 ) *z++ = 0; *z = 0; } /* ** Translate the input buffer described by the second and third arguments ** according to the value of parameter eTrans (one of the SQLITE4_TRANSLATE_* ** constants). Write the results into buffer pBuf. 
*/ void *sqlite4_translate( sqlite4_buffer *pBuf, /* Buffer to store output in */ const void *p, int n, /* Input buffer and its size in bytes */ int eTrans /* Requested translation */ ){ int nReq; /* Size of required buffer in bytes */ /* Every utf-8 -> utf-16 constant is odd, so bit 0 of eTrans identifies ** the encoding of the input text. */ assert( (SQLITE4_TRANSLATE_UTF8_UTF16 & 0x01) && (SQLITE4_TRANSLATE_UTF8_UTF16LE & 0x01) && (SQLITE4_TRANSLATE_UTF8_UTF16BE & 0x01) ); if( eTrans & 0x01 ){ /* When converting from UTF-8 to UTF-16 the maximum growth is caused ** when a 1-byte UTF-8 character is translated into a 2-byte UTF-16 ** character. Two bytes are required in the output buffer for the ** nul-terminator. ** ** The input is UTF-8, so if n is negative the input extends up to the ** first single 0x00 byte. */ if( n<0 ) n = sqlite4Strlen30((const char*)p); nReq = n * 2 + 2; }else{ /* When converting from UTF-16, the maximum growth results from ** translating a 2-byte character to a 4-byte UTF-8 character. ** A single byte is required for the output string ** nul-terminator. ** ** The input is UTF-16, so if n is negative the input extends up to the ** first 2-byte code unit that is entirely zero. */ if( n<0 ){ const u8 *z = (const u8*)p; while( z[0] || z[1] ) z += 2; n = (int)(z - (const u8*)p); } nReq = n * 2 + 1; } if( SQLITE4_OK!=sqlite4_buffer_resize(pBuf, nReq) ){ /* Out of memory. Clearing the buffer leaves pBuf->p set to NULL, which ** is what is returned to the caller below. */ sqlite4_buffer_clear(pBuf); }else{ /* translateEncoding() sets pBuf->n to the size of the translated text, ** not including the nul-terminator. */ translateEncoding(eTrans, p, n, pBuf->p, &pBuf->n); } return pBuf->p; } #if defined(SQLITE4_TEST) /* ** This routine is called from the TCL test function "translate_selftest". ** It checks that the primitives for serializing and deserializing ** characters in each encoding are inverses of each other. */ |
︙ | ︙ |