SQLite4
Check-in [bd4efbb2f7]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Add new files fts5.c and fts5func.c.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: bd4efbb2f7c4d985a19cec6daeefe6c3efc392fd
User & Date: dan 2012-12-17 19:36:32
Context
2012-12-17
20:18
Add a test for the "colname:phrase" syntax to fts5expr1.test. check-in: c472cae982 user: dan tags: trunk
19:36
Add new files fts5.c and fts5func.c. check-in: bd4efbb2f7 user: dan tags: trunk
2012-12-03
20:19
Add support for testing against mdb. check-in: 5181638475 user: dan tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to main.mk.

60
61
62
63
64
65
66

67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
...
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
...
242
243
244
245
246
247
248


249
250
251
252
253
254
255
...
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
...
517
518
519
520
521
522
523

524
525
526
527
528
529
530
531

# This is how we compile
#
TCCX =  $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) 
TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3
TCCX += -I$(TOP)/ext/async



# Object files for the SQLite library.
#
FTS3_OBJ = fts3.o fts3_aux.o fts3_expr.o fts3_hash.o fts3_icu.o fts3_porter.o \
         fts3_snippet.o fts3_tokenizer.o fts3_tokenizer1.o \
         fts3_write.o
# To remove fts3 (if it won't compile for you), unset FTS3_OBJ:
# FTS3_OBJ =

LIBOBJ+= alter.o analyze.o attach.o auth.o \
         build.o \
         callback.o complete.o ctime.o date.o delete.o expr.o fault.o fkey.o \
         $(FTS3_OBJ) \
         func.o global.o hash.o \
         icu.o insert.o kv.o kvlsm.o kvmem.o legacy.o \
         lsm_ckpt.o lsm_file.o lsm_log.o lsm_main.o lsm_mem.o lsm_mutex.o \
         lsm_shared.o lsm_str.o lsm_sorted.o lsm_tree.o \
         lsm_unix.o lsm_varint.o \
         main.o malloc.o math.o mem0.o mem1.o mem2.o mem3.o mem5.o \
         mutex.o mutex_noop.o mutex_unix.o mutex_w32.o \
................................................................................
  parse.h \
  sqlite4.h


# Source code to the test files.
#
TESTSRC = \
  $(TOP)/ext/fts3/fts3_term.c \
  $(TOP)/ext/fts3/fts3_test.c \
  $(TOP)/test/test_main.c \
  $(TOP)/test/test_thread0.c \
  $(TOP)/test/test_utf.c \
  $(TOP)/test/test_misc1.c \
  $(TOP)/test/test_config.c \
  $(TOP)/test/test_func.c \
  $(TOP)/test/test_hexio.c \
................................................................................
#TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c

TESTSRC2 = \
  $(TOP)/src/attach.c \
  $(TOP)/src/build.c \
  $(TOP)/src/date.c \
  $(TOP)/src/expr.c \


  $(TOP)/src/func.c \
  $(TOP)/src/insert.c \
  $(TOP)/src/mem5.c \
  $(TOP)/src/os.c \
  $(TOP)/src/pragma.c \
  $(TOP)/src/prepare.c \
  $(TOP)/src/printf.c \
................................................................................
  $(TOP)/src/util.c \
  $(TOP)/src/vdbeapi.c \
  $(TOP)/src/vdbeaux.c \
  $(TOP)/src/vdbe.c \
  $(TOP)/src/vdbemem.c \
  $(TOP)/src/where.c \
  parse.c \
  $(TOP)/ext/fts3/fts3.c \
  $(TOP)/ext/fts3/fts3_aux.c \
  $(TOP)/ext/fts3/fts3_expr.c \
  $(TOP)/ext/fts3/fts3_tokenizer.c \
  $(TOP)/ext/fts3/fts3_write.c

# Header files used by all library source files.
#
HDR = \
   $(TOP)/src/hash.h \
   $(TOP)/src/hwtime.h \
   keywordhash.h \
................................................................................

test:	testfixture$(EXE) sqlite4$(EXE)
	./testfixture$(EXE) $(TOP)/test/src4.test

# Rules to build the 'lsmtest' application.
#
lsmtest$(EXE): libsqlite4.a $(LSMTESTSRC) $(LSMTESTHDR)

	$(TCCX) $(LSMTESTSRC) libsqlite4.a -o lsmtest$(EXE) $(THREADLIB) -lsqlite3


varint$(EXE):	$(TOP)/src/varint.c
	$(TCCX) -DVARINT_TOOL -o varint$(EXE) $(TOP)/src/varint.c

# The next two rules are used to support the "threadtest" target. Building
# threadtest runs a few thread-safety tests that are implemented in C. This







>

<
<
<
<
<
<
<




|







 







<
<







 







>
>







 







<
<
<
<
<







 







>
|







60
61
62
63
64
65
66
67
68







69
70
71
72
73
74
75
76
77
78
79
80
...
210
211
212
213
214
215
216


217
218
219
220
221
222
223
...
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
...
254
255
256
257
258
259
260





261
262
263
264
265
266
267
...
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521

# This is how we compile
#
TCCX =  $(TCC) $(OPTS) -I. -I$(TOP)/src -I$(TOP) 
TCCX += -I$(TOP)/ext/rtree -I$(TOP)/ext/icu -I$(TOP)/ext/fts3
TCCX += -I$(TOP)/ext/async

TCPPX = g++ -Wall -g -I. -I$(TOP)/src $(OPTS)









LIBOBJ+= alter.o analyze.o attach.o auth.o \
         build.o \
         callback.o complete.o ctime.o date.o delete.o expr.o fault.o fkey.o \
	 fts5.o fts5func.o \
         func.o global.o hash.o \
         icu.o insert.o kv.o kvlsm.o kvmem.o legacy.o \
         lsm_ckpt.o lsm_file.o lsm_log.o lsm_main.o lsm_mem.o lsm_mutex.o \
         lsm_shared.o lsm_str.o lsm_sorted.o lsm_tree.o \
         lsm_unix.o lsm_varint.o \
         main.o malloc.o math.o mem0.o mem1.o mem2.o mem3.o mem5.o \
         mutex.o mutex_noop.o mutex_unix.o mutex_w32.o \
................................................................................
  parse.h \
  sqlite4.h


# Source code to the test files.
#
TESTSRC = \


  $(TOP)/test/test_main.c \
  $(TOP)/test/test_thread0.c \
  $(TOP)/test/test_utf.c \
  $(TOP)/test/test_misc1.c \
  $(TOP)/test/test_config.c \
  $(TOP)/test/test_func.c \
  $(TOP)/test/test_hexio.c \
................................................................................
#TESTSRC += $(TOP)/ext/fts3/fts3_tokenizer.c

TESTSRC2 = \
  $(TOP)/src/attach.c \
  $(TOP)/src/build.c \
  $(TOP)/src/date.c \
  $(TOP)/src/expr.c \
  $(TOP)/src/fts5.c \
  $(TOP)/src/fts5func.c \
  $(TOP)/src/func.c \
  $(TOP)/src/insert.c \
  $(TOP)/src/mem5.c \
  $(TOP)/src/os.c \
  $(TOP)/src/pragma.c \
  $(TOP)/src/prepare.c \
  $(TOP)/src/printf.c \
................................................................................
  $(TOP)/src/util.c \
  $(TOP)/src/vdbeapi.c \
  $(TOP)/src/vdbeaux.c \
  $(TOP)/src/vdbe.c \
  $(TOP)/src/vdbemem.c \
  $(TOP)/src/where.c \
  parse.c \






# Header files used by all library source files.
#
HDR = \
   $(TOP)/src/hash.h \
   $(TOP)/src/hwtime.h \
   keywordhash.h \
................................................................................

test:	testfixture$(EXE) sqlite4$(EXE)
	./testfixture$(EXE) $(TOP)/test/src4.test

# Rules to build the 'lsmtest' application.
#
lsmtest$(EXE): libsqlite4.a $(LSMTESTSRC) $(LSMTESTHDR)
	$(TCPPX) -c $(TOP)/lsm-test/lsmtest_tdb2.cc
	$(TCCX) $(LSMTESTSRC) lsmtest_tdb2.o libsqlite4.a -o lsmtest$(EXE) $(THREADLIB) -lsqlite3


varint$(EXE):	$(TOP)/src/varint.c
	$(TCCX) -DVARINT_TOOL -o varint$(EXE) $(TOP)/src/varint.c

# The next two rules are used to support the "threadtest" target. Building
# threadtest runs a few thread-safety tests that are implemented in C. This

Added src/fts5.c.





































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
/*
** 2012 December 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
*/

#include "sqliteInt.h"

/*
** Token types used by expression parser.
*/
#define TOKEN_EOF       0         /* end of expression - no more tokens */
#define TOKEN_PRIMITIVE 1         /* quoted string or non-keyword */
#define TOKEN_STAR      2         /* * */
#define TOKEN_PLUS      3         /* + */
#define TOKEN_NEAR      4         /* NEAR/nnn */
#define TOKEN_COLON     5         /* : */
#define TOKEN_NOT       6         /* NOT */
#define TOKEN_AND       7         /* AND */
#define TOKEN_OR        8         /* OR */
#define TOKEN_LP        9         /* ( */
#define TOKEN_RP       10         /* ) */

/*
** Each tokenizer registered with a database handle is stored as an object
** of the following type. All objects associated with a single database
** connection are stored in the singly-linked list starting at 
** sqlite4.pTokenizer and connected by Fts5Tokenizer.pNext.
*/
struct Fts5Tokenizer {
  char *zName;                   /* Name of tokenizer (nul-terminated) */
  void *pCtx;
  int (*xCreate)(void*, const char**, int, sqlite4_tokenizer**);
  int (*xTokenize)(void*, sqlite4_tokenizer*,
      const char*, int, int(*x)(void*, int, const char*, int)
  );
  int (*xDestroy)(void*, sqlite4_tokenizer *);
  Fts5Tokenizer *pNext;
};

/*
** Expression grammar:
**
**   phrase := PRIMITIVE
**   phrase := PRIMITIVE *
**   phrase := phrase + phrase
**   phrase := phrase NEAR phrase
**
**   expr := phrase
**   expr := PRIMITIVE COLON phrase
**
**   expr := expr NOT expr
**   expr := expr AND expr
**   expr := expr OR  expr
**   expr := LP expr RP
*/

/*
** Context object used by expression parser.
*/
typedef struct Fts5Parser Fts5Parser;
typedef struct Fts5ParserToken Fts5ParserToken;

typedef struct Fts5Token Fts5Token;
typedef struct Fts5Str Fts5Str;
typedef struct Fts5Phrase Fts5Phrase;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5Expr Fts5Expr;


struct Fts5ParserToken {
  int eType;                      /* Token type */
  int n;                          /* Size of z[] in bytes */
  const char *z;                  /* Token value */
};

struct Fts5Parser {
  Fts5Tokenizer *pTokenizer;
  sqlite4_tokenizer *p;
  sqlite4 *db;                    /* Database handle */

  char *zErr;                     /* Error message (or NULL) */

  const char *zExpr;              /* Pointer to expression text (nul-term) */
  int iExpr;                      /* Current offset in zExpr */
  Fts5ParserToken next;           /* Next token */

  const char **azCol;             /* Column names of indexed table */
  int nCol;                       /* Size of azCol[] in bytes */

  /* Space for dequoted copies of strings */
  char *aSpace;
  int iSpace;
};

struct Fts5Token {
  int bPrefix;                    /* True for a prefix search */
  int n;                          /* Size of z[] in bytes */
  const char *z;                  /* Token value */
};

struct Fts5Str {
  Fts5Token *aToken;
  int nToken;
};

struct Fts5Phrase {
  int iCol;                       /* Column of table to search (-1 -> all) */
  int nStr;
  Fts5Str *aStr;
  int *aiNear;
};

struct Fts5ExprNode {
  int eType;
  Fts5Phrase *pPhrase;
  Fts5ExprNode *pLeft;
  Fts5ExprNode *pRight;
};

struct Fts5Expr {
  Fts5ExprNode *pRoot;
};

/*
** Return true if argument c is an ASCII whitespace character.
*/
static int fts5IsWhite(char c){
  return (c==' ' || c=='\n' || c=='\r' || c=='\t');
}

/*
** Return true if argument c is one of the special non-whitespace 
** characters that ends an unquoted expression token. 
*/
static int fts5IsSpecial(char c){
  return (c==':' || c=='(' || c==')' || c=='+' || c=='"');
}

static int fts5NextToken(
  Fts5Parser *pParse,             /* Parser context */
  Fts5ParserToken *p              /* OUT: Populate this object */
){
  const char *z = pParse->zExpr;
  char c;

  memset(p, 0, sizeof(Fts5ParserToken));

  /* Skip past any whitespace */
  while( fts5IsWhite(z[pParse->iExpr]) ) pParse->iExpr++;

  c = z[pParse->iExpr];
  if( c=='\0' ){
    p->eType = TOKEN_EOF;
  }

  else if( c=='(' ){
    pParse->iExpr++;
    p->eType = TOKEN_LP;
  }

  else if( c==')' ){
    pParse->iExpr++;
    p->eType = TOKEN_RP;
  }

  else if( c==':' ){
    pParse->iExpr++;
    p->eType = TOKEN_COLON;
  }

  else if( c=='+' ){
    pParse->iExpr++;
    p->eType = TOKEN_PLUS;
  }

  else if( c=='"' ){
    char *zOut = &pParse->aSpace[pParse->iSpace];
    const char *zPrimitive = zOut;
    int i = pParse->iExpr+1;

    while( z[i] ){
      if( z[i]=='"' ){
        if( z[i+1]=='"' ){
          i++;
        }else{
          break;
        }
      }
      *zOut++ = z[i];
      i++;
    }
    if( z[i]!='"' ){
      /* Mismatched quotation mark */
      return SQLITE4_ERROR;
    }

    pParse->iExpr = i+1;
    p->eType = TOKEN_PRIMITIVE;
    p->z = zPrimitive;
    p->n = (zOut - zPrimitive);
    pParse->iSpace += (zOut - zPrimitive);
  }

  else{
    const char *zPrimitive = &z[pParse->iExpr];
    int n = 0;
    while( zPrimitive[n] 
        && fts5IsSpecial(zPrimitive[n])==0
        && fts5IsWhite(zPrimitive[n])==0 
    ){
      n++;
    }
    pParse->iExpr += n;

    if( n==3 && memcmp(zPrimitive, "NOT", 3)==0 ){
      p->eType = TOKEN_NOT;
    }
    else if( n==2 && memcmp(zPrimitive, "OR", 2)==0 ){
      p->eType = TOKEN_OR;
    }
    else if( n==3 && memcmp(zPrimitive, "AND", 3)==0 ){
      p->eType = TOKEN_AND;
    }else{
      p->eType = TOKEN_PRIMITIVE;
      p->z = zPrimitive;
      p->n = n;
    }
  }

  return SQLITE4_OK;
}

static int fts5NextToken2(
  Fts5Parser *pParse,
  Fts5ParserToken *p
){
  int rc = SQLITE4_OK;
  if( pParse->iExpr==0 ){
    rc = fts5NextToken(pParse, p);
  }else{
    *p = pParse->next;
  }

  if( rc==SQLITE4_OK && p->eType!=TOKEN_EOF ){
    rc = fts5NextToken(pParse, &pParse->next);
  }

  return rc;
}

static int fts5PhraseNewStr(
  Fts5Parser *pParse,             /* Expression parsing context */
  Fts5Phrase *pPhrase,            /* Phrase to add a new Fts5Str to */
  int nNear                       /* Value of nnn in NEAR/nnn operator */
){
  const int nIncr = 4;

  if( (pPhrase->nStr % nIncr)==0 ){
    Fts5Str *aNew;
    aNew = (Fts5Str *)sqlite4DbRealloc(pParse->db, 
        pPhrase->aStr, (pPhrase->nStr+nIncr)*sizeof(Fts5Str)
    );
    if( !aNew ) return SQLITE4_NOMEM;
    memset(&aNew[pPhrase->nStr], 0, nIncr*sizeof(Fts5Str));
    pPhrase->aStr = aNew;
  }

  pPhrase->nStr++;
  return SQLITE4_OK;
}

/*
** Callback for fts5CountTokens().
*/
static int fts5CountTokensCb(void *pCtx, int iWeight, const char *z, int n){
  (*((int *)pCtx))++;
  return 0;
}

/*
** Count the number of tokens in document zDoc/nDoc using the tokenizer and
** tokenizer instance supplied as the first two arguments. Set *pnToken to
** the result before returning.
*/
static int fts5CountTokens(
  Fts5Tokenizer *pTokenizer,
  sqlite4_tokenizer *p,
  const char *zDoc,
  int nDoc,
  int *pnToken
){
  int nToken = 0;
  int rc;
  rc = pTokenizer->xTokenize((void *)&nToken, p, zDoc, nDoc, fts5CountTokensCb);
  *pnToken = nToken;
  return rc;
}

struct AppendTokensCtx {
  Fts5Parser *pParse;
  Fts5Str *pStr;
};

static int fts5AppendTokensCb(void *pCtx, int iWeight, const char *z, int n){
  struct AppendTokensCtx *p = (struct AppendTokensCtx *)pCtx;
  Fts5Token *pToken;
  char *zSpace;

  zSpace = &p->pParse->aSpace[p->pParse->iSpace];
  p->pParse->iSpace += (n+1);
  memcpy(zSpace, z, n);
  zSpace[n] = '\0';

  pToken = &p->pStr->aToken[p->pStr->nToken];
  p->pStr->nToken++;
  pToken->bPrefix = 0;
  pToken->z = zSpace;
  pToken->n = n;

  return 0;
}

static int fts5AppendTokens(
  Fts5Parser *pParse,
  Fts5Str *pStr,
  const char *zPrim,
  int nPrim
){
  struct AppendTokensCtx ctx;
  ctx.pParse = pParse;
  ctx.pStr = pStr;

  return pParse->pTokenizer->xTokenize(
      (void *)&ctx, pParse->p , zPrim, nPrim, fts5AppendTokensCb
  );
}

static int fts5PhraseAppend(
  Fts5Parser *pParse,
  Fts5Phrase *pPhrase,
  const char *zPrim,
  int nPrim
){
  Fts5Tokenizer *pTok = pParse->pTokenizer;
  int nToken;
  int rc;

  rc = fts5CountTokens(pTok, pParse->p, zPrim, nPrim, &nToken);
  if( rc==SQLITE4_OK && nToken>0 ){
    /* Extend the size of the token array by nToken entries */
    Fts5Str *pStr = &pPhrase->aStr[pPhrase->nStr-1];

    pStr->aToken = sqlite4DbReallocOrFree(pParse->db, pStr->aToken,
        (pStr->nToken + nToken) * sizeof(Fts5Token)
    );
    if( !pStr->aToken ){
      rc = SQLITE4_NOMEM;
    }else{
      rc = fts5AppendTokens(pParse, pStr, zPrim, nPrim);
    }
  }

  return rc;
}

static void fts5PhraseFree(sqlite4 *db, Fts5Phrase *p){
  if( p ){
    int i;
    for(i=0; i<p->nStr; i++){
      sqlite4DbFree(db, p->aStr[i].aToken);
    }
    sqlite4DbFree(db, p->aiNear);
    sqlite4DbFree(db, p->aStr);
    sqlite4DbFree(db, p);
  }
}

static int fts5NextTokenOrPhrase(
  Fts5Parser *pParse,             /* Parser context */
  int *peType,                    /* OUT: Token type */
  Fts5Phrase **ppPhrase           /* OUT: New phrase object */
){
  int rc;
  Fts5Phrase *pPhrase = 0;
  Fts5ParserToken t;

  rc = fts5NextToken2(pParse, &t);
  *peType = t.eType;
  if( rc==SQLITE4_OK && t.eType==TOKEN_PRIMITIVE ){

    /* Allocate the Fts5Phrase object */
    pPhrase = sqlite4DbMallocZero(pParse->db, sizeof(Fts5Phrase));
    if( pPhrase==0 ){
      rc = SQLITE4_NOMEM;
      goto token_or_phrase_out;
    }
    pPhrase->iCol = -1;

    /* Check if this first primitive is a column name or not. */
    if( pParse->next.eType==TOKEN_COLON ){
      int iCol;
      for(iCol=0; iCol<pParse->nCol; iCol++){
        if( sqlite4StrNICmp(pParse->azCol[iCol], t.z, t.n)==0 ) break;
      }
      if( iCol==pParse->nCol ){
        pParse->zErr = sqlite4MPrintf(pParse->db, 
            "fts5: no such column: %.*s", t.n, t.z
        );
        rc = SQLITE4_ERROR;
        goto token_or_phrase_out;
      }
      pPhrase->iCol = iCol;

      rc = fts5NextToken2(pParse, &t);
      if( rc==SQLITE4_OK ) rc = fts5NextToken2(pParse, &t);
      if( rc==SQLITE4_OK && t.eType!=TOKEN_PRIMITIVE ){
        rc = SQLITE4_ERROR;
      }
      if( rc!=SQLITE4_OK ) goto token_or_phrase_out;
    }

    /* Add the first Fts5Str to the new phrase object. Populate it with the
    ** results of tokenizing t.z/t.n. */
    rc = fts5PhraseNewStr(pParse, pPhrase, 0);
    if( rc==SQLITE4_OK ){
      rc = fts5PhraseAppend(pParse, pPhrase, t.z, t.n);
    }

    /* Add any further primitives connected by "+" or NEAR operators. */
    while( rc==SQLITE4_OK && 
        (pParse->next.eType==TOKEN_PLUS || pParse->next.eType==TOKEN_NEAR)
    ){
      rc = fts5NextToken2(pParse, &t);
      if( rc==SQLITE4_OK ){
        if( t.eType==TOKEN_NEAR ){
          rc = fts5PhraseNewStr(pParse, pPhrase, t.n);
          if( rc!=SQLITE4_OK ) goto token_or_phrase_out;
        }
        rc = fts5NextToken2(pParse, &t);
        if( rc!=SQLITE4_OK ) goto token_or_phrase_out;
        if( t.eType!=TOKEN_PRIMITIVE ){
          rc = SQLITE4_ERROR;
        }else{
          rc = fts5PhraseAppend(pParse, pPhrase, t.z, t.n);
        }
      }
    }
  }

 token_or_phrase_out:
  if( rc!=SQLITE4_OK ){
    fts5PhraseFree(pParse->db, pPhrase);
  }else{
    *ppPhrase = pPhrase;
  }
  return rc;
}

static void fts5FreeExprNode(sqlite4 *db, Fts5ExprNode *pNode){
  if( pNode ){
    fts5PhraseFree(db, pNode->pPhrase);
    fts5FreeExprNode(db, pNode->pLeft);
    fts5FreeExprNode(db, pNode->pRight);
    sqlite4DbFree(db, pNode);
  }
}

static void fts5ExpressionFree(sqlite4 *db, Fts5Expr *pExpr){
  if( pExpr ){
    fts5FreeExprNode(db, pExpr->pRoot);
    sqlite4DbFree(db, pExpr);
  }
}

typedef struct ExprHier ExprHier;
struct ExprHier {
  Fts5ExprNode **ppNode;
  int nOpen;
};

static int fts5GrowExprHier(
  sqlite4 *db, 
  int *pnAlloc, 
  ExprHier **paHier, 
  int nReq
){
  int rc = SQLITE4_OK;
  int nAlloc = *pnAlloc;
  if( nAlloc<nReq ){
    ExprHier *aNew;
    nAlloc += 8;
    aNew = (ExprHier *)sqlite4DbReallocOrFree(
        db, *paHier, nAlloc*sizeof(ExprHier)
    );
    if( aNew==0 ) rc = SQLITE4_NOMEM;
    *paHier = aNew;
  }
  return rc;
}

static int fts5AddBinary(
  sqlite4 *db, 
  int eType,
  int *pnHier, 
  int *pnHierAlloc, 
  ExprHier **paHier
){
  Fts5ExprNode *pNode;
  Fts5ExprNode **pp;
  int rc;

  rc = fts5GrowExprHier(db, pnHierAlloc, paHier, *pnHier+1);
  if( rc!=SQLITE4_OK ) return rc;
  pNode = sqlite4DbMallocZero(db, sizeof(Fts5ExprNode));
  if( !pNode ) return SQLITE4_NOMEM;
  pNode->eType = eType;

  pp = (*paHier)[*pnHier-1].ppNode;
  pNode->pLeft = *pp;
  *pp = pNode;
  (*paHier)[*pnHier].ppNode = &pNode->pRight;
  (*paHier)[*pnHier].nOpen = 0;
  (*pnHier)++;

  return SQLITE4_OK;
}

static int fts5ParseExpression(
  sqlite4 *db,                    /* Database handle */
  Fts5Tokenizer *pTokenizer,      /* Tokenizer module */
  sqlite4_tokenizer *p,           /* Tokenizer instance */
  const char **azCol,             /* Array of column names (nul-term'd) */
  int nCol,                       /* Size of array azCol[] */
  const char *zExpr,              /* FTS expression text */
  Fts5Expr **ppExpr,              /* OUT: Expression object */
  char **pzErr                    /* OUT: Error message */
){
  int rc = SQLITE4_OK;
  Fts5Parser sParse;
  int nExpr;
  int i;
  Fts5Expr *pExpr;

  int nHier = 0;
  int nHierAlloc = 0;
  ExprHier *aHier = 0;

  nExpr = sqlite4Strlen30(zExpr);
  memset(&sParse, 0, sizeof(Fts5Parser));
  sParse.zExpr = zExpr;
  sParse.azCol = azCol;
  sParse.nCol = nCol;
  sParse.pTokenizer = pTokenizer;
  sParse.p = p;
  sParse.db = db;

  pExpr = sqlite4DbMallocZero(db, sizeof(Fts5Expr) + nExpr*2);
  if( !pExpr ) return SQLITE4_NOMEM;
  sParse.aSpace = (char *)&pExpr[1];

  rc = fts5GrowExprHier(db, &nHierAlloc, &aHier, 1);
  if( rc==SQLITE4_OK ){
    aHier[0].ppNode = &pExpr->pRoot;
    aHier[0].nOpen = 0;
    nHier = 1;
  }

  while( rc==SQLITE4_OK ){
    int eType = 0;
    Fts5Phrase *pPhrase = 0;
    Fts5ExprNode *pNode = 0;

    rc = fts5NextTokenOrPhrase(&sParse, &eType, &pPhrase);
    if( rc!=SQLITE4_OK || eType==TOKEN_EOF ) break;

    switch( eType ){
      case TOKEN_PRIMITIVE: {
        Fts5ExprNode **pp = aHier[nHier-1].ppNode;
        if( *pp ){
          rc = fts5AddBinary(db, TOKEN_AND, &nHier, &nHierAlloc, &aHier);
          pp = aHier[nHier-1].ppNode;
        }
        if( rc==SQLITE4_OK ){
          pNode = sqlite4DbMallocZero(db, sizeof(Fts5ExprNode));
          if( pNode==0 ){
            rc = SQLITE4_NOMEM;
          }else{
            pNode->eType = TOKEN_PRIMITIVE;
            pNode->pPhrase = pPhrase;
            *pp = pNode;
          }
        }
        break;
      }

      case TOKEN_AND:
      case TOKEN_OR:
      case TOKEN_NOT: {
        Fts5ExprNode **pp = aHier[nHier-1].ppNode;

        if( *pp==0 ){
          rc = SQLITE4_ERROR;
        }else{
          while( nHier>1 
             && aHier[nHier-1].nOpen==0 
             && (*aHier[nHier-2].ppNode)->eType  < eType 
          ){
            nHier--;
          }

          rc = fts5AddBinary(db, eType, &nHier, &nHierAlloc, &aHier);
        }
        break;
      }

      case TOKEN_LP: {
        Fts5ExprNode **pp = aHier[nHier-1].ppNode;
        if( *pp ){
          rc = SQLITE4_ERROR;
        }else{
          aHier[nHier-1].nOpen++;
        }
        break;
      }

      case TOKEN_RP: {
        Fts5ExprNode **pp = aHier[nHier-1].ppNode;
        if( *pp==0 ){
          rc = SQLITE4_ERROR;
        }else{
          for(i=nHier-1; i>=0; i--){
            if( aHier[i].nOpen>0 ) break;
          }
          if( i<0 ){
            rc = SQLITE4_ERROR;
          }else{
            aHier[i].nOpen--;
            nHier = i+1;
          }
        }
        break;
      }

      default:
        rc = SQLITE4_ERROR;
        break;
    }

    if( rc!=SQLITE4_OK ){
      sqlite4DbFree(db, pNode);
      break;
    }
  }

  if( rc==SQLITE4_OK && *aHier[nHier-1].ppNode==0 ){
    rc = SQLITE4_ERROR;
  }
  for(i=0; rc==SQLITE4_OK && i<nHier; i++){
    if( aHier[i].nOpen>0 ) rc = SQLITE4_ERROR;
  }

  if( rc!=SQLITE4_OK ){
    fts5ExpressionFree(db, pExpr);
    *pzErr = sParse.zErr;
  }else{
    *ppExpr = pExpr;
  }
  sqlite4DbFree(db, aHier);
  return rc;
}

/*
** Search for the Fts5Tokenizer object named zName. Return a pointer to it
** if it exists, or NULL otherwise.
*/
static Fts5Tokenizer *fts5FindTokenizer(sqlite4 *db, const char *zName){
  Fts5Tokenizer *p;
  for(p=db->pTokenizer; p; p=p->pNext){
    if( 0==sqlite4StrICmp(zName, p->zName) ) break;
  }
  return p;
}

#ifdef SQLITE4_TEST
static int fts5PrintExprNode(sqlite4 *, Fts5ExprNode *, char **);
static int fts5PrintExprNodeParen(
  sqlite4 *db, 
  Fts5ExprNode *pNode, 
  char **pzRet
){
  int bParen = (pNode->eType!=TOKEN_PRIMITIVE || pNode->pPhrase->nStr>1);
  sqlite4_env *pEnv = sqlite4_db_env(db);
  char *zRet = *pzRet;

  if( bParen ) zRet = sqlite4_mprintf(pEnv, "%z(", zRet);
  fts5PrintExprNode(db, pNode, &zRet);
  if( bParen ) zRet = sqlite4_mprintf(pEnv, "%z)", zRet);

  *pzRet = zRet;
  return SQLITE4_OK;
}
static int fts5PrintExprNode(sqlite4 *db, Fts5ExprNode *pNode, char **pzRet){
  sqlite4_env *pEnv = sqlite4_db_env(db);
  char *zRet = *pzRet;

  assert(
      pNode->eType==TOKEN_AND || pNode->eType==TOKEN_OR
   || pNode->eType==TOKEN_NOT || pNode->eType==TOKEN_PRIMITIVE
  );
  assert( (pNode->eType==TOKEN_PRIMITIVE)==(pNode->pPhrase!=0) );

  if( pNode->eType==TOKEN_PRIMITIVE ){
    int iStr;
    Fts5Phrase *pPhrase = pNode->pPhrase;
    for(iStr=0; iStr<pPhrase->nStr; iStr++){
      int iToken;
      Fts5Str *pStr = &pPhrase->aStr[iStr];
      if( iStr>0 ){
        zRet = sqlite4_mprintf(
            pEnv, "%z NEAR/%d ", zRet, pPhrase->aiNear[iStr-1]
        );
      }
      for(iToken=0; iToken<pStr->nToken; iToken++){
        int nRet = sqlite4Strlen30(zRet);
        const char *z = pStr->aToken[iToken].z;
        int n = pStr->aToken[iToken].n;
        int i;

        zRet = (char *)sqlite4_realloc(pEnv, zRet, nRet + n*2+4);
        if( iToken>0 ) zRet[nRet++] = '+';
        zRet[nRet++] = '"';

        for(i=0; i<n; i++){
          if( z[i]=='"' ) zRet[nRet++] = '"';
          zRet[nRet++] = z[i];
        }

        zRet[nRet++] = '"';
        zRet[nRet++] = '\0';
      }
    }
  }else{
    fts5PrintExprNodeParen(db, pNode->pLeft, &zRet);
    switch( pNode->eType ){
      case TOKEN_AND:
        zRet = sqlite4_mprintf(pEnv, "%z AND ", zRet);
        break;
      case TOKEN_OR:
        zRet = sqlite4_mprintf(pEnv, "%z OR ", zRet);
        break;
      case TOKEN_NOT:
        zRet = sqlite4_mprintf(pEnv, "%z NOT ", zRet);
        break;
    }
    fts5PrintExprNodeParen(db, pNode->pRight, &zRet);
  }

  *pzRet = zRet;
  return SQLITE4_OK;
}
static int fts5PrintExpr(sqlite4 *db, Fts5Expr *pExpr, char **pzRet){
  return fts5PrintExprNode(db, pExpr->pRoot, pzRet);
}

/*
** A user defined function used to test the fts5 expression parser. As follows:
**
**   fts5_parse_expr(<tokenizer>, <expr>);
*/
static void fts5_parse_expr(
  sqlite4_context *pCtx, 
  int nVal, 
  sqlite4_value **aVal
){
  int rc;
  Fts5Expr *pExpr = 0;
  Fts5Tokenizer *pTok;
  sqlite4_tokenizer *p;
  sqlite4 *db;

  const char *zTokenizer;
  const char *zExpr;
  char *zErr = 0;
  char *zRet = 0;

  db = sqlite4_context_db_handle(pCtx);
  assert( nVal==2 );
  zTokenizer = (const char *)sqlite4_value_text(aVal[0]);
  zExpr = (const char *)sqlite4_value_text(aVal[1]);

  pTok = fts5FindTokenizer(db, zTokenizer);
  if( pTok==0 ){
    zErr = sqlite4MPrintf(db, "no such tokenizer: %s", zTokenizer);
    goto fts5_parse_expr_out;
  }else{
    rc = pTok->xCreate(pTok->pCtx, 0, 0, &p);
    if( rc!=SQLITE4_OK ){
      zErr = sqlite4MPrintf(db, "error creating tokenizer: %d", rc);
      goto fts5_parse_expr_out;
    }
  }

  rc = fts5ParseExpression(db, pTok, p, 0, 0, zExpr, &pExpr, &zErr);
  if( rc!=SQLITE4_OK ){
    if( zErr==0 ){
      zErr = sqlite4MPrintf(db, "error parsing expression: %d", rc);
    }
    goto fts5_parse_expr_out;
  }

  fts5PrintExpr(db, pExpr, &zRet);
  sqlite4_result_text(pCtx, zRet, -1, SQLITE4_TRANSIENT);
  fts5ExpressionFree(db, pExpr);
  sqlite4_free(sqlite4_db_env(db), zRet);

 fts5_parse_expr_out:
  if( zErr ){
    sqlite4_result_error(pCtx, zErr, -1);
    sqlite4DbFree(db, zErr);
  }
}
#endif

void sqlite4ShutdownFts5(sqlite4 *db){
  Fts5Tokenizer *p;
  Fts5Tokenizer *pNext;
  for(p=db->pTokenizer; p; p=pNext){
    pNext = p->pNext;
    sqlite4DbFree(db, p);
  }
  
}

/*
** Register the default FTS5 tokenizer and functions with handle db.
*/
int sqlite4InitFts5(sqlite4 *db){
#ifdef SQLITE4_TEST
  int rc = sqlite4_create_function(
      db, "fts5_parse_expr", 2, SQLITE4_UTF8, 0, fts5_parse_expr, 0, 0
  );
  if( rc!=SQLITE4_OK ) return rc;
#endif
  return sqlite4InitFts5Func(db);
}

/*
** This function is used to install custom FTS tokenizers.
*/
int sqlite4_create_tokenizer(
  sqlite4 *db,
  const char *zName,
  void *pCtx,
  int (*xCreate)(void*, const char**, int, sqlite4_tokenizer**),
  int (*xTokenize)(void*, sqlite4_tokenizer*,
      const char*, int, int(*x)(void*, int, const char*, int)
  ),
  int (*xDestroy)(void*, sqlite4_tokenizer *)
){
  int rc = SQLITE4_OK;
  sqlite4_mutex_enter(db->mutex);

  /* It is not possible to override an existing tokenizer */
  if( fts5FindTokenizer(db, zName) ){
    rc = SQLITE4_ERROR;
  }else{
    int nName = sqlite4Strlen30(zName);
    Fts5Tokenizer *pTokenizer = (Fts5Tokenizer *)sqlite4DbMallocZero(db, 
        sizeof(Fts5Tokenizer) + nName+1
    );
    if( !pTokenizer ){
      rc = SQLITE4_NOMEM;
    }else{
      pTokenizer->pCtx = pCtx;
      pTokenizer->xCreate = xCreate;
      pTokenizer->xTokenize = xTokenize;
      pTokenizer->xDestroy = xDestroy;
      pTokenizer->zName = (char *)&pTokenizer[1];
      memcpy(pTokenizer->zName, zName, nName+1);

      pTokenizer->pNext = db->pTokenizer;
      db->pTokenizer = pTokenizer;
    }
  }

  rc = sqlite4ApiExit(db, rc);
  sqlite4_mutex_leave(db->mutex);
  return rc;
}

Added src/fts5func.c.































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
/*
** 2012 December 17
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
*/

#include "sqliteInt.h"

static int fts5SimpleCreate(
  void *pCtx, 
  const char **azArg, 
  int nArg, 
  sqlite4_tokenizer **pp
){
  *pp = (sqlite4_tokenizer *)pCtx;
  return SQLITE4_OK;
}

static int fts5SimpleDestroy(void *pCtx, sqlite4_tokenizer *p){
  return SQLITE4_OK;
}

static char fts5Tolower(char c){
  if( c>='A' && c<='Z' ) c = c + ('a' - 'A');
  return c;
}

static int fts5SimpleTokenize(
  void *pCtx, 
  sqlite4_tokenizer *p,
  const char *zDoc,
  int nDoc,
  int(*x)(void*, int, const char*, int)
){
  sqlite4_env *pEnv = (sqlite4_env *)p;
  char *aBuf;
  int nBuf;
  int iBuf;
  int i;
  int brk = 0;

  nBuf = 128;
  aBuf = (char *)sqlite4_malloc(pEnv, nBuf);
  if( !aBuf ) return SQLITE4_NOMEM;

  iBuf = 0;
  for(i=0; brk==0 && i<nDoc; i++){
    if( sqlite4Isalnum(zDoc[i]) ){
      aBuf[iBuf++] = fts5Tolower(zDoc[i]);
    }else if( iBuf>0 ){
      brk = x(pCtx, 0, aBuf, iBuf);
      iBuf = 0;
    }
  }
  if( iBuf>0 ) x(pCtx, 0, aBuf, iBuf);

  sqlite4_free(pEnv, aBuf);
  return SQLITE4_OK;
}

int sqlite4InitFts5Func(sqlite4 *db){
  int rc;
  sqlite4_env *pEnv = sqlite4_db_env(db);

  rc = sqlite4_create_tokenizer(db, "simple", (void *)pEnv, 
      fts5SimpleCreate, fts5SimpleTokenize, fts5SimpleDestroy
  );
  if( rc!=SQLITE4_OK ) return rc;

  return rc;
}

Changes to src/lsm_file.c.

165
166
167
168
169
170
171
172

173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
**
**   In mmap() mode, this list contains all currently allocated pages that
**   are carrying pointers into the database file mapping (pMap/nMap). If the
**   file has to be unmapped and then remapped (required to grow the mapping
**   as the file grows), the Page.aData pointers are updated by iterating
**   through the contents of this list.
**
**   In non-mmap() mode, this list is an LRU list of cached pages with nRef==0.

*/
struct FileSystem {
  lsm_db *pDb;                    /* Database handle that owns this object */
  lsm_env *pEnv;                  /* Environment pointer */
  char *zDb;                      /* Database file name */
  char *zLog;                     /* Database file name */
  int nMetasize;                  /* Size of meta pages in bytes */
  int nPagesize;                  /* Database page-size in bytes */
  int nBlocksize;                 /* Database block-size in bytes */

  /* r/w file descriptors for both files. */
  LsmFile *pLsmFile;
  lsm_file *fdDb;                 /* Database file */
  lsm_file *fdLog;                /* Log file */
  int szSector;                   /* Database file sector size */

  /* If this is a compressed database, a pointer to the compression methods.
  ** For an uncompressed database, a NULL pointer.  */
  lsm_compress *pCompress;







|
>











|







165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
**
**   In mmap() mode, this list contains all currently allocated pages that
**   are carrying pointers into the database file mapping (pMap/nMap). If the
**   file has to be unmapped and then remapped (required to grow the mapping
**   as the file grows), the Page.aData pointers are updated by iterating
**   through the contents of this list.
**
**   In non-mmap() mode, this list is an LRU list of cached pages with 
**   nRef==0.
*/
struct FileSystem {
  lsm_db *pDb;                    /* Database handle that owns this object */
  lsm_env *pEnv;                  /* Environment pointer */
  char *zDb;                      /* Database file name */
  char *zLog;                     /* Database file name */
  int nMetasize;                  /* Size of meta pages in bytes */
  int nPagesize;                  /* Database page-size in bytes */
  int nBlocksize;                 /* Database block-size in bytes */

  /* r/w file descriptors for both files. */
  LsmFile *pLsmFile;              /* Used after lsm_close() to link into list */
  lsm_file *fdDb;                 /* Database file */
  lsm_file *fdLog;                /* Log file */
  int szSector;                   /* Database file sector size */

  /* If this is a compressed database, a pointer to the compression methods.
  ** For an uncompressed database, a NULL pointer.  */
  lsm_compress *pCompress;

Changes to src/main.c.

770
771
772
773
774
775
776



777
778
779
780
781
782
783
....
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
  }
  sqlite4ResetInternalSchema(db, -1);

  /* Tell the code in notify.c that the connection no longer holds any
  ** locks and does not require any further unlock-notify callbacks.
  */
  sqlite4ConnectionClosed(db);




  assert( db->nDb<=2 );
  assert( db->aDb==db->aDbStatic );
  {
    FuncDef *pNext, *pSame, *p;
    for(p=db->aFunc.pFirst; p; p=pNext){
      pNext = p->pNextName;
................................................................................
    /* sqlite4AutoLoadExtensions(db); */
    rc = sqlite4_errcode(db);
    if( rc!=SQLITE4_OK ){
      goto opendb_out;
    }
  }

#ifdef SQLITE4_ENABLE_FTS3
  if( !db->mallocFailed && rc==SQLITE4_OK ){
    rc = sqlite4Fts3Init(db);
  }
#endif

#ifdef SQLITE4_ENABLE_ICU
  if( !db->mallocFailed && rc==SQLITE4_OK ){
    rc = sqlite4IcuInit(db);
  }
#endif








>
>
>







 







<

|

<







770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
....
1783
1784
1785
1786
1787
1788
1789

1790
1791
1792

1793
1794
1795
1796
1797
1798
1799
  }
  sqlite4ResetInternalSchema(db, -1);

  /* Tell the code in notify.c that the connection no longer holds any
  ** locks and does not require any further unlock-notify callbacks.
  */
  sqlite4ConnectionClosed(db);
  
  /* Delete tokenizers */
  sqlite4ShutdownFts5(db);

  assert( db->nDb<=2 );
  assert( db->aDb==db->aDbStatic );
  {
    FuncDef *pNext, *pSame, *p;
    for(p=db->aFunc.pFirst; p; p=pNext){
      pNext = p->pNextName;
................................................................................
    /* sqlite4AutoLoadExtensions(db); */
    rc = sqlite4_errcode(db);
    if( rc!=SQLITE4_OK ){
      goto opendb_out;
    }
  }


  if( !db->mallocFailed && rc==SQLITE4_OK ){
    rc = sqlite4InitFts5(db);
  }


#ifdef SQLITE4_ENABLE_ICU
  if( !db->mallocFailed && rc==SQLITE4_OK ){
    rc = sqlite4IcuInit(db);
  }
#endif

Changes to src/sqlite.h.in.

227
228
229
230
231
232
233
234

235
236
237
238
239
240
241
....
4364
4365
4366
4367
4368
4369
4370





















4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
** [database connection] objects associated with E can be used at the
** same time in different threads, so long as no single [database connection]
** object is used by two or more threads at the same time.  This
** corresponds to [SQLITE4_ENVCONFIG_MULTITHREAD].
**
** ^The sqlite4_threadsafe(E) function returns two if the same
** [database connection] can be used at the same time from two or more
** separate threads.  This setting corresponds to [SQLITE4_ENVCONFIG_SERIALIZED].

**
** Note that SQLite4 is always threadsafe in this sense: Two or more
** objects each associated with different [sqlite4_env] objects can
** always be used at the same time in separate threads.
*/
int sqlite4_threadsafe(sqlite4_env*);

................................................................................
int sqlite4_num_to_text(sqlite4_num, char*);

/*
** CAPI4REF: Flags For Text-To-Numeric Conversion
*/
#define SQLITE4_PREFIX_ONLY         0x10
#define SQLITE4_IGNORE_WHITESPACE   0x20






















/*
** Undo the hack that converts floating point types to integer for
** builds on processors without floating point support.
*/
#ifdef SQLITE4_OMIT_FLOATING_POINT
# undef double
#endif

#ifdef __cplusplus
}  /* End of the 'extern "C"' block */
#endif
#endif







|
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>













227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
....
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
** [database connection] objects associated with E can be used at the
** same time in different threads, so long as no single [database connection]
** object is used by two or more threads at the same time.  This
** corresponds to [SQLITE4_ENVCONFIG_MULTITHREAD].
**
** ^The sqlite4_threadsafe(E) function returns two if the same
** [database connection] can be used at the same time from two or more
** separate threads.  This setting corresponds to 
** [SQLITE4_ENVCONFIG_SERIALIZED].
**
** Note that SQLite4 is always threadsafe in this sense: Two or more
** objects each associated with different [sqlite4_env] objects can
** always be used at the same time in separate threads.
*/
int sqlite4_threadsafe(sqlite4_env*);

................................................................................
int sqlite4_num_to_text(sqlite4_num, char*);

/*
** CAPI4REF: Flags For Text-To-Numeric Conversion
*/
#define SQLITE4_PREFIX_ONLY         0x10
#define SQLITE4_IGNORE_WHITESPACE   0x20

typedef struct sqlite4_tokenizer sqlite4_tokenizer;

/*
** CAPI4REF: Register an FTS tokenizer implementation
**
** xTokenize:
**   This function does the actual tokenization of an input string. For
**   each token in the input, the callback function is invoked once.
*/
int sqlite4_create_tokenizer(
  sqlite4 *db,
  const char *zName,
  void *pCtx,
  int (*xCreate)(void*, const char**, int, sqlite4_tokenizer**),
  int (*xTokenize)(void*, sqlite4_tokenizer*,
      const char*, int, 
      int(*x)(void *ctx, int iWeight, const char *zToken, int nToken)
  ),
  int (*xDestroy)(void*, sqlite4_tokenizer *)
);

/*
** Undo the hack that converts floating point types to integer for
** builds on processors without floating point support.
*/
#ifdef SQLITE4_OMIT_FLOATING_POINT
# undef double
#endif

#ifdef __cplusplus
}  /* End of the 'extern "C"' block */
#endif
#endif

Changes to src/sqliteInt.h.

557
558
559
560
561
562
563

564
565
566
567
568
569
570
...
867
868
869
870
871
872
873




874
875
876
877
878
879
880
....
3220
3221
3222
3223
3224
3225
3226




3227
typedef struct ExprList ExprList;
typedef struct ExprListItem ExprListItem;
typedef struct ExprSpan ExprSpan;
typedef struct FKey FKey;
typedef struct FuncDestructor FuncDestructor;
typedef struct FuncDef FuncDef;
typedef struct FuncDefTable FuncDefTable;

typedef struct IdList IdList;
typedef struct IdListItem IdListItem;
typedef struct Index Index;
typedef struct IndexSample IndexSample;
typedef struct KeyClass KeyClass;
typedef struct KeyInfo KeyInfo;
typedef struct Lookaside Lookaside;
................................................................................
  */
  sqlite4 *pBlockingConnection; /* Connection that caused SQLITE4_LOCKED */
  sqlite4 *pUnlockConnection;           /* Connection to watch for unlock */
  void *pUnlockArg;                     /* Argument to xUnlockNotify */
  void (*xUnlockNotify)(void **, int);  /* Unlock notify callback */
  sqlite4 *pNextBlocked;        /* Next in list of all blocked connections */
#endif




};

/*
** A macro to discover the encoding of a database.
*/
#define ENC(db) ((db)->aDb[0].pSchema->enc)

................................................................................
# define sqlite4MemdebugNoType(X,Y)   1
#endif
#define MEMTYPE_HEAP       0x01  /* General heap allocations */
#define MEMTYPE_LOOKASIDE  0x02  /* Might have been lookaside memory */
#define MEMTYPE_SCRATCH    0x04  /* Scratch allocations */
#define MEMTYPE_DB         0x10  /* Uses sqlite4DbMalloc, not sqlite_malloc */





#endif /* _SQLITEINT_H_ */







>







 







>
>
>
>







 







>
>
>
>

557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
...
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
....
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
typedef struct ExprList ExprList;
typedef struct ExprListItem ExprListItem;
typedef struct ExprSpan ExprSpan;
typedef struct FKey FKey;
typedef struct FuncDestructor FuncDestructor;
typedef struct FuncDef FuncDef;
typedef struct FuncDefTable FuncDefTable;
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct IdList IdList;
typedef struct IdListItem IdListItem;
typedef struct Index Index;
typedef struct IndexSample IndexSample;
typedef struct KeyClass KeyClass;
typedef struct KeyInfo KeyInfo;
typedef struct Lookaside Lookaside;
................................................................................
  */
  sqlite4 *pBlockingConnection; /* Connection that caused SQLITE4_LOCKED */
  sqlite4 *pUnlockConnection;           /* Connection to watch for unlock */
  void *pUnlockArg;                     /* Argument to xUnlockNotify */
  void (*xUnlockNotify)(void **, int);  /* Unlock notify callback */
  sqlite4 *pNextBlocked;        /* Next in list of all blocked connections */
#endif

#ifndef SQLITE_OMIT_FTS5
  Fts5Tokenizer *pTokenizer;      /* First in list of tokenizers */
#endif
};

/*
** A macro to discover the encoding of a database.
*/
#define ENC(db) ((db)->aDb[0].pSchema->enc)

................................................................................
# define sqlite4MemdebugNoType(X,Y)   1
#endif
#define MEMTYPE_HEAP       0x01  /* General heap allocations */
#define MEMTYPE_LOOKASIDE  0x02  /* Might have been lookaside memory */
#define MEMTYPE_SCRATCH    0x04  /* Scratch allocations */
#define MEMTYPE_DB         0x10  /* Uses sqlite4DbMalloc, not sqlite_malloc */

int sqlite4InitFts5(sqlite4 *db);
int sqlite4InitFts5Func(sqlite4 *db);
void sqlite4ShutdownFts5(sqlite4 *db);

#endif /* _SQLITEINT_H_ */

Added test/fts5expr1.test.







































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# 2012 December 17
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
set testprefix fts5expr1

foreach {tn expr res} {


  1 { abc }                  {"abc"}
  2 { abc AND def }          {"abc" AND "def"}
  3 { (abc) }                {"abc"}
  4 { (((abc))) }            {"abc"}
  5 { one OR two AND three } {"one" OR ("two" AND "three")}
  6 { one AND two OR three } {("one" AND "two") OR "three"}

  7 { "abc def" }            {"abc"+"def"}
  8 {  abc+def  }            {"abc"+"def"}
  9 { "abc def" + ghi }      {"abc"+"def"+"ghi"}

  10 { one AND two OR three   } {("one" AND "two") OR "three"}
  11 { one AND (two OR three) } {"one" AND ("two" OR "three")}

  12 { "abc""def" }             {"abc"+"def"}
  13 { "abc" "def" }            {"abc" AND "def"}
  14 { "abc" + "def" }          {"abc"+"def"}

  15 { a NOT b AND c OR d }     {(("a" NOT "b") AND "c") OR "d"}
  16 { a OR b AND c NOT d }     {"a" OR ("b" AND ("c" NOT "d"))}
  17 { (a OR b) AND c NOT d }   {("a" OR "b") AND ("c" NOT "d")}

} {
  do_execsql_test 1.$tn { 
    SELECT fts5_parse_expr('simple', $expr) 
  } [list [string trim $res]]
}



finish_test

Changes to test/permutations.test.

135
136
137
138
139
140
141

142
143
144
145
146
147
148
} -files {
  simple.test simple2.test
  log3.test 
  lsm1.test lsm2.test
  csr1.test
  ckpt1.test
  mc1.test


  aggerror.test
  attach.test
  autoindex1.test
  badutf.test
  between.test
  bigrow.test







>







135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
} -files {
  simple.test simple2.test
  log3.test 
  lsm1.test lsm2.test
  csr1.test
  ckpt1.test
  mc1.test
  fts5expr1.test

  aggerror.test
  attach.test
  autoindex1.test
  badutf.test
  between.test
  bigrow.test