Artifact 4c5ffe31d63622869eb6eec1503df7f6996fd1bd:
- File
ext/fts2/fts2_tokenizer.h
— part of check-in
[d0d1e7cdcc]
at
2006-10-10 17:37:14
on branch trunk
— Copy fts1/ to fts2/, changing reference from fts1 to fts2. For future
reference, the source versions copied were:
README.txt r1.1 fts1.c r1.37 fts1.h r1.2 fts1_hash.c r1.1 fts1_hash.h r1.1 fts1_porter.c r1.1 fts1_tokenizer.h r1.4 fts1_tokenizer1.c r1.6 (CVS 3471) (user: shess size: 3383)
/* ** 2006 July 10 ** ** The author disclaims copyright to this source code. ** ************************************************************************* ** Defines the interface to tokenizers used by fulltext-search. There ** are three basic components: ** ** sqlite3_tokenizer_module is a singleton defining the tokenizer ** interface functions. This is essentially the class structure for ** tokenizers. ** ** sqlite3_tokenizer is used to define a particular tokenizer, perhaps ** including customization information defined at creation time. ** ** sqlite3_tokenizer_cursor is generated by a tokenizer to generate ** tokens from a particular input. */ #ifndef _FTS2_TOKENIZER_H_ #define _FTS2_TOKENIZER_H_ /* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. ** If tokenizers are to be allowed to call sqlite3_*() functions, then ** we will need a way to register the API consistently. */ #include "sqlite3.h" /* ** Structures used by the tokenizer interface. */ typedef struct sqlite3_tokenizer sqlite3_tokenizer; typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; struct sqlite3_tokenizer_module { int iVersion; /* currently 0 */ /* ** Create and destroy a tokenizer. argc/argv are passed down from ** the fulltext virtual table creation to allow customization. */ int (*xCreate)(int argc, const char *const*argv, sqlite3_tokenizer **ppTokenizer); int (*xDestroy)(sqlite3_tokenizer *pTokenizer); /* ** Tokenize a particular input. Call xOpen() to prepare to ** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then ** xClose() to free any internal state. The pInput passed to ** xOpen() must exist until the cursor is closed. The ppToken ** result from xNext() is only valid until the next call to xNext() ** or until xClose() is called. */ /* TODO(shess) current implementation requires pInput to be ** nul-terminated. This should either be fixed, or pInput/nBytes ** should be converted to zInput. */ int (*xOpen)(sqlite3_tokenizer *pTokenizer, const char *pInput, int nBytes, sqlite3_tokenizer_cursor **ppCursor); int (*xClose)(sqlite3_tokenizer_cursor *pCursor); int (*xNext)(sqlite3_tokenizer_cursor *pCursor, const char **ppToken, int *pnBytes, int *piStartOffset, int *piEndOffset, int *piPosition); }; struct sqlite3_tokenizer { const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */ /* Tokenizer implementations will typically add additional fields */ }; struct sqlite3_tokenizer_cursor { sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */ /* Tokenizer implementations will typically add additional fields */ }; /* ** Get the module for a tokenizer which generates tokens based on a ** set of non-token characters. The default is to break tokens at any ** non-alnum character, though the set of delimiters can also be ** specified by the first argv argument to xCreate(). */ /* TODO(shess) This doesn't belong here. Need some sort of ** registration process. */ void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); #endif /* _FTS2_TOKENIZER_H_ */