Overview
Comment:
Refactor PLWriter in preparation for the buffered-document change.

Currently, PLWriter (Position List Writer) creates a locally-owned
DataBuffer to write into. This is necessary to support doclist
collection during tokenization, where there is no obvious buffer to
write output to, but it is not necessary for the other users of
PLWriter. This change adds a DLCollector (Doc List Collector)
structure to handle the tokenization case.

Also fix a potential memory leak in writeZeroSegment(): on error from
leafWriterStep(), the DataBuffer dl was being leaked. (CVS 3706)
Downloads: Tarball | ZIP archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 1b9918e20767aebc9c1e7523027139e5
User & Date: shess 2007-03-20 23:52:38.000
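For orientation, here is a minimal sketch of the lifecycle the new structure supports during tokenization: create a collector per term, feed it positions as tokens arrive, then drain it into a caller-supplied buffer. This is not code from the check-in; the function name and argument values are hypothetical, and it relies on the fts2-internal helpers added in the diff below (DLCollector, DataBuffer, dlcNew, dlcAddPos, dlcAddDoclist, dlcDelete, DL_DEFAULT), so it would compile only inside fts2.c.

/* Hypothetical sketch, not part of this check-in: collect one term's
** positions for document iDocid, then emit the doclist into pOut.
*/
static void exampleCollectTerm(sqlite_int64 iDocid, DataBuffer *pOut){
  /* One collector per distinct term, created when the term is first
  ** seen during tokenization.
  */
  DLCollector *pCollector = dlcNew(iDocid, DL_DEFAULT);

  /* For each occurrence of the term: record column, position, and
  ** byte offsets (the values here are arbitrary).
  */
  dlcAddPos(pCollector, 0 /* iColumn */, 0 /* iPos */,
            0 /* iStartOffset */, 5 /* iEndOffset */);

  /* After tokenization: append the collected doclist to the caller's
  ** buffer, then free the collector and its internally-owned buffer.
  */
  dataBufferInit(pOut, 0);
  dlcAddDoclist(pCollector, pOut);
  dlcDelete(pCollector);
}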
Context
2007-03-22
00:14  Refactor PLWriter to remove owned buffer. DLCollector (Document List Collector) now handles the case where PLWriter (Position List Writer) needed a local buffer. Change to using the associated DLWriter (Document List Writer) buffer, which reduces the number of memory copies needed in doclist processing, and brings PLWriter operation in line with DLWriter operation. (CVS 3707) (check-in: d04fa3a13a user: shess tags: trunk)
2007-03-20
23:52  Refactor PLWriter in preparation for the buffered-document change. Currently, PLWriter (Position List Writer) creates a locally-owned DataBuffer to write into. This is necessary to support doclist collection during tokenization, where there is no obvious buffer to write output to, but it is not necessary for the other users of PLWriter. This change adds a DLCollector (Doc List Collector) structure to handle the tokenization case. Also fix a potential memory leak in writeZeroSegment(): on error from leafWriterStep(), the DataBuffer dl was being leaked. (CVS 3706) (check-in: 1b9918e207 user: shess tags: trunk)
2007-03-19
17:44  Modify the interface to the pager sub-system in preparation for performing IO in blocks based on sector-size, not database page-size. (CVS 3705) (check-in: 7dc7658887 user: danielk1977 tags: trunk)
Changes
Changes to ext/fts2/fts2.c.
︙

  pWriter->iPos = 0;
  pWriter->iOffset = 0;
}
static void plwInit(PLWriter *pWriter, sqlite_int64 iDocid, DocListType iType){
  dataBufferInit(&pWriter->b, 0);
  plwReset(pWriter, iDocid, iType);
}
static void plwDestroy(PLWriter *pWriter){
  dataBufferDestroy(&pWriter->b);
  SCRAMBLE(pWriter);
}

/*******************************************************************/
/* DLCollector wraps PLWriter and DLWriter to provide a
** dynamically-allocated doclist area to use during tokenization.
**
** dlcNew - malloc up and initialize a collector.
** dlcDelete - destroy a collector and all contained items.
** dlcAddPos - append position and offset information.
** dlcAddDoclist - add the collected doclist to the given buffer.
*/
typedef struct DLCollector {
  PLWriter plw;
} DLCollector;

static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){
  DLWriter dlw;
  dlwInit(&dlw, pCollector->plw.iType, b);
  plwDlwAdd(&pCollector->plw, &dlw);
  dlwDestroy(&dlw);
}
static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos,
                      int iStartOffset, int iEndOffset){
  plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset);
}

static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){
  DLCollector *pCollector = malloc(sizeof(DLCollector));
  plwInit(&pCollector->plw, iDocid, iType);
  return pCollector;
}
static void dlcDelete(DLCollector *pCollector){
  plwDestroy(&pCollector->plw);
  SCRAMBLE(pCollector);
  free(pCollector);
}

/* Copy the doclist data of iType in pData/nData into *out, trimming
** unnecessary data as we go.  Only columns matching iColumn are
** copied; all columns are copied if iColumn is -1.  Elements with no
** matching columns are dropped.  The output is an iOutType doclist.
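Note that DLCollector simply embeds a PLWriter, so the position list is still accumulated in the PLWriter's locally-owned DataBuffer; dlcAddDoclist() copies it out through a temporary DLWriter. Per the timeline above, the follow-up check-in (CVS 3707) removes that owned buffer and writes through the DLWriter's buffer directly, cutting a memory copy.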
︙

  if( rc!=SQLITE_OK ) return rc;

  pCursor->pTokenizer = pTokenizer;
  while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
                                               &pToken, &nTokenBytes,
                                               &iStartOffset, &iEndOffset,
                                               &iPosition) ){
    DLCollector *p;

    /* Positions can't be negative; we use -1 as a terminator internally. */
    if( iPosition<0 ){
      pTokenizer->pModule->xClose(pCursor);
      return SQLITE_ERROR;
    }

    p = fts2HashFind(terms, pToken, nTokenBytes);
    if( p==NULL ){
      p = dlcNew(iDocid, DL_DEFAULT);
      fts2HashInsert(terms, pToken, nTokenBytes, p);
    }
    if( iColumn>=0 ){
      dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset);
    }
  }

  /* TODO(shess) Check return?  Should this be able to cause errors at
  ** this point?  Actually, same question about sqlite3_finalize(),
  ** though one could argue that failure there means that the data is
  ** not durable.  *ponder*
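In the tokenization loop above, a collector is created lazily the first time a term is seen (the fts2HashFind() miss) with doclist type DL_DEFAULT, and each occurrence in a real column (iColumn>=0) is appended with dlcAddPos().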
︙

}

/****************************************************************/
/* Used to hold hashtable data for sorting. */
typedef struct TermData {
  const char *pTerm;
  int nTerm;
  DLCollector *pCollector;
} TermData;

/* Orders TermData elements in strcmp fashion ( <0 for less-than, 0
** for equal, >0 for greater-than).
*/
static int termDataCmp(const void *av, const void *bv){
  const TermData *a = (const TermData *)av;
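Storing the collector in TermData lets writeZeroSegment() sort terms with termDataCmp() and then emit each term's collected doclist in term order, as the next hunk shows.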
︙

  n = fts2HashCount(pTerms);
  pData = malloc(n*sizeof(TermData));

  for(i = 0, e = fts2HashFirst(pTerms); e; i++, e = fts2HashNext(e)){
    assert( i<n );
    pData[i].pTerm = fts2HashKey(e);
    pData[i].nTerm = fts2HashKeysize(e);
    pData[i].pCollector = fts2HashData(e);
  }
  assert( i==n );

  /* TODO(shess) Should we allow user-defined collation sequences,
  ** here?  I think we only need that once we support prefix searches.
  */
  if( n>1 ) qsort(pData, n, sizeof(*pData), termDataCmp);

  /* TODO(shess) Refactor so that we can write directly to the segment
  ** DataBuffer, as happens for segment merges.
  */
  leafWriterInit(0, idx, &writer);
  dataBufferInit(&dl, 0);
  for(i=0; i<n; i++){
    dataBufferReset(&dl);
    dlcAddDoclist(pData[i].pCollector, &dl);

    rc = leafWriterStep(v, &writer,
                        pData[i].pTerm, pData[i].nTerm, dl.pData, dl.nData);
    if( rc!=SQLITE_OK ) goto err;
  }
  rc = leafWriterFinalize(v, &writer);

 err:
  dataBufferDestroy(&dl);
  free(pData);
  leafWriterDestroy(&writer);
  return rc;
}

/* This function implements the xUpdate callback; it's the top-level entry
 * point for inserting, deleting or updating a row in a full-text table.
 */
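This hunk also contains the leak fix called out in the check-in comment: a failure from leafWriterStep() now jumps to the err: label, so dataBufferDestroy(&dl) runs on the error path as well as on success, where previously dl was leaked.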
︙

    rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
  }

  if( rc==SQLITE_OK ) rc = writeZeroSegment(v, &terms);

  /* clean up */
  for(e=fts2HashFirst(&terms); e; e=fts2HashNext(e)){
    dlcDelete(fts2HashData(e));
  }
  fts2HashClear(&terms);

  return rc;
}

/*
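Since the hash-table values are now DLCollector pointers, the post-insert cleanup frees each entry with dlcDelete() before clearing the table with fts2HashClear().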
︙