/ Check-in [d04fa3a1]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Refactor PLWriter to remove owned buffer. DLCollector (Document List Collector) now handles the case where PLWriter (Position List Writer) needed a local buffer. Change to using the associated DLWriter (Document List Writer) buffer, which reduces the number of memory copies needed in doclist processing, and brings PLWriter operation in line with DLWriter operation. (CVS 3707)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:d04fa3a13a84f49074c673b8ee2fb6541da061b5
User & Date: shess 2007-03-22 00:14:29
Context
2007-03-22
15:20
Call sqlite3_free() instead of free() to release a buffer allocated by sqlite3_vmprintf() in test_async.c (test suite bug only). (CVS 3708) check-in: b078f09b user: danielk1977 tags: trunk
00:14
Refactor PLWriter to remove owned buffer. DLCollector (Document List Collector) now handles the case where PLWriter (Position List Writer) needed a local buffer. Change to using the associated DLWriter (Document List Writer) buffer, which reduces the number of memory copies needed in doclist processing, and brings PLWriter operation in line with DLWriter operation. (CVS 3707) check-in: d04fa3a1 user: shess tags: trunk
2007-03-20
23:52
Refactor PLWriter in preparation for buffered-document change. Currently, PLWriter (Position List Writer) creates a locally-owned DataBuffer to write into. This is necessary to support doclist collection during tokenization, where there is no obvious buffer to write output to, but is not necessary for the other users of PLWriter. This change adds a DLCollector (Doc List Collector) structure to handle the tokenization case.

Also fix a potential memory leak in writeZeroSegment(). In case of error from leafWriterStep(), the DataBuffer dl was being leaked. (CVS 3706) check-in: 1b9918e2 user: shess tags: trunk

Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to ext/fts2/fts2.c.

686
687
688
689
690
691
692

693
694
695
696
697
698
699
...
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
...
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874

875
876
877
878
879
880
881
882




883
884
885
886
887
888





889
890
891

892
893
894
895
896
897
898
899
900
901
902


903
904
905
906
907
908
909
910



911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933


934




935
936

937
938
939
940
941
942
943













944





945
946
947
948
949
950
951
952
953
954
955
956
957
958
959


960
961
962








963
964
965
966
967







968
969
970
971
972
973
974
975


976
977
978
979
980


981
982
983
984
985
986
987
988
989





990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004

1005
1006
1007
1008
1009
1010
1011
1012


1013

1014
1015
1016
1017
1018
1019



1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
....
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188


1189

1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
....
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
....
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
....
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
/* DLWriter is used to write doclist data to a DataBuffer.  DLWriter
** always appends to the buffer and does not own it.
**
** dlwInit - initialize to write a given type doclistto a buffer.
** dlwDestroy - clear the writer's memory.  Does not free buffer.
** dlwAppend - append raw doclist data to buffer.
** dlwAdd - construct doclist element and append to buffer.

*/
typedef struct DLWriter {
  DocListType iType;
  DataBuffer *b;
  sqlite_int64 iPrevDocid;
} DLWriter;

................................................................................
    dataBufferAppend2(pWriter->b, c, nFirstNew,
                      pData+nFirstOld, nData-nFirstOld);
  }else{
    dataBufferAppend(pWriter->b, c, nFirstNew);
  }
  pWriter->iPrevDocid = iLastDocid;
}
static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid,
                   const char *pPosList, int nPosList){
  char c[VARINT_MAX];
  int n = putVarint(c, iDocid-pWriter->iPrevDocid);

  assert( pWriter->iPrevDocid<iDocid );
  assert( pPosList==0 || pWriter->iType>DL_DOCIDS );

  dataBufferAppend(pWriter->b, c, n);

  if( pWriter->iType>DL_DOCIDS ){
    n = putVarint(c, 0);
    if( nPosList>0 ){
      dataBufferAppend2(pWriter->b, pPosList, nPosList, c, n);
    }else{
      dataBufferAppend(pWriter->b, c, n);
    }
  }
  pWriter->iPrevDocid = iDocid;
}

/*******************************************************************/
/* PLReader is used to read data from a document's position list.  As
** the caller steps through the list, data is cached so that varints
** only need to be decoded once.
................................................................................
    pReader->iEndOffset = pReader->iStartOffset+i;
  }
  assert( n<=pReader->nData );
  pReader->pData += n;
  pReader->nData -= n;
}

static void plrInit(PLReader *pReader, DocListType iType,
                    const char *pData, int nData){
  pReader->pData = pData;
  pReader->nData = nData;
  pReader->iType = iType;
  pReader->iColumn = 0;
  pReader->iPosition = 0;
  pReader->iStartOffset = 0;
  pReader->iEndOffset = 0;
  plrStep(pReader);
}
static void plrDestroy(PLReader *pReader){
  SCRAMBLE(pReader);
}

/*******************************************************************/
/* PLWriter is used in constructing a document's position list.  As a
** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op.

**
** plwInit - init for writing a document's poslist.
** plwReset - reset the writer for a new document.
** plwDestroy - clear a writer.
** plwNew - malloc storage and initialize it.
** plwDelete - clear and free storage.
** plwDlwAdd - append the docid and poslist to a doclist writer.
** plwAdd - append position and offset information.




*/
/* TODO(shess) PLWriter is used in two ways.  fulltextUpdate() uses it
** in construction of a new doclist.  docListTrim() and mergePosList()
** use it when trimming.  In the former case, it wants to own the
** DataBuffer, in the latter it's possible it could encode into a
** pre-existing DataBuffer.





*/
typedef struct PLWriter {
  DataBuffer b;


  sqlite_int64 iDocid;
  DocListType iType;
  int iColumn;    /* the last column written */
  int iPos;       /* the last position written */
  int iOffset;    /* the last start offset written */
} PLWriter;

static void plwDlwAdd(PLWriter *pWriter, DLWriter *dlWriter){
  dlwAdd(dlWriter, pWriter->iDocid, pWriter->b.pData, pWriter->b.nData);
}


static void plwAdd(PLWriter *pWriter, int iColumn, int iPos,
                   int iStartOffset, int iEndOffset){
  /* Worst-case space for POS_COLUMN, iColumn, iPosDelta,
  ** iStartOffsetDelta, and iEndOffsetDelta.
  */
  char c[5*VARINT_MAX];
  int n = 0;




  if( pWriter->iType==DL_DOCIDS ) return;

  if( iColumn!=pWriter->iColumn ){
    n += putVarint(c+n, POS_COLUMN);
    n += putVarint(c+n, iColumn);
    pWriter->iColumn = iColumn;
    pWriter->iPos = 0;
    pWriter->iOffset = 0;
  }
  assert( iPos>=pWriter->iPos );
  n += putVarint(c+n, POS_BASE+(iPos-pWriter->iPos));
  pWriter->iPos = iPos;
  if( pWriter->iType==DL_POSITIONS_OFFSETS ){
    assert( iStartOffset>=pWriter->iOffset );
    n += putVarint(c+n, iStartOffset-pWriter->iOffset);
    pWriter->iOffset = iStartOffset;
    assert( iEndOffset>=iStartOffset );
    n += putVarint(c+n, iEndOffset-iStartOffset);
  }
  dataBufferAppend(&pWriter->b, c, n);
}
static void plwReset(PLWriter *pWriter,
                     sqlite_int64 iDocid, DocListType iType){


  dataBufferReset(&pWriter->b);




  pWriter->iDocid = iDocid;
  pWriter->iType = iType;

  pWriter->iColumn = 0;
  pWriter->iPos = 0;
  pWriter->iOffset = 0;
}
static void plwInit(PLWriter *pWriter, sqlite_int64 iDocid, DocListType iType){
  dataBufferInit(&pWriter->b, 0);
  plwReset(pWriter, iDocid, iType);













}





static void plwDestroy(PLWriter *pWriter){
  dataBufferDestroy(&pWriter->b);
  SCRAMBLE(pWriter);
}

/*******************************************************************/
/* DLCollector wraps PLWriter and DLWriter to provide a
** dynamically-allocated doclist area to use during tokenization.
**
** dlcNew - malloc up and initialize a collector.
** dlcDelete - destroy a collector and all contained items.
** dlcAddPos - append position and offset information.
** dlcAddDoclist - add the collected doclist to the given buffer.
*/
typedef struct DLCollector {


  PLWriter plw;
} DLCollector;









static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){
  DLWriter dlw;
  dlwInit(&dlw, pCollector->plw.iType, b);
  plwDlwAdd(&pCollector->plw, &dlw);
  dlwDestroy(&dlw);







}
static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos,
                      int iStartOffset, int iEndOffset){
  plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset);
}

static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){
  DLCollector *pCollector = malloc(sizeof(DLCollector));


  plwInit(&pCollector->plw, iDocid, iType);
  return pCollector;
}
static void dlcDelete(DLCollector *pCollector){
  plwDestroy(&pCollector->plw);


  SCRAMBLE(pCollector);
  free(pCollector);
}


/* Copy the doclist data of iType in pData/nData into *out, trimming
** unnecessary data as we go.  Only columns matching iColumn are
** copied, all columns copied if iColimn is -1.  Elements with no
** matching columns are dropped.  The output is an iOutType doclist.





*/
static void docListTrim(DocListType iType, const char *pData, int nData,
                        int iColumn, DocListType iOutType, DataBuffer *out){
  DLReader dlReader;
  DLWriter dlWriter;
  PLWriter plWriter;

  assert( iOutType<=iType );

  dlrInit(&dlReader, iType, pData, nData);
  dlwInit(&dlWriter, iOutType, out);
  plwInit(&plWriter, 0, iOutType);

  while( !dlrAtEnd(&dlReader) ){
    PLReader plReader;

    int match = 0;

    plrInit(&plReader, dlReader.iType,
            dlrPosData(&dlReader), dlrPosDataLen(&dlReader));
    plwReset(&plWriter, dlrDocid(&dlReader), iOutType);

    while( !plrAtEnd(&plReader) ){
      if( iColumn==-1 || plrColumn(&plReader)==iColumn ){


        match = 1;

        plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader),
               plrStartOffset(&plReader), plrEndOffset(&plReader));
      }
      plrStep(&plReader);
    }
    if( match ) plwDlwAdd(&plWriter, &dlWriter);




    plrDestroy(&plReader);
    dlrStep(&dlReader);
  }
  plwDestroy(&plWriter);
  dlwDestroy(&dlWriter);
  dlrDestroy(&dlReader);
}

/* Used by docListMerge() to keep doclists in the ascending order by
** docid, then ascending order by age (so the newest comes first).
*/
................................................................................
  PLReader left, right;
  PLWriter writer;
  int match = 0;

  assert( dlrDocid(pLeft)==dlrDocid(pRight) );
  assert( pOut->iType!=DL_POSITIONS_OFFSETS );

  plrInit(&left, pLeft->iType, dlrPosData(pLeft), dlrPosDataLen(pLeft));
  plrInit(&right, pRight->iType, dlrPosData(pRight), dlrPosDataLen(pRight));
  plwInit(&writer, dlrDocid(pLeft), pOut->iType);

  while( !plrAtEnd(&left) && !plrAtEnd(&right) ){
    if( plrColumn(&left)<plrColumn(&right) ){
      plrStep(&left);
    }else if( plrColumn(&left)>plrColumn(&right) ){
      plrStep(&right);
    }else if( plrPosition(&left)+1<plrPosition(&right) ){
      plrStep(&left);
    }else if( plrPosition(&left)+1>plrPosition(&right) ){
      plrStep(&right);
    }else{


      match = 1;

      plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0);
      plrStep(&left);
      plrStep(&right);
    }
  }

  /* TODO(shess) We could remember the output position, encode the
  ** docid, then encode the poslist directly into the output.  If no
  ** match, we back out to the stored output position.  This would
  ** also reduce the malloc count.
  */
  if( match ) plwDlwAdd(&writer, pOut);

  plrDestroy(&left);
  plrDestroy(&right);
  plwDestroy(&writer);
}

/* We have two doclists with positions:  pLeft and pRight.
** Write the phrase intersection of these two doclists into pOut.
**
** A phrase intersection means that two documents only match
** if pLeft.iPos+1==pRight.iPos.
................................................................................

  while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
    if( dlrDocid(&left)<dlrDocid(&right) ){
      dlrStep(&left);
    }else if( dlrDocid(&right)<dlrDocid(&left) ){
      dlrStep(&right);
    }else{
      dlwAdd(&writer, dlrDocid(&left), 0, 0);
      dlrStep(&left);
      dlrStep(&right);
    }
  }

  dlrDestroy(&left);
  dlrDestroy(&right);
................................................................................

  dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
  dlrInit(&right, DL_DOCIDS, pRight, nRight);
  dlwInit(&writer, DL_DOCIDS, pOut);

  while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
    if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){
      dlwAdd(&writer, dlrDocid(&left), 0, 0);
      dlrStep(&left);
    }else if( dlrAtEnd(&left) || dlrDocid(&right)<dlrDocid(&left) ){
      dlwAdd(&writer, dlrDocid(&right), 0, 0);
      dlrStep(&right);
    }else{
      dlwAdd(&writer, dlrDocid(&left), 0, 0);
      dlrStep(&left);
      dlrStep(&right);
    }
  }

  dlrDestroy(&left);
  dlrDestroy(&right);
................................................................................
  dlwInit(&writer, DL_DOCIDS, pOut);

  while( !dlrAtEnd(&left) ){
    while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){
      dlrStep(&right);
    }
    if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){
      dlwAdd(&writer, dlrDocid(&left), 0, 0);
    }
    dlrStep(&left);
  }

  dlrDestroy(&left);
  dlrDestroy(&right);
  dlwDestroy(&writer);







>







 







|
<




|


<
<
<
<
<
<
<
<
<







 







|
<
|
|
|













>


<

<
<
<

>
>
>
>

<
|
|
|
|
>
>
>
>
>


<
>

<
<





|
|
<
>
>








>
>
>
|











|






|

|
|
>
>
|
>
>
>
>
|
<
>




<
<
<
>
>
>
>
>
>
>
>
>
>
>
>
>
|
>
>
>
>
>

<













>
>



>
>
>
>
>
>
>
>

<
<
<
<
>
>
>
>
>
>
>








>
>
|




>
>







|

>
>
>
>
>





<





<



>


|
<
<



>
>
|
>





|
>
>
>




<







 







|
|
<











>
>
|
>






|
|
|
|
<
<



<







 







|







 







|


|


|







 







|







686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
...
748
749
750
751
752
753
754
755

756
757
758
759
760
761
762









763
764
765
766
767
768
769
...
841
842
843
844
845
846
847
848

849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867

868



869
870
871
872
873
874

875
876
877
878
879
880
881
882
883
884
885

886
887


888
889
890
891
892
893
894

895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938

939
940
941
942
943



944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963

964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990




991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033

1034
1035
1036
1037
1038

1039
1040
1041
1042
1043
1044
1045


1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065

1066
1067
1068
1069
1070
1071
1072
....
1209
1210
1211
1212
1213
1214
1215
1216
1217

1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242


1243
1244
1245

1246
1247
1248
1249
1250
1251
1252
....
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
....
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
....
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
/* DLWriter is used to write doclist data to a DataBuffer.  DLWriter
** always appends to the buffer and does not own it.
**
** dlwInit - initialize to write a given type doclistto a buffer.
** dlwDestroy - clear the writer's memory.  Does not free buffer.
** dlwAppend - append raw doclist data to buffer.
** dlwAdd - construct doclist element and append to buffer.
**    Only apply dlwAdd() to DL_DOCIDS doclists (else use PLWriter).
*/
typedef struct DLWriter {
  DocListType iType;
  DataBuffer *b;
  sqlite_int64 iPrevDocid;
} DLWriter;

................................................................................
    dataBufferAppend2(pWriter->b, c, nFirstNew,
                      pData+nFirstOld, nData-nFirstOld);
  }else{
    dataBufferAppend(pWriter->b, c, nFirstNew);
  }
  pWriter->iPrevDocid = iLastDocid;
}
static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){

  char c[VARINT_MAX];
  int n = putVarint(c, iDocid-pWriter->iPrevDocid);

  assert( pWriter->iPrevDocid<iDocid );
  assert( pWriter->iType==DL_DOCIDS );

  dataBufferAppend(pWriter->b, c, n);









  pWriter->iPrevDocid = iDocid;
}

/*******************************************************************/
/* PLReader is used to read data from a document's position list.  As
** the caller steps through the list, data is cached so that varints
** only need to be decoded once.
................................................................................
    pReader->iEndOffset = pReader->iStartOffset+i;
  }
  assert( n<=pReader->nData );
  pReader->pData += n;
  pReader->nData -= n;
}

static void plrInit(PLReader *pReader, DLReader *pDLReader){

  pReader->pData = dlrPosData(pDLReader);
  pReader->nData = dlrPosDataLen(pDLReader);
  pReader->iType = pDLReader->iType;
  pReader->iColumn = 0;
  pReader->iPosition = 0;
  pReader->iStartOffset = 0;
  pReader->iEndOffset = 0;
  plrStep(pReader);
}
static void plrDestroy(PLReader *pReader){
  SCRAMBLE(pReader);
}

/*******************************************************************/
/* PLWriter is used in constructing a document's position list.  As a
** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op.
** PLWriter writes to the associated DLWriter's buffer.
**
** plwInit - init for writing a document's poslist.

** plwDestroy - clear a writer.



** plwAdd - append position and offset information.
** plwTerminate - add any necessary doclist terminator.
**
** Calling plwAdd() after plwTerminate() may result in a corrupt
** doclist.
*/

/* TODO(shess) Until we've written the second item, we can cache the
** first item's information.  Then we'd have three states:
**
** - initialized with docid, no positions.
** - docid and one position.
** - docid and multiple positions.
**
** Only the last state needs to actually write to dlw->b, which would
** be an improvement in the DLCollector case.
*/
typedef struct PLWriter {

  DLWriter *dlw;



  int iColumn;    /* the last column written */
  int iPos;       /* the last position written */
  int iOffset;    /* the last start offset written */
} PLWriter;

/* TODO(shess) In the case where the parent is reading these values
** from a PLReader, we could optimize to a copy if that PLReader has

** the same type as pWriter.
*/
static void plwAdd(PLWriter *pWriter, int iColumn, int iPos,
                   int iStartOffset, int iEndOffset){
  /* Worst-case space for POS_COLUMN, iColumn, iPosDelta,
  ** iStartOffsetDelta, and iEndOffsetDelta.
  */
  char c[5*VARINT_MAX];
  int n = 0;

  /* Ban plwAdd() after plwTerminate(). */
  assert( pWriter->iPos!=-1 );

  if( pWriter->dlw->iType==DL_DOCIDS ) return;

  if( iColumn!=pWriter->iColumn ){
    n += putVarint(c+n, POS_COLUMN);
    n += putVarint(c+n, iColumn);
    pWriter->iColumn = iColumn;
    pWriter->iPos = 0;
    pWriter->iOffset = 0;
  }
  assert( iPos>=pWriter->iPos );
  n += putVarint(c+n, POS_BASE+(iPos-pWriter->iPos));
  pWriter->iPos = iPos;
  if( pWriter->dlw->iType==DL_POSITIONS_OFFSETS ){
    assert( iStartOffset>=pWriter->iOffset );
    n += putVarint(c+n, iStartOffset-pWriter->iOffset);
    pWriter->iOffset = iStartOffset;
    assert( iEndOffset>=iStartOffset );
    n += putVarint(c+n, iEndOffset-iStartOffset);
  }
  dataBufferAppend(pWriter->dlw->b, c, n);
}
static void plwInit(PLWriter *pWriter, DLWriter *dlw, sqlite_int64 iDocid){
  char c[VARINT_MAX];
  int n;

  pWriter->dlw = dlw;

  assert( iDocid>pWriter->dlw->iPrevDocid );
  n = putVarint(c, iDocid-pWriter->dlw->iPrevDocid);
  dataBufferAppend(pWriter->dlw->b, c, n);
  pWriter->dlw->iPrevDocid = iDocid;


  pWriter->iColumn = 0;
  pWriter->iPos = 0;
  pWriter->iOffset = 0;
}



/* TODO(shess) Should plwDestroy() also terminate the doclist?  But
** then plwDestroy() would no longer be just a destructor, it would
** also be doing work, which isn't consistent with the overall idiom.
** Another option would be for plwAdd() to always append any necessary
** terminator, so that the output is always correct.  But that would
** add incremental work to the common case with the only benefit being
** API elegance.  Punt for now.
*/
static void plwTerminate(PLWriter *pWriter){
  if( pWriter->dlw->iType>DL_DOCIDS ){
    char c[VARINT_MAX];
    int n = putVarint(c, POS_END);
    dataBufferAppend(pWriter->dlw->b, c, n);
  }
#ifndef NDEBUG
  /* Mark as terminated for assert in plwAdd(). */
  pWriter->iPos = -1;
#endif
}
static void plwDestroy(PLWriter *pWriter){

  SCRAMBLE(pWriter);
}

/*******************************************************************/
/* DLCollector wraps PLWriter and DLWriter to provide a
** dynamically-allocated doclist area to use during tokenization.
**
** dlcNew - malloc up and initialize a collector.
** dlcDelete - destroy a collector and all contained items.
** dlcAddPos - append position and offset information.
** dlcAddDoclist - add the collected doclist to the given buffer.
*/
typedef struct DLCollector {
  DataBuffer b;
  DLWriter dlw;
  PLWriter plw;
} DLCollector;

/* TODO(shess) This could also be done by calling plwTerminate() and
** dataBufferAppend().  I tried that, expecting nominal performance
** differences, but it seemed to pretty reliably be worth 1% to code
** it this way.  I suspect it's the incremental malloc overhead (some
** percentage of the plwTerminate() calls will cause a realloc), so
** this might be worth revisiting if the DataBuffer implementation
** changes.
*/
static void dlcAddDoclist(DLCollector *pCollector, DataBuffer *b){




  if( pCollector->dlw.iType>DL_DOCIDS ){
    char c[VARINT_MAX];
    int n = putVarint(c, POS_END);
    dataBufferAppend2(b, pCollector->b.pData, pCollector->b.nData, c, n);
  }else{
    dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData);
  }
}
static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos,
                      int iStartOffset, int iEndOffset){
  plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset);
}

static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){
  DLCollector *pCollector = malloc(sizeof(DLCollector));
  dataBufferInit(&pCollector->b, 0);
  dlwInit(&pCollector->dlw, iType, &pCollector->b);
  plwInit(&pCollector->plw, &pCollector->dlw, iDocid);
  return pCollector;
}
static void dlcDelete(DLCollector *pCollector){
  plwDestroy(&pCollector->plw);
  dlwDestroy(&pCollector->dlw);
  dataBufferDestroy(&pCollector->b);
  SCRAMBLE(pCollector);
  free(pCollector);
}


/* Copy the doclist data of iType in pData/nData into *out, trimming
** unnecessary data as we go.  Only columns matching iColumn are
** copied, all columns copied if iColumn is -1.  Elements with no
** matching columns are dropped.  The output is an iOutType doclist.
*/
/* NOTE(shess) This code is only valid after all doclists are merged.
** If this is run before merges, then doclist items which represent
** deletion will be trimmed, and will thus not effect a deletion
** during the merge.
*/
static void docListTrim(DocListType iType, const char *pData, int nData,
                        int iColumn, DocListType iOutType, DataBuffer *out){
  DLReader dlReader;
  DLWriter dlWriter;


  assert( iOutType<=iType );

  dlrInit(&dlReader, iType, pData, nData);
  dlwInit(&dlWriter, iOutType, out);


  while( !dlrAtEnd(&dlReader) ){
    PLReader plReader;
    PLWriter plWriter;
    int match = 0;

    plrInit(&plReader, &dlReader);



    while( !plrAtEnd(&plReader) ){
      if( iColumn==-1 || plrColumn(&plReader)==iColumn ){
        if( !match ){
          plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader));
          match = 1;
        }
        plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader),
               plrStartOffset(&plReader), plrEndOffset(&plReader));
      }
      plrStep(&plReader);
    }
    if( match ){
      plwTerminate(&plWriter);
      plwDestroy(&plWriter);
    }

    plrDestroy(&plReader);
    dlrStep(&dlReader);
  }

  dlwDestroy(&dlWriter);
  dlrDestroy(&dlReader);
}

/* Used by docListMerge() to keep doclists in the ascending order by
** docid, then ascending order by age (so the newest comes first).
*/
................................................................................
  PLReader left, right;
  PLWriter writer;
  int match = 0;

  assert( dlrDocid(pLeft)==dlrDocid(pRight) );
  assert( pOut->iType!=DL_POSITIONS_OFFSETS );

  plrInit(&left, pLeft);
  plrInit(&right, pRight);


  while( !plrAtEnd(&left) && !plrAtEnd(&right) ){
    if( plrColumn(&left)<plrColumn(&right) ){
      plrStep(&left);
    }else if( plrColumn(&left)>plrColumn(&right) ){
      plrStep(&right);
    }else if( plrPosition(&left)+1<plrPosition(&right) ){
      plrStep(&left);
    }else if( plrPosition(&left)+1>plrPosition(&right) ){
      plrStep(&right);
    }else{
      if( !match ){
        plwInit(&writer, pOut, dlrDocid(pLeft));
        match = 1;
      }
      plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0);
      plrStep(&left);
      plrStep(&right);
    }
  }

  if( match ){
    plwTerminate(&writer);
    plwDestroy(&writer);
  }



  plrDestroy(&left);
  plrDestroy(&right);

}

/* We have two doclists with positions:  pLeft and pRight.
** Write the phrase intersection of these two doclists into pOut.
**
** A phrase intersection means that two documents only match
** if pLeft.iPos+1==pRight.iPos.
................................................................................

  while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
    if( dlrDocid(&left)<dlrDocid(&right) ){
      dlrStep(&left);
    }else if( dlrDocid(&right)<dlrDocid(&left) ){
      dlrStep(&right);
    }else{
      dlwAdd(&writer, dlrDocid(&left));
      dlrStep(&left);
      dlrStep(&right);
    }
  }

  dlrDestroy(&left);
  dlrDestroy(&right);
................................................................................

  dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
  dlrInit(&right, DL_DOCIDS, pRight, nRight);
  dlwInit(&writer, DL_DOCIDS, pOut);

  while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
    if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){
      dlwAdd(&writer, dlrDocid(&left));
      dlrStep(&left);
    }else if( dlrAtEnd(&left) || dlrDocid(&right)<dlrDocid(&left) ){
      dlwAdd(&writer, dlrDocid(&right));
      dlrStep(&right);
    }else{
      dlwAdd(&writer, dlrDocid(&left));
      dlrStep(&left);
      dlrStep(&right);
    }
  }

  dlrDestroy(&left);
  dlrDestroy(&right);
................................................................................
  dlwInit(&writer, DL_DOCIDS, pOut);

  while( !dlrAtEnd(&left) ){
    while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){
      dlrStep(&right);
    }
    if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){
      dlwAdd(&writer, dlrDocid(&left));
    }
    dlrStep(&left);
  }

  dlrDestroy(&left);
  dlrDestroy(&right);
  dlwDestroy(&writer);