@@ -11,7 +11,7 @@ use crate::positions::PositionSerializer;
1111use crate :: postings:: compression:: { BlockEncoder , VIntEncoder , COMPRESSION_BLOCK_SIZE } ;
1212use crate :: postings:: skip:: SkipSerializer ;
1313use crate :: query:: Bm25Weight ;
14- use crate :: schema:: { Field , FieldEntry , FieldType , IndexRecordOption , Schema } ;
14+ use crate :: schema:: { Field , FieldEntry , IndexRecordOption , Schema } ;
1515use crate :: termdict:: TermDictionaryBuilder ;
1616use crate :: { DocId , Score } ;
1717
@@ -80,9 +80,12 @@ impl InvertedIndexSerializer {
8080 let term_dictionary_write = self . terms_write . for_field ( field) ;
8181 let postings_write = self . postings_write . for_field ( field) ;
8282 let positions_write = self . positions_write . for_field ( field) ;
83- let field_type: FieldType = ( * field_entry. field_type ( ) ) . clone ( ) ;
83+ let index_record_option = field_entry
84+ . field_type ( )
85+ . index_record_option ( )
86+ . unwrap_or ( IndexRecordOption :: Basic ) ;
8487 FieldSerializer :: create (
85- & field_type ,
88+ index_record_option ,
8689 total_num_tokens,
8790 term_dictionary_write,
8891 postings_write,
@@ -102,29 +105,27 @@ impl InvertedIndexSerializer {
102105
103106/// The field serializer is in charge of
104107/// the serialization of a specific field.
///
/// The added (`+`) definition is generic over the writer type `W`, which must
/// implement `Write` and defaults to `WritePtr`. The term dictionary builder,
/// the optional positions serializer and `postings_write` each borrow a
/// `CountingWriter<W>` mutably for the lifetime `'a`.
105- pub struct FieldSerializer < ' a > {
106- term_dictionary_builder : TermDictionaryBuilder < & ' a mut CountingWriter < WritePtr > > ,
108+ pub struct FieldSerializer < ' a , W : Write = WritePtr > {
109+ term_dictionary_builder : TermDictionaryBuilder < & ' a mut CountingWriter < W > > ,
107110 postings_serializer : PostingsSerializer ,
108- positions_serializer_opt : Option < PositionSerializer < & ' a mut CountingWriter < WritePtr > > > ,
111+ positions_serializer_opt : Option < PositionSerializer < & ' a mut CountingWriter < W > > > ,
109112 current_term_info : TermInfo ,
110113 term_open : bool ,
111- postings_write : & ' a mut CountingWriter < WritePtr > ,
114+ postings_write : & ' a mut CountingWriter < W > ,
112115 postings_start_offset : u64 ,
113116}
114117
115- impl < ' a > FieldSerializer < ' a > {
116- fn create (
117- field_type : & FieldType ,
118+ impl < ' a , W : Write > FieldSerializer < ' a , W > {
119+ /// Creates a new `FieldSerializer` for a field indexed with the given `index_record_option`.
120+ pub fn create (
121+ index_record_option : IndexRecordOption ,
118122 total_num_tokens : u64 ,
119- term_dictionary_write : & ' a mut CountingWriter < WritePtr > ,
120- postings_write : & ' a mut CountingWriter < WritePtr > ,
121- positions_write : & ' a mut CountingWriter < WritePtr > ,
123+ term_dictionary_write : & ' a mut CountingWriter < W > ,
124+ postings_write : & ' a mut CountingWriter < W > ,
125+ positions_write : & ' a mut CountingWriter < W > ,
122126 fieldnorm_reader : Option < FieldNormReader > ,
123- ) -> io:: Result < FieldSerializer < ' a > > {
127+ ) -> io:: Result < FieldSerializer < ' a , W > > {
124128 total_num_tokens. serialize ( postings_write) ?;
125- let index_record_option = field_type
126- . index_record_option ( )
127- . unwrap_or ( IndexRecordOption :: Basic ) ;
128129 let term_dictionary_builder = TermDictionaryBuilder :: create ( term_dictionary_write) ?;
129130 let average_fieldnorm = fieldnorm_reader
130131 . as_ref ( )
@@ -192,6 +193,11 @@ impl<'a> FieldSerializer<'a> {
192193 Ok ( ( ) )
193194 }
194195
196+ /// Starts the postings for a new term without recording term frequencies.
///
/// * term - the bytes of the term, forwarded unchanged to `new_term`.
///
/// Delegates to `new_term`, passing `0` and `false` for its remaining
/// arguments, and returns its `io::Result` as is.
// NOTE(review): the `0` is presumably the term's document frequency and the
// `false` the record-term-frequency flag — confirm against the signature of
// `new_term`, which is not visible in this hunk.
197+ pub fn new_term_without_freq ( & mut self , term : & [ u8 ] ) -> io:: Result < ( ) > {
198+ self . new_term ( term, 0 , false )
199+ }
200+
195201 /// Serialize the information that a document contains for the current term:
196202 /// its term frequency, and the position deltas.
197203 ///
@@ -297,6 +303,7 @@ impl Block {
297303 }
298304}
299305
306+ /// Serializer for postings lists.
300307pub struct PostingsSerializer {
301308 last_doc_id_encoded : u32 ,
302309
@@ -316,6 +323,9 @@ pub struct PostingsSerializer {
316323}
317324
318325impl PostingsSerializer {
326+ /// Creates a new `PostingsSerializer`.
327+ /// * avg_fieldnorm - average field norm for the field being serialized.
328+ /// * mode - indexing options for the field being serialized.
319329 pub fn new (
320330 avg_fieldnorm : Score ,
321331 mode : IndexRecordOption ,
@@ -338,6 +348,8 @@ impl PostingsSerializer {
338348 }
339349 }
340350
351+ /// Starts the serialization for a new term.
352+ /// * term_doc_freq - the number of documents containing the term.
341353 pub fn new_term ( & mut self , term_doc_freq : u32 , record_term_freq : bool ) {
342354 self . bm25_weight = None ;
343355
@@ -377,6 +389,7 @@ impl PostingsSerializer {
377389 self . postings_write . extend ( block_encoded) ;
378390 }
379391 if self . term_has_freq {
392+ // encode the term frequencies
380393 let ( num_bits, block_encoded) : ( u8 , & [ u8 ] ) = self
381394 . block_encoder
382395 . compress_block_unsorted ( self . block . term_freqs ( ) , true ) ;
@@ -417,13 +430,17 @@ impl PostingsSerializer {
417430 self . block . clear ( ) ;
418431 }
419432
433+ /// Register that the given document contains the current term.
434+ /// * doc_id - the document id.
435+ /// * term_freq - the term frequency within the document.
///
/// The pair is appended to the in-memory block; the block is handed to
/// `write_block` as soon as it reports being full.
420436 pub fn write_doc ( & mut self , doc_id : DocId , term_freq : u32 ) {
421437 self . block . append_doc ( doc_id, term_freq) ;
// Flush eagerly so the block never holds more than it reports as full.
422438 if self . block . is_full ( ) {
423439 self . write_block ( ) ;
424440 }
425441 }
426442
443+ /// Finish the serialization for this term.
427444 pub fn close_term (
428445 & mut self ,
429446 doc_freq : u32 ,
0 commit comments