|
| 1 | +use binggan::{black_box, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM}; |
| 2 | +use common::BitSet; |
| 3 | +use rand::rngs::StdRng; |
| 4 | +use rand::{Rng, SeedableRng}; |
| 5 | +use tantivy::postings::BlockSegmentPostings; |
| 6 | +use tantivy::schema::*; |
| 7 | +use tantivy::{doc, DocSet as _, Index, InvertedIndexReader as _, TantivyDocument}; |
| 8 | + |
| 9 | +#[global_allocator] |
| 10 | +pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM; |
| 11 | + |
| 12 | +fn main() { |
| 13 | + let index = build_test_index(); |
| 14 | + let reader = index.reader().unwrap(); |
| 15 | + let searcher = reader.searcher(); |
| 16 | + let segment_reader = &searcher.segment_readers()[0]; |
| 17 | + let text_field = index.schema().get_field("text").unwrap(); |
| 18 | + let inverted_index = segment_reader.inverted_index(text_field).unwrap(); |
| 19 | + let max_doc = segment_reader.max_doc(); |
| 20 | + |
| 21 | + let term = Term::from_field_text(text_field, "hello"); |
| 22 | + let term_info = inverted_index.get_term_info(&term).unwrap().unwrap(); |
| 23 | + |
| 24 | + let mut runner = BenchRunner::new(); |
| 25 | + runner.set_name("fill_bitset"); |
| 26 | + |
| 27 | + let mut group = runner.new_group(); |
| 28 | + { |
| 29 | + let inverted_index = &inverted_index; |
| 30 | + let term_info = &term_info; |
| 31 | + // This is the path used by queries (AutomatonWeight, RangeQuery, etc.) |
| 32 | + // It dispatches via DynInvertedIndexReader::fill_bitset_from_terminfo. |
| 33 | + group.register("fill_bitset_from_terminfo (via trait)", move |_| { |
| 34 | + let mut bitset = BitSet::with_max_value(max_doc); |
| 35 | + inverted_index |
| 36 | + .fill_bitset_from_terminfo(term_info, &mut bitset) |
| 37 | + .unwrap(); |
| 38 | + black_box(bitset); |
| 39 | + }); |
| 40 | + } |
| 41 | + { |
| 42 | + let inverted_index = &inverted_index; |
| 43 | + let term_info = &term_info; |
| 44 | + // This constructs a SegmentPostings via read_docset_from_terminfo and calls fill_bitset. |
| 45 | + group.register("read_docset + fill_bitset", move |_| { |
| 46 | + let mut postings = inverted_index.read_docset_from_terminfo(term_info).unwrap(); |
| 47 | + let mut bitset = BitSet::with_max_value(max_doc); |
| 48 | + postings.fill_bitset(&mut bitset); |
| 49 | + black_box(bitset); |
| 50 | + }); |
| 51 | + } |
| 52 | + { |
| 53 | + let inverted_index = &inverted_index; |
| 54 | + let term_info = &term_info; |
| 55 | + // This uses BlockSegmentPostings directly, bypassing SegmentPostings entirely. |
| 56 | + group.register("BlockSegmentPostings direct", move |_| { |
| 57 | + let raw = inverted_index |
| 58 | + .read_raw_postings_data(term_info, IndexRecordOption::Basic) |
| 59 | + .unwrap(); |
| 60 | + let mut block_postings = BlockSegmentPostings::open( |
| 61 | + term_info.doc_freq, |
| 62 | + raw.postings_data, |
| 63 | + raw.record_option, |
| 64 | + raw.effective_option, |
| 65 | + ) |
| 66 | + .unwrap(); |
| 67 | + let mut bitset = BitSet::with_max_value(max_doc); |
| 68 | + loop { |
| 69 | + let docs = block_postings.docs(); |
| 70 | + if docs.is_empty() { |
| 71 | + break; |
| 72 | + } |
| 73 | + for &doc in docs { |
| 74 | + bitset.insert(doc); |
| 75 | + } |
| 76 | + block_postings.advance(); |
| 77 | + } |
| 78 | + black_box(bitset); |
| 79 | + }); |
| 80 | + } |
| 81 | + group.run(); |
| 82 | +} |
| 83 | + |
| 84 | +fn build_test_index() -> Index { |
| 85 | + let mut schema_builder = Schema::builder(); |
| 86 | + schema_builder.add_text_field("text", TEXT); |
| 87 | + let schema = schema_builder.build(); |
| 88 | + let index = Index::create_in_ram(schema.clone()); |
| 89 | + let text_field = schema.get_field("text").unwrap(); |
| 90 | + |
| 91 | + let mut writer = index.writer::<TantivyDocument>(250_000_000).unwrap(); |
| 92 | + let mut rng = StdRng::from_seed([42u8; 32]); |
| 93 | + for _ in 0..100_000 { |
| 94 | + if rng.random_bool(0.5) { |
| 95 | + writer |
| 96 | + .add_document(doc!(text_field => "hello world")) |
| 97 | + .unwrap(); |
| 98 | + } else { |
| 99 | + writer |
| 100 | + .add_document(doc!(text_field => "goodbye world")) |
| 101 | + .unwrap(); |
| 102 | + } |
| 103 | + } |
| 104 | + writer.commit().unwrap(); |
| 105 | + index |
| 106 | +} |
0 commit comments