Skip to content

Commit d2c1b8b

Browse files
authored
Optimized intersection count using a bitset when the first leg is dense
1 parent a651071 commit d2c1b8b

File tree

5 files changed

+247
-8
lines changed

5 files changed

+247
-8
lines changed

common/src/bitset.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ impl TinySet {
4747
TinySet(val)
4848
}
4949

50+
/// An empty `TinySet` constant.
51+
pub const EMPTY: TinySet = TinySet(0u64);
52+
5053
/// Returns an empty `TinySet`.
5154
#[inline]
5255
pub fn empty() -> TinySet {

src/collector/count_collector.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use super::Collector;
22
use crate::collector::SegmentCollector;
3+
use crate::query::Weight;
34
use crate::{DocId, Score, SegmentOrdinal, SegmentReader};
45

56
/// `CountCollector` collector only counts how many
@@ -55,6 +56,15 @@ impl Collector for Count {
5556
fn merge_fruits(&self, segment_counts: Vec<usize>) -> crate::Result<usize> {
5657
Ok(segment_counts.into_iter().sum())
5758
}
59+
60+
fn collect_segment(
61+
&self,
62+
weight: &dyn Weight,
63+
_segment_ord: u32,
64+
reader: &SegmentReader,
65+
) -> crate::Result<usize> {
66+
Ok(weight.count(reader)? as usize)
67+
}
5868
}
5969

6070
#[derive(Default)]

src/docset.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use std::borrow::{Borrow, BorrowMut};
22

3+
use common::TinySet;
4+
35
use crate::fastfield::AliveBitSet;
46
use crate::DocId;
57

@@ -14,6 +16,12 @@ pub const TERMINATED: DocId = i32::MAX as u32;
1416
/// exactly this size as long as we can fill the buffer.
1517
pub const COLLECT_BLOCK_BUFFER_LEN: usize = 64;
1618

19+
/// Number of `TinySet` (64-bit) buckets in a block used by [`DocSet::fill_bitset_block`].
20+
pub const BLOCK_NUM_TINYBITSETS: usize = 16;
21+
22+
/// Number of doc IDs covered by one block: `BLOCK_NUM_TINYBITSETS * 64 = 1024`.
23+
pub const BLOCK_WINDOW: u32 = BLOCK_NUM_TINYBITSETS as u32 * 64;
24+
1725
/// Represents an iterable set of sorted doc ids.
1826
pub trait DocSet: Send {
1927
/// Goes to the next element.
@@ -160,6 +168,31 @@ pub trait DocSet: Send {
160168
self.size_hint() as u64
161169
}
162170

171+
/// Fills a bitmask representing which documents in `[min_doc, min_doc + BLOCK_WINDOW)` are
172+
/// present in this docset.
173+
///
174+
/// The window is divided into `BLOCK_NUM_TINYBITSETS` buckets of 64 docs each.
175+
/// Returns the next doc `>= min_doc + BLOCK_WINDOW`, or `TERMINATED` if exhausted.
176+
fn fill_bitset_block(
177+
&mut self,
178+
min_doc: DocId,
179+
mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS],
180+
) -> DocId {
181+
self.seek(min_doc);
182+
let horizon = min_doc + BLOCK_WINDOW;
183+
loop {
184+
let doc = self.doc();
185+
if doc >= horizon {
186+
return doc;
187+
}
188+
let delta = doc - min_doc;
189+
mask[(delta / 64) as usize].insert_mut(delta % 64);
190+
if self.advance() == TERMINATED {
191+
return TERMINATED;
192+
}
193+
}
194+
}
195+
163196
/// Returns the number of documents matching.
164197
/// Calling this method consumes the `DocSet`.
165198
fn count(&mut self, alive_bitset: &AliveBitSet) -> u32 {
@@ -214,6 +247,18 @@ impl DocSet for &mut dyn DocSet {
214247
(**self).seek_danger(target)
215248
}
216249

250+
/// Forwards to the `fill_buffer` implementation of the underlying docset.
fn fill_buffer(&mut self, buffer: &mut [DocId; COLLECT_BLOCK_BUFFER_LEN]) -> usize {
    let inner = &mut **self;
    inner.fill_buffer(buffer)
}
253+
254+
fn fill_bitset_block(
255+
&mut self,
256+
min_doc: DocId,
257+
mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS],
258+
) -> DocId {
259+
(**self).fill_bitset_block(min_doc, mask)
260+
}
261+
217262
fn doc(&self) -> u32 {
218263
(**self).doc()
219264
}
@@ -256,6 +301,15 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
256301
unboxed.fill_buffer(buffer)
257302
}
258303

304+
fn fill_bitset_block(
305+
&mut self,
306+
min_doc: DocId,
307+
mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS],
308+
) -> DocId {
309+
let unboxed: &mut TDocSet = self.borrow_mut();
310+
unboxed.fill_bitset_block(min_doc, mask)
311+
}
312+
259313
fn doc(&self) -> DocId {
260314
let unboxed: &TDocSet = self.borrow();
261315
unboxed.doc()

src/query/intersection.rs

Lines changed: 174 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
use common::TinySet;
2+
13
use super::size_hint::estimate_intersection;
2-
use crate::docset::{DocSet, SeekDangerResult, TERMINATED};
4+
use crate::docset::{DocSet, SeekDangerResult, BLOCK_NUM_TINYBITSETS, TERMINATED};
35
use crate::query::term_query::TermScorer;
46
use crate::query::{EmptyScorer, Scorer};
57
use crate::{DocId, Score};
@@ -17,7 +19,7 @@ use crate::{DocId, Score};
1719
/// `size_hint` of the intersection.
1820
pub fn intersect_scorers(
1921
mut scorers: Vec<Box<dyn Scorer>>,
20-
num_docs_segment: u32,
22+
segment_num_docs: u32,
2123
) -> Box<dyn Scorer> {
2224
if scorers.is_empty() {
2325
return Box::new(EmptyScorer);
@@ -42,14 +44,14 @@ pub fn intersect_scorers(
4244
left: *(left.downcast::<TermScorer>().map_err(|_| ()).unwrap()),
4345
right: *(right.downcast::<TermScorer>().map_err(|_| ()).unwrap()),
4446
others: scorers,
45-
num_docs: num_docs_segment,
47+
segment_num_docs,
4648
});
4749
}
4850
Box::new(Intersection {
4951
left,
5052
right,
5153
others: scorers,
52-
num_docs: num_docs_segment,
54+
segment_num_docs,
5355
})
5456
}
5557

@@ -58,7 +60,7 @@ pub struct Intersection<TDocSet: DocSet, TOtherDocSet: DocSet = Box<dyn Scorer>>
5860
left: TDocSet,
5961
right: TDocSet,
6062
others: Vec<TOtherDocSet>,
61-
num_docs: u32,
63+
segment_num_docs: u32,
6264
}
6365

6466
fn go_to_first_doc<TDocSet: DocSet>(docsets: &mut [TDocSet]) -> DocId {
@@ -78,7 +80,10 @@ fn go_to_first_doc<TDocSet: DocSet>(docsets: &mut [TDocSet]) -> DocId {
7880

7981
impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
8082
/// `segment_num_docs` is the number of documents in the segment.
81-
pub(crate) fn new(mut docsets: Vec<TDocSet>, num_docs: u32) -> Intersection<TDocSet, TDocSet> {
83+
pub(crate) fn new(
84+
mut docsets: Vec<TDocSet>,
85+
segment_num_docs: u32,
86+
) -> Intersection<TDocSet, TDocSet> {
8287
let num_docsets = docsets.len();
8388
assert!(num_docsets >= 2);
8489
docsets.sort_by_key(|docset| docset.cost());
@@ -97,7 +102,7 @@ impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
97102
left,
98103
right,
99104
others: docsets,
100-
num_docs,
105+
segment_num_docs,
101106
}
102107
}
103108
}
@@ -214,7 +219,7 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
214219
[self.left.size_hint(), self.right.size_hint()]
215220
.into_iter()
216221
.chain(self.others.iter().map(DocSet::size_hint)),
217-
self.num_docs,
222+
self.segment_num_docs,
218223
)
219224
}
220225

@@ -224,6 +229,91 @@ impl<TDocSet: DocSet, TOtherDocSet: DocSet> DocSet for Intersection<TDocSet, TOt
224229
// If there are docsets that are bad at skipping, they should also influence the cost.
225230
self.left.cost()
226231
}
232+
233+
fn count_including_deleted(&mut self) -> u32 {
234+
const DENSITY_THRESHOLD_INVERSE: u32 = 32;
235+
if self
236+
.left
237+
.size_hint()
238+
.saturating_mul(DENSITY_THRESHOLD_INVERSE)
239+
< self.segment_num_docs
240+
{
241+
// Sparse path: if the lead iterator covers less than ~3% of docs,
242+
// the block approach wastes time on mostly-empty blocks.
243+
self.count_including_deleted_sparse()
244+
} else {
245+
// Dense approach. We push documents into a block bitset to then
246+
// perform count using popcount.
247+
self.count_including_deleted_dense()
248+
}
249+
}
250+
}
251+
252+
const EMPTY_BLOCK: [TinySet; BLOCK_NUM_TINYBITSETS] = [TinySet::EMPTY; BLOCK_NUM_TINYBITSETS];
253+
254+
/// ANDs `other` into `mask` in-place. Returns `true` if the result is all zeros.
255+
#[inline]
256+
fn and_blocks_and_return_is_empty(
257+
mask: &mut [TinySet; BLOCK_NUM_TINYBITSETS],
258+
update: &[TinySet; BLOCK_NUM_TINYBITSETS],
259+
) -> bool {
260+
let mut all_empty = true;
261+
for (mask_tinyset, update_tinyset) in mask.iter_mut().zip(update.iter()) {
262+
*mask_tinyset = mask_tinyset.intersect(*update_tinyset);
263+
all_empty &= mask_tinyset.is_empty();
264+
}
265+
all_empty
266+
}
267+
268+
impl<TDocSet: DocSet, TOtherDocSet: DocSet> Intersection<TDocSet, TOtherDocSet> {
269+
fn count_including_deleted_sparse(&mut self) -> u32 {
270+
let mut count = 0u32;
271+
let mut doc = self.doc();
272+
while doc != TERMINATED {
273+
count += 1;
274+
doc = self.advance();
275+
}
276+
count
277+
}
278+
279+
/// Dense block-wise bitmask intersection count.
280+
///
281+
/// Fills a 1024-doc window from each iterator, ANDs the bitmasks together,
282+
/// and popcounts the result. `fill_bitset_block` handles seeking tails forward
283+
/// when they lag behind the current block.
284+
fn count_including_deleted_dense(&mut self) -> u32 {
285+
let mut count = 0u32;
286+
let mut next_base = self.left.doc();
287+
288+
while next_base < TERMINATED {
289+
let base = next_base;
290+
291+
// Fill lead bitmask.
292+
let mut mask = EMPTY_BLOCK;
293+
next_base = next_base.max(self.left.fill_bitset_block(base, &mut mask));
294+
295+
let mut tail_mask = EMPTY_BLOCK;
296+
next_base = next_base.max(self.right.fill_bitset_block(base, &mut tail_mask));
297+
298+
if and_blocks_and_return_is_empty(&mut mask, &tail_mask) {
299+
continue;
300+
}
301+
// AND with each additional tail.
302+
for other in &mut self.others {
303+
let mut other_mask = EMPTY_BLOCK;
304+
next_base = next_base.max(other.fill_bitset_block(base, &mut other_mask));
305+
if and_blocks_and_return_is_empty(&mut mask, &other_mask) {
306+
continue;
307+
}
308+
}
309+
310+
for tinyset in &mask {
311+
count += tinyset.len();
312+
}
313+
}
314+
315+
count
316+
}
227317
}
228318

229319
impl<TScorer, TOtherScorer> Scorer for Intersection<TScorer, TOtherScorer>
@@ -421,6 +511,82 @@ mod tests {
421511
}
422512
}
423513

514+
proptest! {
    #[test]
    fn prop_test_count_including_deleted_matches_default(
        a in sorted_deduped_vec(1200, 400),
        b in sorted_deduped_vec(1200, 400),
        c in sorted_deduped_vec(1200, 400),
        num_docs in 1200u32..2000u32,
    ) {
        // Reference result: the docs of `a` that also appear in both `b` and `c`.
        let expected = a
            .iter()
            .filter(|doc| b.contains(doc) && c.contains(doc))
            .count() as u32;

        // The three-way intersection must report the same count.
        let docsets = vec![
            VecDocSet::from(a.clone()),
            VecDocSet::from(b.clone()),
            VecDocSet::from(c.clone()),
        ];
        let mut intersection = Intersection::new(docsets, num_docs);
        let count = intersection.count_including_deleted();
        prop_assert_eq!(count, expected,
            "count_including_deleted mismatch: a={:?}, b={:?}, c={:?}", a, b, c);
    }
}
545+
546+
#[test]
fn test_count_including_deleted_two_way() {
    // Docs 3 and 9 are the only ones present on both sides.
    let docsets = vec![
        VecDocSet::from(vec![1, 3, 9]),
        VecDocSet::from(vec![3, 4, 9, 18]),
    ];
    let mut intersection = Intersection::new(docsets, 100);
    let num_matches = intersection.count_including_deleted();
    assert_eq!(num_matches, 2);
}
553+
554+
#[test]
fn test_count_including_deleted_empty() {
    // No doc is shared by all three docsets.
    let docsets = vec![
        VecDocSet::from(vec![1, 3]),
        VecDocSet::from(vec![1, 4]),
        VecDocSet::from(vec![3, 9]),
    ];
    let mut intersection = Intersection::new(docsets, 100);
    let num_matches = intersection.count_including_deleted();
    assert_eq!(num_matches, 0);
}
562+
563+
/// Test with enough documents to exercise the dense path (>= num_docs/32).
#[test]
fn test_count_including_deleted_dense_path() {
    // Both docsets hold many docs relative to the 2000-doc segment.
    let evens: Vec<u32> = (0..2000).step_by(2).collect();
    let multiples_of_three: Vec<u32> = (0..2000).step_by(3).collect();
    // The intersection is made of the even numbers that are also multiples of 3.
    let expected = evens.iter().filter(|&&doc| doc % 3 == 0).count() as u32;

    let docsets = vec![
        VecDocSet::from(evens),
        VecDocSet::from(multiples_of_three),
    ];
    let mut intersection = Intersection::new(docsets, 2000);
    assert_eq!(intersection.count_including_deleted(), expected);
}
576+
577+
/// Test that spans multiple blocks (>1024 docs).
#[test]
fn test_count_including_deleted_multi_block() {
    let all_docs: Vec<u32> = (0..5000).collect();
    let every_seventh: Vec<u32> = (0..5000).step_by(7).collect();
    // `every_seventh` is a subset of `all_docs`, so it is the whole intersection.
    let expected = every_seventh.len() as u32;

    let docsets = vec![
        VecDocSet::from(all_docs),
        VecDocSet::from(every_seventh),
    ];
    let mut intersection = Intersection::new(docsets, 5000);
    assert_eq!(intersection.count_including_deleted(), expected);
}
589+
424590
#[test]
425591
fn test_bug_2811_intersection_candidate_should_increase() {
426592
let mut schema_builder = Schema::builder();

src/query/term_query/term_scorer.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@ impl DocSet for TermScorer {
117117
fn size_hint(&self) -> u32 {
118118
self.postings.size_hint()
119119
}
120+
121+
// TODO: It is probably possible to optimize `fill_bitset_block` for `TermScorer`
// by working directly with the blocks, which would enable vectorization.
// I did not manage to get a performance improvement on Mac ARM,
// and do not have access to x86 to investigate.
120126
}
121127

122128
impl Scorer for TermScorer {

0 commit comments

Comments
 (0)