Skip to content

Commit 3b6489a

Browse files
authored
Merge pull request #710 from Amanieu/fix-rehash-unwind
Fix incorrect length if a hasher panics during rehash
2 parents b1c4403 + 375087f commit 3b6489a

2 files changed

Lines changed: 167 additions & 6 deletions

File tree

src/raw.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2996,14 +2996,17 @@ impl RawTableInner {
29962996
}
29972997

29982998
let mut guard = guard(self, move |self_| {
2999-
if let Some(drop) = drop {
3000-
for i in 0..self_.num_buckets() {
3001-
unsafe {
3002-
if *self_.ctrl(i) == Tag::DELETED {
3003-
self_.set_ctrl(i, Tag::EMPTY);
2999+
for i in 0..self_.num_buckets() {
3000+
unsafe {
3001+
// Any elements that haven't been rehashed yet have a
3002+
// DELETED tag. These need to be dropped and have their tag
3003+
// reset to EMPTY.
3004+
if *self_.ctrl(i) == Tag::DELETED {
3005+
self_.set_ctrl(i, Tag::EMPTY);
3006+
if let Some(drop) = drop {
30043007
drop(self_.bucket_ptr(i, size_of));
3005-
self_.items -= 1;
30063008
}
3009+
self_.items -= 1;
30073010
}
30083011
}
30093012
}

tests/hasher_unwind.rs

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
//! Repro for a caught-panic corruption path in `std::collections::HashMap`.
2+
//!
3+
//! The bug class is: start a multi-step internal transition, let user code
4+
//! panic in the middle, catch the unwind, and keep using the partially updated
5+
//! object.
6+
//!
7+
//! In this case the user-controlled hook is `BuildHasher::build_hasher` during
8+
//! an in-place rehash. The table keeps its logical length after the panic, but
9+
//! lookups can no longer find the original keys and iteration starts yielding
10+
//! repeated garbage-like entries.
11+
12+
use hashbrown::HashMap;
13+
use std::collections::BTreeSet;
14+
use std::{
15+
hash::{BuildHasher, Hash, Hasher},
16+
panic::{AssertUnwindSafe, catch_unwind},
17+
sync::Mutex,
18+
sync::atomic::{AtomicUsize, Ordering},
19+
};
20+
21+
/// One-shot panic switch used to trigger the first `build_hasher` call that
22+
/// occurs inside `reserve(1)`.
23+
static PANIC_COUNTER: AtomicUsize = AtomicUsize::new(0);
24+
static TEST_LOCK: Mutex<()> = Mutex::new(());
25+
26+
/// A deterministic hasher that maps everything to the same bucket group.
27+
///
28+
/// This maximizes collisions and makes the in-place rehash path easy to reach
29+
/// with a small, fixed workload.
30+
#[derive(Default)]
31+
struct ZeroHasher;
32+
33+
impl Hasher for ZeroHasher {
34+
fn finish(&self) -> u64 {
35+
0
36+
}
37+
38+
fn write(&mut self, _bytes: &[u8]) {}
39+
}
40+
41+
/// `BuildHasher` that panics once when armed.
42+
///
43+
/// Using a panicking build hook mirrors the upstream interner trigger more
44+
/// closely than a panicking `Hash` impl.
45+
#[derive(Clone, Default)]
46+
struct PanicBuildHasher;
47+
48+
impl BuildHasher for PanicBuildHasher {
49+
type Hasher = ZeroHasher;
50+
51+
fn build_hasher(&self) -> Self::Hasher {
52+
if PANIC_COUNTER.fetch_sub(1, Ordering::SeqCst) == 0 {
53+
panic!("panic in BuildHasher::build_hasher");
54+
}
55+
ZeroHasher
56+
}
57+
}
58+
59+
/// Simple integer key type so the test can verify reachability after the panic.
60+
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
61+
struct Key(u64);
62+
63+
type Map = HashMap<Key, u64, PanicBuildHasher>;
64+
65+
/// Fill a map until `len == capacity`.
66+
///
67+
/// With the current toolchain this yields a map with `len == capacity == 224`
68+
/// when constructed from `with_capacity_and_hasher(128, ...)`.
69+
fn make_full_map() -> Map {
70+
PANIC_COUNTER.store(!0, Ordering::SeqCst);
71+
let mut map = HashMap::with_capacity_and_hasher(128, PanicBuildHasher);
72+
for i in 0.. {
73+
map.insert(Key(i), i);
74+
if map.len() == map.capacity() {
75+
return map;
76+
}
77+
}
78+
unreachable!()
79+
}
80+
81+
fn panics_silently(f: impl FnOnce()) -> bool {
82+
let previous_hook = std::panic::take_hook();
83+
std::panic::set_hook(Box::new(|_| {}));
84+
let panicked = catch_unwind(AssertUnwindSafe(f)).is_err();
85+
std::panic::set_hook(previous_hook);
86+
panicked
87+
}
88+
89+
fn hashmap_reserve_survives_panicking_build_hasher_inner(count: usize) {
90+
// Phase 1: fill a colliding table, then carve out the exact tombstone
91+
// pattern that forces `reserve(1)` down the in-place rehash path.
92+
let mut map = make_full_map();
93+
let original_len = map.len();
94+
assert_eq!(
95+
(map.len(), map.capacity()),
96+
(224, 224),
97+
"this minimized workload is tuned for the validated std/hashbrown layout"
98+
);
99+
100+
for i in 1..114 {
101+
assert_eq!(map.remove(&Key(i)), Some(i));
102+
}
103+
assert_eq!(
104+
map.len(),
105+
111,
106+
"setup should leave the expected tombstone pattern"
107+
);
108+
109+
// Phase 2: make `BuildHasher::build_hasher` panic during the rehash, then
110+
// keep using the recovered map.
111+
PANIC_COUNTER.store(count, Ordering::SeqCst);
112+
let reserve_panicked = panics_silently(|| {
113+
map.reserve(1);
114+
});
115+
assert!(
116+
reserve_panicked,
117+
"the minimized workload should panic during the in-place rehash"
118+
);
119+
120+
// Phase 3: a correct table should keep every surviving key reachable and
121+
// should not start yielding duplicate entries.
122+
let mut expected_visible_keys: Vec<_> = map.keys().map(|&Key(i)| i).collect();
123+
let visible_keys: Vec<_> = (0..original_len as u64)
124+
.filter(|&i| map.get(&Key(i)).copied() == Some(i))
125+
.collect();
126+
expected_visible_keys.sort();
127+
let iter_sample: Vec<_> = map.iter().take(8).map(|(k, v)| (k.0, *v)).collect();
128+
let distinct_entries = iter_sample.iter().copied().collect::<BTreeSet<_>>();
129+
130+
assert_eq!(
131+
map.len(),
132+
expected_visible_keys.len(),
133+
"the table length should stay coherent"
134+
);
135+
assert_eq!(
136+
visible_keys, expected_visible_keys,
137+
"the surviving keys should stay reachable after the caught panic"
138+
);
139+
assert_eq!(
140+
distinct_entries.len(),
141+
iter_sample.len(),
142+
"the iterator sample should not contain duplicate entries after the caught panic"
143+
);
144+
}
145+
146+
#[test]
147+
fn hashmap_reserve_survives_panicking_build_hasher() {
148+
let _guard = TEST_LOCK.lock().unwrap_or_else(|e| e.into_inner());
149+
if cfg!(miri) {
150+
for i in [0, 50, 110] {
151+
hashmap_reserve_survives_panicking_build_hasher_inner(i);
152+
}
153+
} else {
154+
for i in 0..111 {
155+
hashmap_reserve_survives_panicking_build_hasher_inner(i);
156+
}
157+
}
158+
}

0 commit comments

Comments
 (0)