|
| 1 | +//! Type-to-jump: highest-scoring fuzzy filename match within a cached listing. |
| 2 | +//! |
| 3 | +//! Powers the in-directory navigation feature where the user types a few characters |
| 4 | +//! in a focused file pane and the cursor jumps to the best-matching entry. |
| 5 | +//! |
| 6 | +//! ## Crate choice — why `nucleo-matcher` |
| 7 | +//! |
| 8 | +//! Picked `nucleo-matcher = "0.3.1"` (Helix editor's matcher, also used by Zellij). |
| 9 | +//! Pros: microsecond-scale per-match cost, smart-case behavior (lowercase query = |
| 10 | +//! case-insensitive; any uppercase letter makes the query word that contains it |
| 11 | +//! case-sensitive), Unicode normalization, fzf-style scoring that prefers prefix / |
| 12 | +//! word-boundary matches. Pinned at 0.3.1 (published 2024-02-20, comfortably older |
| 13 | +//! than the 1-month minimum). License is MPL-2.0, which is allowed by `deny.toml`. |
| 14 | +//! The crate is small (~3 kLOC) and has no async runtime / heavy transitive deps. |
| 15 | +//! |
| 16 | +//! `sublime_fuzzy` (MIT) was the documented fallback if `nucleo-matcher` failed |
| 17 | +//! license / `cargo deny` review. That didn't happen, so we shipped nucleo-matcher. |
| 18 | +//! |
| 19 | +//! ## Why a separate module |
| 20 | +//! |
| 21 | +//! `find_first_match` is a pure function over `&[FileEntry]` — no `LISTING_CACHE` |
| 22 | +//! lock, no `tokio`. That makes it trivial to unit-test against in-memory fixtures |
| 23 | +//! and keeps the Tauri command layer (`commands/file_system/listing.rs`) a thin |
| 24 | +//! pass-through that just grabs the read lock and delegates here. |
| 25 | +
|
| 26 | +use std::time::Instant; |
| 27 | + |
| 28 | +use nucleo_matcher::{ |
| 29 | + Config, Matcher, Utf32Str, |
| 30 | + pattern::{CaseMatching, Normalization, Pattern}, |
| 31 | +}; |
| 32 | + |
| 33 | +use crate::file_system::listing::caching::LISTING_CACHE; |
| 34 | +use crate::file_system::listing::metadata::FileEntry; |
| 35 | + |
| 36 | +/// Returns the **visible-space** index of the highest-scoring fuzzy match for `query`, |
| 37 | +/// or `None` if no entry matches. |
| 38 | +/// |
| 39 | +/// Rules: |
| 40 | +/// - When `include_hidden` is `false`, dotfiles (`name.starts_with('.')`) are skipped. |
| 41 | +/// - The match runs against the whole filename (including extension) — fuzzy scoring |
| 42 | +/// already rewards prefix and word-boundary matches, so we don't split on the dot. |
| 43 | +/// - Smart-case: an all-lowercase query matches case-insensitively; any uppercase |
| 44 | +/// character makes that character case-sensitive (delegated to nucleo-matcher). |
| 45 | +/// - Ties (equal score) resolve to the lower index, which matches the listing's |
| 46 | +/// active sort order. |
| 47 | +/// - Empty query → `None`. Empty listing → `None`. |
| 48 | +/// - The synthetic `..` parent entry is **not** in `LISTING_CACHE` (it's prepended |
| 49 | +/// by the frontend), so there's no special case for it here. |
| 50 | +/// |
| 51 | +/// ## Index space |
| 52 | +/// |
| 53 | +/// The returned index counts entries in the **visible** sequence — the same |
| 54 | +/// sequence `operations::get_file_at` / `get_file_range` produce when called |
| 55 | +/// with the same `include_hidden` flag. When `include_hidden` is `false` and |
| 56 | +/// the entries vec contains hidden files before the match, the returned index |
| 57 | +/// will be **smaller** than the absolute vec position. The frontend uses this |
| 58 | +/// directly as a cursor index (plus the `+1` parent-entry offset when |
| 59 | +/// `hasParent`), so the indexing space must line up with `getFileAt` / |
| 60 | +/// `getFileRange`. |
| 61 | +pub fn find_first_match(entries: &[FileEntry], query: &str, include_hidden: bool) -> Option<usize> { |
| 62 | + if query.is_empty() || entries.is_empty() { |
| 63 | + return None; |
| 64 | + } |
| 65 | + |
| 66 | + let mut matcher = Matcher::new(Config::DEFAULT); |
| 67 | + let pattern = Pattern::parse(query, CaseMatching::Smart, Normalization::Smart); |
| 68 | + |
| 69 | + let mut best: Option<(usize, u32)> = None; |
| 70 | + let mut haystack_buf: Vec<char> = Vec::new(); |
| 71 | + |
| 72 | + // Iterate the visible sequence directly so the returned index matches the |
| 73 | + // cursor space used by `getFileAt` / `getFileRange` (which iterate via |
| 74 | + // `visible_entries(...).nth(index)` in `operations.rs`). |
| 75 | + let visible = entries.iter().filter(|e| include_hidden || !e.name.starts_with('.')); |
| 76 | + |
| 77 | + for (visible_idx, entry) in visible.enumerate() { |
| 78 | + let haystack = Utf32Str::new(&entry.name, &mut haystack_buf); |
| 79 | + let Some(score) = pattern.score(haystack, &mut matcher) else { |
| 80 | + continue; |
| 81 | + }; |
| 82 | + |
| 83 | + // Strictly greater so ties resolve to the lower index (the first match wins). |
| 84 | + match best { |
| 85 | + Some((_, best_score)) if score <= best_score => {} |
| 86 | + _ => best = Some((visible_idx, score)), |
| 87 | + } |
| 88 | + } |
| 89 | + |
| 90 | + best.map(|(idx, _)| idx) |
| 91 | +} |
| 92 | + |
| 93 | +/// Convenience wrapper that grabs the `LISTING_CACHE` read lock, runs |
| 94 | +/// `find_first_match`, and emits a single `type_to_jump` debug log line with |
| 95 | +/// the per-call timing. The Tauri command in `commands::file_system::listing` |
| 96 | +/// is a thin async pass-through over this. |
| 97 | +pub fn fuzzy_find_first_match_in_listing( |
| 98 | + listing_id: &str, |
| 99 | + query: &str, |
| 100 | + include_hidden: bool, |
| 101 | +) -> Result<Option<usize>, String> { |
| 102 | + let started = Instant::now(); |
| 103 | + let cache = LISTING_CACHE |
| 104 | + .read() |
| 105 | + .map_err(|_| "Failed to acquire cache lock".to_string())?; |
| 106 | + |
| 107 | + let listing = cache |
| 108 | + .get(listing_id) |
| 109 | + .ok_or_else(|| format!("Listing not found: {}", listing_id))?; |
| 110 | + |
| 111 | + let result = find_first_match(&listing.entries, query, include_hidden); |
| 112 | + let elapsed_us = started.elapsed().as_micros(); |
| 113 | + log::debug!( |
| 114 | + target: "type_to_jump", |
| 115 | + "listing_id={} query_len={} include_hidden={} result_index={:?} elapsed_us={}", |
| 116 | + listing_id, |
| 117 | + query.chars().count(), |
| 118 | + include_hidden, |
| 119 | + result, |
| 120 | + elapsed_us, |
| 121 | + ); |
| 122 | + Ok(result) |
| 123 | +} |
| 124 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use crate::file_system::listing::metadata::FileEntry;

    /// Builds an in-memory listing from bare file names; each entry's path is `/<name>`.
    fn listing(names: &[&str]) -> Vec<FileEntry> {
        names
            .iter()
            .map(|&name| FileEntry::new(name.to_string(), format!("/{}", name), false, false))
            .collect()
    }

    #[test]
    fn empty_listing_returns_none() {
        let files = listing(&[]);
        assert_eq!(find_first_match(&files, "abc", true), None);
    }

    #[test]
    fn empty_query_returns_none() {
        let files = listing(&["README.md", "AGENTS.md"]);
        assert_eq!(find_first_match(&files, "", true), None);
    }

    #[test]
    fn no_matches_returns_none() {
        // Neither name contains any of the characters in "xyz".
        let files = listing(&["README.md", "AGENTS.md"]);
        assert_eq!(find_first_match(&files, "xyz", true), None);
    }

    #[test]
    fn single_match_returns_its_index() {
        // "Cargo.toml" is the only name containing "cargo" as a subsequence.
        let files = listing(&["README.md", "AGENTS.md", "Cargo.toml"]);
        let found = find_first_match(&files, "cargo", true).expect("should match");
        assert_eq!(found, 2);
    }

    #[test]
    fn multiple_matches_pick_highest_scored() {
        // Both of the first two names match "tests", but the prefix match in
        // "tests.js" should score higher than the mid-name match in
        // "my_tests_helper.rs".
        let files = listing(&["my_tests_helper.rs", "tests.js", "other.txt"]);
        let found = find_first_match(&files, "tests", true).expect("should match");
        assert_eq!(found, 1, "prefix match 'tests.js' should outscore 'my_tests_helper.rs'");
    }

    #[test]
    fn ties_resolve_to_lower_index() {
        // Identical names score identically, so the earlier entry must win.
        let files = listing(&["hello.txt", "hello.txt"]);
        let found = find_first_match(&files, "hello", true).expect("should match");
        assert_eq!(found, 0);
    }

    #[test]
    fn hidden_entry_excluded_when_include_hidden_false() {
        // The dotfile is filtered out, leaving "env_setup.sh" as the only
        // candidate — and as visible index 0.
        let files = listing(&[".env", "env_setup.sh"]);
        let found = find_first_match(&files, "env", false).expect("should match");
        assert_eq!(found, 0);
    }

    #[test]
    fn hidden_entry_included_when_include_hidden_true() {
        // Deterministic setup: "zeta.bin" shares no characters with "alpha",
        // so ".alpha.txt" is the only possible match. With hidden files shown
        // it is visible and sits at index 0.
        let files = listing(&[".alpha.txt", "zeta.bin"]);
        let found = find_first_match(&files, "alpha", true).expect("should match");
        assert_eq!(
            found, 0,
            "hidden '.alpha.txt' must be considered when include_hidden=true"
        );
    }

    /// Regression test for the visible-space indexing contract.
    ///
    /// An earlier version of `find_first_match` returned the absolute index
    /// into the `entries` vec. With hidden files ahead of the match, the
    /// frontend (which indexes the visible sequence, like `get_file_at` /
    /// `get_file_range`) ended up one row too far down per skipped dotfile.
    #[test]
    fn returns_visible_space_index_when_hidden_precedes_match() {
        // Vec layout:        [hidden, hidden, target, other]
        // Absolute indices:      0       1       2      3
        // Visible indices:       -       -       0      1
        let files = listing(&[".hidden_a", ".hidden_b", "target.txt", "other.bin"]);

        // Hidden files filtered out: "target.txt" is visible index 0, not absolute 2.
        let hidden_off = find_first_match(&files, "target", false).expect("should match");
        assert_eq!(
            hidden_off, 0,
            "must return visible-space index (0), not absolute vec index (2), so the frontend cursor doesn't skip rows"
        );

        // Sanity check: once dotfiles are visible too, the same entry is at index 2.
        let hidden_on = find_first_match(&files, "target", true).expect("should match");
        assert_eq!(hidden_on, 2);
    }

    #[test]
    fn case_insensitive_with_lowercase_query() {
        // An all-lowercase query is matched case-insensitively (smart case),
        // so it finds the upper-case filename.
        let files = listing(&["README.md", "TESTS.txt", "other.bin"]);
        let found = find_first_match(&files, "tes", true).expect("should match");
        assert_eq!(found, 1);
    }

    #[test]
    fn unicode_filename_is_matchable() {
        // With `Normalization::Smart` the ASCII spelling should still find the
        // accented name. No exact score is asserted — only that the match
        // lands on the "Résumé" entry rather than an unrelated one.
        let files = listing(&["notes.txt", "Résumé.pdf", "photo.jpg"]);
        let found = find_first_match(&files, "resume", true).expect("should match");
        assert_eq!(found, 1, "ASCII 'resume' should fold into 'Résumé.pdf'");
    }
}
0 commit comments