Skip to content

Commit b03387e

Browse files
committed
Indexing: Surface scan calibration + live bytes
- Capture each scan's calibration at `start_scan`: read the prior completed scan's totals (`total_entries`, `total_physical_bytes`, `scan_duration_ms`) off the live read connection before the truncate, and fetch the scanned volume's used bytes once via `get_space_info_for_path` wrapped in `block_in_place` (the call does NSURL XPC / `statvfs` I/O; space-info failure degrades to `None`, never blocks the scan). Both are stashed on a plain `scan_calibration` field (`start_scan` is `&mut self`, `get_status` is `&self`).
- Extend `IndexScanStartedEvent` with the static per-scan calibration (`prior_total_entries`, `prior_scan_duration_ms`, `volume_used_bytes`) so the FE's calibrated-vs-rough tier decision is a pure function of one event; the 500 ms progress event stays counter-only. Plain serde, hand-typed on the FE.
- Add `bytes_scanned` to `IndexScanProgressEvent`, emitted from the existing 500 ms reporter off the M1 snapshot.
- Add `bytes_scanned` + `volume_used_bytes` to `IndexStatusResponse` (so a mid-scan window reload can backfill rough progress) and `total_physical_bytes` to `IndexStatus`'s meta read for symmetry. Regenerated `bindings.ts`.
- Re-export `get_space_info_for_path` as `pub(crate)` from `backends/mod.rs` (its module is private; the indexing reuse needs the path).
- Extract the snapshot-and-calibration combining into a pure `live_scan_counters` helper so `get_status`'s new fields are unit-testable without an `AppHandle`; pinned by three cases (live bytes + used-bytes denominator, all-zero idle, used-bytes absent when space-info failed).
1 parent f8694ce commit b03387e

6 files changed

Lines changed: 214 additions & 5 deletions

File tree

apps/desktop/src-tauri/src/file_system/volume/backends/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ mod smb_watcher;
1919

2020
pub use in_memory::InMemoryVolume;
2121
pub use local_posix::LocalPosixVolume;
22+
/// Cross-platform volume used-bytes helper (NSURL purgeable-aware on macOS,
23+
/// `statvfs` on Linux). Re-exported so the indexing module can read the scanned
24+
/// volume's used bytes for tier-2 scan progress without re-implementing statfs.
25+
pub(crate) use local_posix::get_space_info_for_path;
2226
#[cfg(any(target_os = "macos", target_os = "linux"))]
2327
pub use mtp::MtpVolume;
2428
#[cfg(any(target_os = "macos", target_os = "linux"))]

apps/desktop/src-tauri/src/indexing/events.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,15 @@ use super::store::IndexStatus;
1414
#[serde(rename_all = "camelCase")]
1515
pub struct IndexScanStartedEvent {
1616
pub volume_id: String,
17+
/// The previous completed scan's final entry count, the tier-1 (calibrated)
18+
/// progress denominator. `None` on a first-ever scan (no prior calibration).
19+
pub prior_total_entries: Option<u64>,
20+
/// The previous completed scan's wall-clock duration, used to seed the tier-1
21+
/// ETA before the sliding window has samples. `None` on a first-ever scan.
22+
pub prior_scan_duration_ms: Option<u64>,
23+
/// The scanned volume's used bytes at scan start, the tier-2 (rough, first-scan)
24+
/// progress denominator. `None` when the space-info fetch failed.
25+
pub volume_used_bytes: Option<u64>,
1726
}
1827

1928
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -22,6 +31,9 @@ pub struct IndexScanProgressEvent {
2231
pub volume_id: String,
2332
pub entries_scanned: u64,
2433
pub dirs_found: u64,
34+
/// Resolved post-dedup physical bytes scanned so far, the tier-2 progress
35+
/// numerator (apples-to-apples with `volume_used_bytes`).
36+
pub bytes_scanned: u64,
2537
}
2638

2739
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -159,8 +171,18 @@ pub struct IndexStatusResponse {
159171
pub scanning: bool,
160172
pub entries_scanned: u64,
161173
pub dirs_found: u64,
174+
/// Resolved post-dedup physical bytes scanned so far (live), the tier-2
175+
/// progress numerator. 0 when no scan is running. Rides the same
176+
/// `scan_handle` snapshot as `entries_scanned`/`dirs_found`.
177+
pub bytes_scanned: u64,
162178
pub index_status: Option<IndexStatus>,
163179
pub db_file_size: Option<u64>,
180+
/// The scanned volume's used bytes at the current scan's start, the tier-2
181+
/// (first-scan) progress denominator. Sourced from the stashed calibration,
182+
/// so it's present only while a scan is running (and only when the space-info
183+
/// fetch succeeded). Lets the FE backfill tier-2 progress after a mid-scan
184+
/// window reload, where the `index-scan-started` event was missed.
185+
pub volume_used_bytes: Option<u64>,
164186
}
165187

166188
/// Extended debug status for the debug window. Includes live DB counts

apps/desktop/src-tauri/src/indexing/manager.rs

Lines changed: 155 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,52 @@ pub(crate) struct IndexManager {
5252
pub(super) app: AppHandle,
5353
/// Whether a full scan is currently running. Shared with the completion handler.
5454
pub(super) scanning: Arc<AtomicBool>,
55+
/// Calibration for the in-flight scan, captured in `start_scan`: the prior
56+
/// completed scan's totals (read from meta before truncating) plus the
57+
/// scanned volume's used bytes (fetched once). A plain field is enough —
58+
/// `start_scan` is `&mut self` and `get_status` is `&self`. `None` until the
59+
/// first scan starts; refreshed at the start of every scan.
60+
scan_calibration: Option<ScanCalibration>,
61+
}
62+
63+
/// The static, per-scan inputs the frontend needs to pick and drive a scan
64+
/// progress tier. Captured once at scan start (`get_status` reads it back for
65+
/// late-join), so the moving 500 ms progress events carry only live counters.
66+
#[derive(Debug, Clone, Copy)]
67+
struct ScanCalibration {
68+
/// The prior completed scan's persisted totals (tier-1 denominator + ETA seed).
69+
prior: super::store::ScanCalibration,
70+
/// The scanned volume's used bytes at scan start (tier-2 denominator). `None`
71+
/// when the space-info fetch failed; never blocks or delays the scan.
72+
volume_used_bytes: Option<u64>,
73+
}
74+
75+
/// Live scan-progress values that `get_status` copies onto
/// `IndexStatusResponse`. The derived `Default` is the all-zero / absent state.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
struct LiveScanCounters {
    /// Entries scanned so far, taken from the scan progress snapshot.
    entries_scanned: u64,
    /// Directories found so far, taken from the scan progress snapshot.
    dirs_found: u64,
    /// Bytes scanned so far, taken from the scan progress snapshot.
    bytes_scanned: u64,
    /// Used bytes of the scanned volume, taken from the stashed per-scan
    /// calibration; `None` when that value is unavailable.
    volume_used_bytes: Option<u64>,
}
83+
84+
/// Derive the live scan counters for `get_status` from the active scan's progress
85+
/// snapshot and the stashed per-scan calibration. Extracted as a pure function so
86+
/// the snapshot-and-calibration combining is unit-testable without an `AppHandle`
87+
/// (`get_status` itself needs a full `IndexManager`, which the module's testing
88+
/// bar keeps under integration coverage). No active scan → all-zero counters; the
89+
/// `volume_used_bytes` denominator rides the stashed calibration so a mid-scan
90+
/// window reload can still backfill tier-2 progress after missing the started event.
91+
fn live_scan_counters(
92+
snapshot: Option<scanner::ScanProgressSnapshot>,
93+
calibration: Option<ScanCalibration>,
94+
) -> LiveScanCounters {
95+
LiveScanCounters {
96+
entries_scanned: snapshot.map(|s| s.entries_scanned).unwrap_or(0),
97+
dirs_found: snapshot.map(|s| s.dirs_found).unwrap_or(0),
98+
bytes_scanned: snapshot.map(|s| s.bytes_scanned).unwrap_or(0),
99+
volume_used_bytes: calibration.and_then(|c| c.volume_used_bytes),
100+
}
55101
}
56102

57103
impl IndexManager {
@@ -84,6 +130,7 @@ impl IndexManager {
84130
live_event_task: Arc::new(std::sync::Mutex::new(None)),
85131
app,
86132
scanning: Arc::new(AtomicBool::new(false)),
133+
scan_calibration: None,
87134
})
88135
}
89136

@@ -303,6 +350,37 @@ impl IndexManager {
303350
return Err("Scan already running".to_string());
304351
}
305352

353+
// Step 0: Capture this scan's calibration BEFORE truncating.
354+
//
355+
// The prior completed scan's totals are read straight off the live read
356+
// connection: the calibration keys survive `TruncateData` (it preserves
357+
// `meta`), but reading first keeps the data flow obviously correct — we
358+
// snapshot the previous scan's numbers before the truncate touches anything.
359+
let prior = IndexStore::read_scan_calibration(self.store.read_conn()).unwrap_or_else(|e| {
360+
log::warn!("Failed to read prior scan calibration (tier-1 will degrade): {e}");
361+
super::store::ScanCalibration::default()
362+
});
363+
364+
// Fetch the scanned volume's used bytes ONCE (tier-2 denominator). The call
365+
// does disk I/O — an NSURL XPC round-trip on macOS, `statvfs` on Linux — and
366+
// `start_scan` runs in async contexts (the auto-start spawn, async Tauri
367+
// commands), so wrap it in `block_in_place`, matching the `flush_blocking`
368+
// call below. A bare blocking call on a tokio worker can stall on a wedged
369+
// mount. Failure → `None`; never block or delay the scan for the denominator.
370+
let volume_root = self.volume_root.clone();
371+
let volume_used_bytes = tokio::task::block_in_place(|| {
372+
crate::file_system::volume::backends::get_space_info_for_path(&volume_root)
373+
.map(|info| info.used_bytes)
374+
.map_err(|e| log::warn!("Failed to read volume used bytes (tier-2 will degrade): {e}"))
375+
.ok()
376+
});
377+
378+
let calibration = ScanCalibration {
379+
prior,
380+
volume_used_bytes,
381+
};
382+
self.scan_calibration = Some(calibration);
383+
306384
// Step 0a: Clear the previous scan's completion marker BEFORE truncating.
307385
// Without this, a rescan killed mid-way (power loss, `kill -9`) leaves the
308386
// PREVIOUS scan's `scan_completed_at` in meta on top of a truncated/partial
@@ -357,11 +435,17 @@ impl IndexManager {
357435
}
358436
}
359437

360-
// Emit started event
438+
// Emit started event with the static, per-scan calibration. Static values
439+
// ride this event once; the 500 ms progress event carries only the moving
440+
// counters, so the FE never re-receives constants. The tier decision
441+
// (calibrated vs rough) is then a pure function of this one event.
361442
let _ = self.app.emit(
362443
"index-scan-started",
363444
IndexScanStartedEvent {
364445
volume_id: self.volume_id.clone(),
446+
prior_total_entries: calibration.prior.total_entries,
447+
prior_scan_duration_ms: calibration.prior.scan_duration_ms,
448+
volume_used_bytes: calibration.volume_used_bytes,
365449
},
366450
);
367451

@@ -404,6 +488,7 @@ impl IndexManager {
404488
volume_id: volume_id_progress.clone(),
405489
entries_scanned: snap.entries_scanned,
406490
dirs_found: snap.dirs_found,
491+
bytes_scanned: snap.bytes_scanned,
407492
},
408493
);
409494

@@ -712,16 +797,17 @@ impl IndexManager {
712797
let db_file_size = self.store.db_file_size().ok();
713798

714799
let snap = self.scan_handle.as_ref().map(|h| h.progress.snapshot());
715-
let entries_scanned = snap.map(|s| s.entries_scanned).unwrap_or(0);
716-
let dirs_found = snap.map(|s| s.dirs_found).unwrap_or(0);
800+
let counters = live_scan_counters(snap, self.scan_calibration);
717801

718802
Ok(IndexStatusResponse {
719803
initialized: true,
720804
scanning: self.scanning.load(Ordering::Relaxed),
721-
entries_scanned,
722-
dirs_found,
805+
entries_scanned: counters.entries_scanned,
806+
dirs_found: counters.dirs_found,
807+
bytes_scanned: counters.bytes_scanned,
723808
index_status: Some(index_status),
724809
db_file_size,
810+
volume_used_bytes: counters.volume_used_bytes,
725811
})
726812
}
727813

@@ -837,3 +923,67 @@ impl IndexManager {
837923
log::info!("IndexManager: shut down for volume '{}'", self.volume_id);
838924
}
839925
}
926+
927+
#[cfg(test)]
mod tests {
    //! Coverage for `live_scan_counters`, the pure helper behind `get_status`.
    //!
    //! `IndexManager::get_status` needs a full manager (and therefore an
    //! `AppHandle`), so it is left to integration coverage. The helper it
    //! delegates to combines the scan snapshot with the stashed calibration;
    //! these cases pin every field it produces — the live counters from the
    //! snapshot and the tier-2 used-bytes denominator from the calibration.
    use super::*;
    use crate::indexing::scanner::ScanProgressSnapshot;

    /// Builds a progress snapshot from raw counter values.
    fn snapshot_of(entries_scanned: u64, dirs_found: u64, bytes_scanned: u64) -> ScanProgressSnapshot {
        ScanProgressSnapshot {
            entries_scanned,
            dirs_found,
            bytes_scanned,
        }
    }

    /// Builds a calibration with default prior totals and the given used bytes.
    fn calibration_with(volume_used_bytes: Option<u64>) -> ScanCalibration {
        ScanCalibration {
            prior: super::super::store::ScanCalibration::default(),
            volume_used_bytes,
        }
    }

    #[test]
    fn live_counters_reflect_snapshot_bytes_and_calibration_used_bytes() {
        // Mid-scan: a live snapshot together with a calibration that carries
        // the tier-2 denominator. Both must come through unchanged.
        let active = Some(snapshot_of(42_000, 1_200, 905_000_000));
        let stashed = Some(calibration_with(Some(746_000_000)));

        let counters = live_scan_counters(active, stashed);

        assert_eq!(
            counters,
            LiveScanCounters {
                entries_scanned: 42_000,
                dirs_found: 1_200,
                bytes_scanned: 905_000_000,
                volume_used_bytes: Some(746_000_000),
            }
        );
    }

    #[test]
    fn live_counters_are_zero_with_no_active_scan() {
        // Idle / between scans: neither a scan handle nor a calibration, so
        // every counter is 0 and the tier-2 denominator is absent.
        let counters = live_scan_counters(None, None);

        assert_eq!(counters, LiveScanCounters::default());
        assert_eq!(counters.bytes_scanned, 0);
        assert_eq!(counters.volume_used_bytes, None);
    }

    #[test]
    fn live_counters_omit_used_bytes_when_space_info_failed() {
        // A scan is running but the space-info fetch failed, so the calibration
        // holds no denominator. Live bytes must still be reported.
        let active = Some(snapshot_of(10, 3, 4_096));
        let stashed = Some(calibration_with(None));

        let counters = live_scan_counters(active, stashed);

        assert_eq!(counters.bytes_scanned, 4_096);
        assert_eq!(counters.volume_used_bytes, None);
    }
}

apps/desktop/src-tauri/src/indexing/state.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,8 +362,10 @@ pub fn get_status() -> Result<IndexStatusResponse, String> {
362362
scanning: false,
363363
entries_scanned: 0,
364364
dirs_found: 0,
365+
bytes_scanned: 0,
365366
index_status: None,
366367
db_file_size: None,
368+
volume_used_bytes: None,
367369
}),
368370
IndexPhase::Initializing { store, .. } => {
369371
let db_file_size = store.db_file_size().ok();
@@ -373,8 +375,10 @@ pub fn get_status() -> Result<IndexStatusResponse, String> {
373375
scanning: true,
374376
entries_scanned: 0,
375377
dirs_found: 0,
378+
bytes_scanned: 0,
376379
index_status,
377380
db_file_size,
381+
volume_used_bytes: None,
378382
})
379383
}
380384
IndexPhase::Running(mgr) => mgr.get_status(),
@@ -391,8 +395,10 @@ pub fn get_debug_status() -> Result<IndexDebugStatusResponse, String> {
391395
scanning: false,
392396
entries_scanned: 0,
393397
dirs_found: 0,
398+
bytes_scanned: 0,
394399
index_status: None,
395400
db_file_size: None,
401+
volume_used_bytes: None,
396402
};
397403
let (activity_phase, phase_started_at, phase_duration_ms, phase_history) =
398404
IndexManager::read_phase_timeline();
@@ -425,8 +431,10 @@ pub fn get_debug_status() -> Result<IndexDebugStatusResponse, String> {
425431
scanning: true,
426432
entries_scanned: 0,
427433
dirs_found: 0,
434+
bytes_scanned: 0,
428435
index_status,
429436
db_file_size,
437+
volume_used_bytes: None,
430438
};
431439
let (activity_phase, phase_started_at, phase_duration_ms, phase_history) =
432440
IndexManager::read_phase_timeline();

apps/desktop/src-tauri/src/indexing/store.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,10 @@ pub struct IndexStatus {
183183
pub scan_completed_at: Option<String>,
184184
pub scan_duration_ms: Option<String>,
185185
pub total_entries: Option<String>,
186+
/// The previous completed scan's summed post-dedup physical bytes (TEXT, like
187+
/// every meta value). Surfaced for symmetry with `total_entries` and for
188+
/// debugging; not on the tier-1 critical path.
189+
pub total_physical_bytes: Option<String>,
186190
pub last_event_id: Option<String>,
187191
}
188192

@@ -550,6 +554,7 @@ impl IndexStore {
550554
scan_completed_at: Self::read_meta_value(&self.read_conn, "scan_completed_at")?,
551555
scan_duration_ms: Self::read_meta_value(&self.read_conn, "scan_duration_ms")?,
552556
total_entries: Self::read_meta_value(&self.read_conn, "total_entries")?,
557+
total_physical_bytes: Self::read_meta_value(&self.read_conn, "total_physical_bytes")?,
553558
last_event_id: Self::read_meta_value(&self.read_conn, "last_event_id")?,
554559
})
555560
}

apps/desktop/src/lib/ipc/bindings.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2853,6 +2853,12 @@ export type IndexStatus = {
28532853
scanCompletedAt: string | null
28542854
scanDurationMs: string | null
28552855
totalEntries: string | null
2856+
/**
2857+
* The previous completed scan's summed post-dedup physical bytes (TEXT, like
2858+
* every meta value). Surfaced for symmetry with `total_entries` and for
2859+
* debugging; not on the tier-1 critical path.
2860+
*/
2861+
totalPhysicalBytes: string | null
28562862
lastEventId: string | null
28572863
}
28582864

@@ -2861,8 +2867,22 @@ export type IndexStatusResponse = {
28612867
scanning: boolean
28622868
entriesScanned: number
28632869
dirsFound: number
2870+
/**
2871+
* Resolved post-dedup physical bytes scanned so far (live), the tier-2
2872+
* progress numerator. 0 when no scan is running. Rides the same
2873+
* `scan_handle` snapshot as `entries_scanned`/`dirs_found`.
2874+
*/
2875+
bytesScanned: number
28642876
indexStatus: IndexStatus | null
28652877
dbFileSize: number | null
2878+
/**
2879+
* The scanned volume's used bytes at the current scan's start, the tier-2
2880+
* (first-scan) progress denominator. Sourced from the stashed calibration,
2881+
* so it's present only while a scan is running (and only when the space-info
2882+
* fetch succeeded). Lets the FE backfill tier-2 progress after a mid-scan
2883+
* window reload, where the `index-scan-started` event was missed.
2884+
*/
2885+
volumeUsedBytes: number | null
28662886
}
28672887

28682888
/**

0 commit comments

Comments
 (0)