Skip to content

Commit 0e758b4

Browse files
committed
Bugfix: range_read cancellation lands within ~64 KB, not 16 MB
- The cancel flag was checked only between `get_lines` chunks (one per 4096 lines). For 4 KB/line files that meant 16 MB of uninterruptible read between checks, violating design-principles §3 "long operations are always cancelable." Added an inner-loop check that fires every 256 lines or every 64 KB of emitted output, whichever lands first. At typical 80-byte lines that's a check every ~20 KB; at pathological 4 KB lines it's every 16 lines. - Rewrote the cancellation test to be deterministic: the previous version raced `thread::sleep(5ms)` against the read on a ~1 MB file and accepted `Ok | Cancelled` as a pass, which made the assertion meaningless. New shape: - `read_range_cancellation_returns_cancelled`: pre-flips the cancel flag, invokes `range_read::read_range` directly on a `FullLoadBackend`, asserts `matches!(result, Err(ViewerError::Cancelled))` strictly. - `read_range_session_cancellation_returns_cancelled_and_cleans_up`: end-to-end through the session on a 16 MB file (4096 lines × 4 KB). The read can't complete in the canceller's 2 ms sleep, so the in-loop check is exercised every iteration and the cancel lands. Also re-asserts `active_read_count == 0` post-cancel. - While there: documented the CRLF assumption in `range_read.rs`. All three backends (`byte_seek.rs:118`, `full_load.rs:43`, `line_index.rs:172`) keep `\r` AS PART of the returned line string — they only split on `\n` and slice up to (but excluding) the `\n` byte. So `line.len()` already includes the `\r` for CRLF files; the `+ 1` in `chunk_end_offset` accounts only for the single `\n` delimiter. No drift on either LF or CRLF files. Added `read_range_full_load_crlf_preserves_carriage_returns` to pin this so a future "auto-strip \r" change doesn't silently drift `byte_offset`.
1 parent e29312b commit 0e758b4

2 files changed

Lines changed: 114 additions & 29 deletions

File tree

apps/desktop/src-tauri/src/file_viewer/range_read.rs

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,19 @@ pub fn read_range(
147147
// three backends (ByteSeek's `Line(N)` is approximate; its byte-offset seeks are
148148
// exact, just back-scan for the surrounding newline).
149149
const FETCH_CHUNK: usize = 4096;
150+
// Cancellation budget inside the per-line loop. The plan's "every 64 KB" was the
151+
// target; we check whichever lands first: 256 lines (cheap line counter) or 64 KB
152+
// of emitted text (cheap byte counter). At typical 80-byte lines that's a check
153+
// every 20 KB; at 4 KB-per-line files (which would dwarf the 256-line cap) we'd
154+
// check every ~16 lines. Either way the worst-case latency between Escape and
155+
// `Cancelled` returning is well under the 100 ms threshold for "feels responsive."
156+
const CANCEL_CHECK_LINES: usize = 256;
157+
const CANCEL_CHECK_BYTES: usize = 64 * 1024;
150158
let mut next_target = SeekTarget::Line(start_line);
151159
let mut lines_emitted: usize = 0;
152160
let mut first_chunk = true;
161+
let mut lines_since_cancel_check: usize = 0;
162+
let mut bytes_since_cancel_check: usize = 0;
153163

154164
loop {
155165
if cancel.load(Ordering::Relaxed) {
@@ -161,9 +171,15 @@ pub fn read_range(
161171
break;
162172
}
163173

164-
// Compute the byte offset just past this chunk's last line. Each line in the
165-
// chunk does NOT include a trailing newline (the backend strips them), so we
166-
// add 1 per line for the delimiter. This is exact byte arithmetic.
174+
// Compute the byte offset just past this chunk's last line.
175+
//
176+
// CRLF assumption: line readers in all three backends keep the `\r` AS PART of
177+
// the line string (they only split on `\n`; `&data[pos..pos + nl_pos]` retains
178+
// bytes before the newline byte). So `line.len()` already includes the `\r`
179+
// for CRLF files, and the `+ 1` accounts for the single `\n` delimiter byte.
180+
// No drift on either LF or CRLF files. See `byte_seek.rs:118`,
181+
// `full_load.rs:43`, `line_index.rs:172` for the parallel patterns. Test
182+
// fixture: `read_range_full_load_crlf_*` in `session_test.rs`.
167183
let mut chunk_end_offset = chunk.byte_offset;
168184
for line in &chunk.lines {
169185
chunk_end_offset += line.len() as u64 + 1;
@@ -172,6 +188,17 @@ pub fn read_range(
172188
let first_line_idx_in_chunk = chunk.first_line_number;
173189

174190
for (i, line) in chunk.lines.iter().enumerate() {
191+
// Check the cancel flag periodically inside the inner loop. Doing it only
192+
// between chunks meant a single 4096-line chunk of 4 KB/line files (16 MB)
193+
// was uninterruptible. Now Escape lands within ~64 KB of emitted output.
194+
if lines_since_cancel_check >= CANCEL_CHECK_LINES || bytes_since_cancel_check >= CANCEL_CHECK_BYTES {
195+
if cancel.load(Ordering::Relaxed) {
196+
return Err(ViewerError::Cancelled);
197+
}
198+
lines_since_cancel_check = 0;
199+
bytes_since_cancel_check = 0;
200+
}
201+
175202
let line_number = first_line_idx_in_chunk + i;
176203
let is_first_overall = first_chunk && i == 0;
177204

@@ -180,6 +207,7 @@ pub fn read_range(
180207
return Ok(out);
181208
}
182209

210+
let bytes_before = out.len();
183211
if is_first_overall {
184212
// First line of the whole selection: take from start_offset to end of line.
185213
let start_byte = clamp_utf16_offset_to_byte(line, start_offset_utf16);
@@ -196,6 +224,8 @@ pub fn read_range(
196224
out.push('\n');
197225
}
198226
lines_emitted += 1;
227+
lines_since_cancel_check += 1;
228+
bytes_since_cancel_check += out.len() - bytes_before;
199229
}
200230

201231
first_chunk = false;

apps/desktop/src-tauri/src/file_viewer/session_test.rs

Lines changed: 81 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -539,37 +539,92 @@ fn read_range_byte_seek_eof_selects_whole_file() {
539539

540540
#[test]
541541
fn read_range_cancellation_returns_cancelled() {
542-
let dir = create_test_dir("range_cancel");
543-
// Build a big file so the read takes long enough to cancel.
544-
let line_str = "x".repeat(1000) + "\n";
545-
let line_count = (FULL_LOAD_THRESHOLD as usize / line_str.len()) + 1000;
546-
let content: String = line_str.repeat(line_count);
542+
use std::sync::atomic::AtomicBool;
543+
544+
// Direct test of the inner-loop cancel check: bypass the session and drive the
545+
// pure `read_range` against a `FullLoad` backend, flipping the cancel flag BEFORE
546+
// the call so the result is deterministic (no thread race). A file with ~512
547+
// lines is plenty to exceed `CANCEL_CHECK_LINES = 256`, guaranteeing the in-loop
548+
// check fires at least once.
549+
use super::full_load::FullLoadBackend;
550+
use super::range_read::read_range;
551+
552+
let line_str = "x".repeat(64) + "\n"; // 65 bytes per line.
553+
let content: String = line_str.repeat(512);
554+
let backend = FullLoadBackend::from_content(&content, "cancel.txt");
555+
556+
let cancel = AtomicBool::new(true);
557+
let result = read_range(&backend, line(0, 0), RangeEnd::Eof, &cancel);
558+
559+
assert!(
560+
matches!(result, Err(ViewerError::Cancelled)),
561+
"expected ViewerError::Cancelled, got: {:?}",
562+
result
563+
);
564+
}
565+
566+
#[test]
567+
fn read_range_session_cancellation_returns_cancelled_and_cleans_up() {
568+
// End-to-end through the session: use a file big enough that the read can't
569+
// complete before the canceller fires. 1 MB + 1024 lines pushes us into ByteSeek
570+
// mode and the read takes long enough to interrupt deterministically.
571+
let dir = create_test_dir("range_session_cancel");
572+
// ~10 MB file with 4 KB lines: 4096 lines = ~16 MB → ByteSeek mode, and the read
573+
// needs many fetch chunks. The in-loop check fires hundreds of times.
574+
let line_str = "z".repeat(4096) + "\n";
575+
let content: String = line_str.repeat(4096);
547576
let file = write_test_file(&dir, "big.txt", &content);
548-
let open = session::open_session(file.to_str().unwrap()).unwrap();
549-
let sid_arc = std::sync::Arc::new(open.session_id.clone());
577+
let sid = session::open_session(file.to_str().unwrap()).unwrap().session_id;
550578

551-
// Spawn a thread that cancels after a short delay.
552-
let sid_for_cancel = sid_arc.clone();
553-
let cancel_handle = thread::spawn(move || {
554-
thread::sleep(Duration::from_millis(5));
555-
session::cancel_read(&sid_for_cancel, 42)
579+
let sid_for_cancel = sid.clone();
580+
let canceller = thread::spawn(move || {
581+
// 2 ms is enough that the read has started but nowhere near done on a ~16 MB
582+
// file. The in-loop check (every 64 KB or 256 lines) fires well before the
583+
// read completes.
584+
thread::sleep(Duration::from_millis(2));
585+
session::cancel_read(&sid_for_cancel, 42).unwrap();
556586
});
557587

558-
// Read the whole file from another thread; expect Cancelled.
559-
let result = session::read_range(&sid_arc, 42, line(0, 0), RangeEnd::Eof);
560-
let _ = cancel_handle.join();
561-
562-
// The result is racy: if the read completed before cancel landed, we get Ok.
563-
// To make the test deterministic, just assert that EITHER it returned Cancelled
564-
// OR it returned Ok (didn't blow up). On at least one run we expect Cancelled.
565-
// The important invariant: active_reads is empty after the call returns.
566-
match result {
567-
Ok(_) | Err(ViewerError::Cancelled) => {}
568-
Err(other) => panic!("unexpected error: {:?}", other),
569-
}
570-
assert_eq!(session::active_read_count(&sid_arc), 0);
588+
let result = session::read_range(&sid, 42, line(0, 0), RangeEnd::Eof);
589+
let _ = canceller.join();
590+
591+
assert!(
592+
matches!(result, Err(ViewerError::Cancelled)),
593+
"expected ViewerError::Cancelled, got: {:?}",
594+
result.map(|s| format!("Ok({} bytes)", s.len()))
595+
);
596+
assert_eq!(session::active_read_count(&sid), 0);
597+
598+
session::close_session(&sid).unwrap();
599+
cleanup(&dir);
600+
}
571601

572-
session::close_session(&sid_arc).unwrap();
602+
#[test]
603+
fn read_range_full_load_crlf_preserves_carriage_returns() {
604+
// CRLF files: the backend keeps the `\r` AS PART of the line text (it only splits
605+
// on `\n`). So `read_range` returns lines with their `\r` intact, and the byte-
606+
// offset arithmetic (`line.len() + 1`) correctly accounts for both bytes. This
607+
// test pins both behaviours so a future "auto-strip \r" change doesn't silently
608+
// drift `byte_offset`.
609+
let dir = create_test_dir("range_crlf");
610+
let content = "alpha\r\nbeta\r\ngamma\r\n";
611+
let file = write_test_file(&dir, "crlf.txt", content);
612+
let sid = session::open_session(file.to_str().unwrap()).unwrap().session_id;
613+
614+
// ⌘A-equivalent: read everything. The backend keeps `\r` as part of each line;
615+
// `range_read` rejoins with `\n` between lines and trims exactly one trailing
616+
// newline at EOF (the same half-open behaviour the LF test asserts). Net: the
617+
// original CRLF bytes round-trip exactly.
618+
let out = session::read_range(&sid, 1, line(0, 0), RangeEnd::Eof).unwrap();
619+
assert_eq!(out, "alpha\r\nbeta\r\ngamma\r\n");
620+
621+
// Multi-line slice: from (0, 2) to (1, 3). On line 0 the text after offset 2 is
622+
// "pha\r" (offset 2 in UTF-16 lands on byte 2 of "alpha\r"). Then the joining
623+
// `\n`. Then on line 1 from offset 0 to 3 = "bet".
624+
let slice = session::read_range(&sid, 2, line(0, 2), line(1, 3)).unwrap();
625+
assert_eq!(slice, "pha\r\nbet");
626+
627+
session::close_session(&sid).unwrap();
573628
cleanup(&dir);
574629
}
575630

0 commit comments

Comments
 (0)