Skip to content

Commit 284f905

Browse files
gandhipratik203 authored and Suresh Kumar Moharajan committed
perf(plugin): batch list processing and pre-sized String allocation
Two optimizations informed by the rate limiter PR (#3809) patterns:

1. Batch list processing: for all-string lists in truncate mode, extract all &str borrows in one pass, process in a tight Rust loop, build output PyList in a single pass. Better cache locality and avoids per-item path string formatting and interleaved append calls.
2. Pre-sized String::with_capacity(): eliminate reallocation during truncation by pre-computing body + ellipsis size.

Results:
- Short list passthrough: 13.6x → 18.9x faster
- List 10x10KB: 2.6x → 3.0x faster
- Deep nested dict: 7.1x → 7.0x faster (stable)
- Wide nested dict: 8.4x → 8.5x faster (stable)
- 331 Python tests + 47 Rust tests pass

Signed-off-by: Pratik Gandhi <gandhipratik203@gmail.com>
1 parent 54e70a3 commit 284f905

1 file changed

Lines changed: 111 additions & 9 deletions

File tree

  • plugins_rust/output_length_guard/src

plugins_rust/output_length_guard/src/lib.rs

Lines changed: 111 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -380,11 +380,13 @@ fn truncate(value: &str, cfg: &GuardConfig) -> String {
380380
cut = find_word_boundary(value, cut, cut);
381381
}
382382

383-
// Byte-slice instead of .chars().take().collect()
383+
// Pre-sized allocation: body + ellipsis in one shot
384384
let byte_off = byte_offset_of_char(value, cut);
385-
let mut truncated = value[..byte_off].to_string();
386-
truncated.push_str(ell);
387-
return truncated;
385+
let body = &value[..byte_off];
386+
let mut result = String::with_capacity(body.len() + ell.len());
387+
result.push_str(body);
388+
result.push_str(ell);
389+
return result;
388390
}
389391
}
390392
// Under limit or no max_tokens — pass through
@@ -419,11 +421,13 @@ fn truncate(value: &str, cfg: &GuardConfig) -> String {
419421
cut = find_word_boundary(value, cut, max_chars);
420422
}
421423

422-
// Byte-slice instead of .chars().take().collect()
424+
// Pre-sized allocation: body + ellipsis in one shot
423425
let byte_off = byte_offset_of_char(value, cut);
424-
let mut truncated = value[..byte_off].to_string();
425-
truncated.push_str(ell);
426-
truncated
426+
let body = &value[..byte_off];
427+
let mut result = String::with_capacity(body.len() + ell.len());
428+
result.push_str(body);
429+
result.push_str(ell);
430+
result
427431
}
428432

429433
// ─── Violation info (returned as Python dict) ────────────────────────────────
@@ -683,10 +687,108 @@ fn process_container<'py>(
683687

684688
// ── List ──
685689
if let Ok(list) = container.cast::<PyList>() {
686-
if list.len() > cfg.max_structure_size {
690+
let list_len = list.len();
691+
if list_len > cfg.max_structure_size {
687692
return Ok(ProcessResult::Unchanged);
688693
}
689694

695+
// ── Batch fast path for all-string lists (truncate mode) ──
696+
// Avoids per-item recursive process_container calls, path string
697+
// formatting, and interleaved PyList::append. Instead:
698+
// Phase 1: borrow all &str from Python (tight loop, cache-friendly)
699+
// Phase 2: decide + truncate in pure Rust (no Python API calls)
700+
// Phase 3: build output PyList in one pass
701+
if cfg.strategy == Strategy::Truncate {
702+
// Phase 1: collect list items so they live long enough to borrow &str
703+
let items: Vec<Bound<'py, PyAny>> = list.iter().collect();
704+
705+
// Try to borrow all items as &str via PyString::to_str()
706+
let mut borrowed: Vec<&str> = Vec::with_capacity(list_len);
707+
let mut all_strings = true;
708+
for item in &items {
709+
if let Ok(s) = item.cast::<PyString>() {
710+
match s.to_str() {
711+
Ok(text) => borrowed.push(text),
712+
Err(_) => {
713+
all_strings = false;
714+
break;
715+
}
716+
}
717+
} else {
718+
all_strings = false;
719+
break;
720+
}
721+
}
722+
723+
if all_strings {
724+
// Phase 2: process all strings, tracking which need truncation
725+
let mut any_modified = false;
726+
let mut results: Vec<Option<String>> = Vec::with_capacity(list_len);
727+
728+
let limit = match cfg.limit_mode {
729+
LimitMode::Character => cfg.max_chars.unwrap_or(usize::MAX),
730+
LimitMode::Token => cfg
731+
.max_tokens
732+
.unwrap_or(usize::MAX)
733+
.saturating_add(1)
734+
.saturating_mul(cfg.chars_per_token),
735+
};
736+
737+
for &text in &borrowed {
738+
// Skip numerics (only check short strings)
739+
if text.len() <= 50 && is_numeric_string(text) {
740+
results.push(None); // unchanged
741+
continue;
742+
}
743+
744+
// O(1) byte-length fast path
745+
if text.len() <= limit {
746+
// If ASCII, byte len == char len; definitely under limit
747+
if text.is_ascii() {
748+
results.push(None);
749+
continue;
750+
}
751+
}
752+
753+
let (char_count, _) = count_chars_capped(text, limit);
754+
let above_max = match cfg.limit_mode {
755+
LimitMode::Character => cfg.max_chars.is_some_and(|m| char_count > m),
756+
LimitMode::Token => {
757+
let tokens = estimate_tokens(char_count, cfg.chars_per_token);
758+
cfg.max_tokens.is_some_and(|m| tokens > m)
759+
}
760+
};
761+
762+
if above_max {
763+
let truncated = truncate(text, cfg);
764+
if truncated != text {
765+
any_modified = true;
766+
results.push(Some(truncated));
767+
} else {
768+
results.push(None);
769+
}
770+
} else {
771+
results.push(None);
772+
}
773+
}
774+
775+
if !any_modified {
776+
return Ok(ProcessResult::Unchanged);
777+
}
778+
779+
// Phase 3: build output list in one pass
780+
let new_list = PyList::empty(py);
781+
for (idx, opt) in results.iter().enumerate() {
782+
match opt {
783+
Some(truncated) => new_list.append(PyString::new(py, truncated))?,
784+
None => new_list.append(list.get_item(idx)?)?,
785+
}
786+
}
787+
return Ok(ProcessResult::Modified(new_list.into_any()));
788+
}
789+
}
790+
791+
// ── Generic fallback for mixed-type lists or block mode ──
690792
let mut any_modified = false;
691793
let new_list = PyList::empty(py);
692794

0 commit comments

Comments
 (0)