@@ -380,11 +380,13 @@ fn truncate(value: &str, cfg: &GuardConfig) -> String {
380380 cut = find_word_boundary ( value, cut, cut) ;
381381 }
382382
383- // Byte-slice instead of .chars().take().collect()
383+ // Pre-sized allocation: body + ellipsis in one shot
384384 let byte_off = byte_offset_of_char ( value, cut) ;
385- let mut truncated = value[ ..byte_off] . to_string ( ) ;
386- truncated. push_str ( ell) ;
387- return truncated;
385+ let body = & value[ ..byte_off] ;
386+ let mut result = String :: with_capacity ( body. len ( ) + ell. len ( ) ) ;
387+ result. push_str ( body) ;
388+ result. push_str ( ell) ;
389+ return result;
388390 }
389391 }
390392 // Under limit or no max_tokens — pass through
@@ -419,11 +421,13 @@ fn truncate(value: &str, cfg: &GuardConfig) -> String {
419421 cut = find_word_boundary ( value, cut, max_chars) ;
420422 }
421423
422- // Byte-slice instead of .chars().take().collect()
424+ // Pre-sized allocation: body + ellipsis in one shot
423425 let byte_off = byte_offset_of_char ( value, cut) ;
424- let mut truncated = value[ ..byte_off] . to_string ( ) ;
425- truncated. push_str ( ell) ;
426- truncated
426+ let body = & value[ ..byte_off] ;
427+ let mut result = String :: with_capacity ( body. len ( ) + ell. len ( ) ) ;
428+ result. push_str ( body) ;
429+ result. push_str ( ell) ;
430+ result
427431}
428432
429433// ─── Violation info (returned as Python dict) ────────────────────────────────
@@ -683,10 +687,108 @@ fn process_container<'py>(
683687
684688 // ── List ──
685689 if let Ok ( list) = container. cast :: < PyList > ( ) {
686- if list. len ( ) > cfg. max_structure_size {
690+ let list_len = list. len ( ) ;
691+ if list_len > cfg. max_structure_size {
687692 return Ok ( ProcessResult :: Unchanged ) ;
688693 }
689694
695+ // ── Batch fast path for all-string lists (truncate mode) ──
696+ // Avoids per-item recursive process_container calls, path string
697+ // formatting, and interleaved PyList::append. Instead:
698+ // Phase 1: borrow all &str from Python (tight loop, cache-friendly)
699+ // Phase 2: decide + truncate in pure Rust (no Python API calls)
700+ // Phase 3: build output PyList in one pass
701+ if cfg. strategy == Strategy :: Truncate {
702+ // Phase 1: collect list items so they live long enough to borrow &str
703+ let items: Vec < Bound < ' py , PyAny > > = list. iter ( ) . collect ( ) ;
704+
705+ // Try to borrow all items as &str via PyString::to_str()
706+ let mut borrowed: Vec < & str > = Vec :: with_capacity ( list_len) ;
707+ let mut all_strings = true ;
708+ for item in & items {
709+ if let Ok ( s) = item. cast :: < PyString > ( ) {
710+ match s. to_str ( ) {
711+ Ok ( text) => borrowed. push ( text) ,
712+ Err ( _) => {
713+ all_strings = false ;
714+ break ;
715+ }
716+ }
717+ } else {
718+ all_strings = false ;
719+ break ;
720+ }
721+ }
722+
723+ if all_strings {
724+ // Phase 2: process all strings, tracking which need truncation
725+ let mut any_modified = false ;
726+ let mut results: Vec < Option < String > > = Vec :: with_capacity ( list_len) ;
727+
728+ let limit = match cfg. limit_mode {
729+ LimitMode :: Character => cfg. max_chars . unwrap_or ( usize:: MAX ) ,
730+ LimitMode :: Token => cfg
731+ . max_tokens
732+ . unwrap_or ( usize:: MAX )
733+ . saturating_add ( 1 )
734+ . saturating_mul ( cfg. chars_per_token ) ,
735+ } ;
736+
737+ for & text in & borrowed {
738+ // Skip numerics (only check short strings)
739+ if text. len ( ) <= 50 && is_numeric_string ( text) {
740+ results. push ( None ) ; // unchanged
741+ continue ;
742+ }
743+
744+ // O(1) byte-length fast path
745+ if text. len ( ) <= limit {
746+ // If ASCII, byte len == char len; definitely under limit
747+ if text. is_ascii ( ) {
748+ results. push ( None ) ;
749+ continue ;
750+ }
751+ }
752+
753+ let ( char_count, _) = count_chars_capped ( text, limit) ;
754+ let above_max = match cfg. limit_mode {
755+ LimitMode :: Character => cfg. max_chars . is_some_and ( |m| char_count > m) ,
756+ LimitMode :: Token => {
757+ let tokens = estimate_tokens ( char_count, cfg. chars_per_token ) ;
758+ cfg. max_tokens . is_some_and ( |m| tokens > m)
759+ }
760+ } ;
761+
762+ if above_max {
763+ let truncated = truncate ( text, cfg) ;
764+ if truncated != text {
765+ any_modified = true ;
766+ results. push ( Some ( truncated) ) ;
767+ } else {
768+ results. push ( None ) ;
769+ }
770+ } else {
771+ results. push ( None ) ;
772+ }
773+ }
774+
775+ if !any_modified {
776+ return Ok ( ProcessResult :: Unchanged ) ;
777+ }
778+
779+ // Phase 3: build output list in one pass
780+ let new_list = PyList :: empty ( py) ;
781+ for ( idx, opt) in results. iter ( ) . enumerate ( ) {
782+ match opt {
783+ Some ( truncated) => new_list. append ( PyString :: new ( py, truncated) ) ?,
784+ None => new_list. append ( list. get_item ( idx) ?) ?,
785+ }
786+ }
787+ return Ok ( ProcessResult :: Modified ( new_list. into_any ( ) ) ) ;
788+ }
789+ }
790+
791+ // ── Generic fallback for mixed-type lists or block mode ──
690792 let mut any_modified = false ;
691793 let new_list = PyList :: empty ( py) ;
692794
0 commit comments