Skip to content

Commit e035894

Browse files
committed
feat: Store right_significant_is_emoji state instead
1 parent 4790e25 commit e035894

1 file changed

Lines changed: 7 additions & 16 deletions

File tree

src/word.rs

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,8 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
468468
let mut state = Start;
469469
let mut savestate = Start;
470470
let mut cat = wd::WC_Any;
471+
// Tracks whether the nearest non-(Extend|Format) char to the right is emoji.
472+
let mut right_significant_is_emoji: bool = false;
471473

472474
let mut skipped_format_extend = false;
473475

@@ -488,7 +490,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
488490
// Hebrew Letter immediately before it.
489491
// (2) Format and Extend char handling takes some gymnastics.
490492

491-
if cat == wd::WC_ZWJ && state != Zwj && self.next_significant_is_emoji(idx) {
493+
if cat == wd::WC_ZWJ && state != Zwj && right_significant_is_emoji {
492494
continue;
493495
}
494496

@@ -659,6 +661,10 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
659661
RequireHLetter if cat == wd::WC_Hebrew_Letter => HLetter, // rule WB7b
660662
_ => break, // backtrack will happen
661663
},
664+
};
665+
666+
if cat != wd::WC_Extend && cat != wd::WC_Format {
667+
right_significant_is_emoji = is_emoji(ch);
662668
}
663669
}
664670

@@ -732,21 +738,6 @@ impl<'a> UWordBounds<'a> {
732738
None
733739
}
734740
}
735-
736-
#[inline]
737-
fn next_significant_is_emoji(&self, idx: usize) -> bool {
738-
use crate::tables::word as wd;
739-
let mut nidx = idx;
740-
while let Some(ncat) = self.get_next_cat(nidx) {
741-
nidx += self.string[nidx..].chars().next().unwrap().len_utf8();
742-
if ncat == wd::WC_Extend || ncat == wd::WC_Format {
743-
continue;
744-
}
745-
let nch = self.string[nidx..].chars().next().unwrap();
746-
return is_emoji(nch);
747-
}
748-
false
749-
}
750741
}
751742

752743
/// ASCII‑fast‑path word‑boundary iterator for strings that contain only ASCII characters.

0 commit comments

Comments
 (0)