Skip to content

Commit 344dd01

Browse files
committed
feat: Store right_significant_is_emoji state instead
1 parent 4790e25 commit 344dd01

1 file changed

Lines changed: 12 additions & 16 deletions

File tree

src/word.rs

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,8 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
468468
let mut state = Start;
469469
let mut savestate = Start;
470470
let mut cat = wd::WC_Any;
471+
// Tracks whether the nearest non-(Extend|Format) char to the right is emoji.
472+
let mut right_significant_is_emoji: Option<bool> = None;
471473

472474
let mut skipped_format_extend = false;
473475

@@ -488,7 +490,9 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
488490
// Hebrew Letter immediately before it.
489491
// (2) Format and Extend char handling takes some gymnastics.
490492

491-
if cat == wd::WC_ZWJ && state != Zwj && self.next_significant_is_emoji(idx) {
493+
if cat == wd::WC_ZWJ && state != Zwj && right_significant_is_emoji == Some(true) {
494+
// For further-left chars, this ZWJ is now the nearest significant char on the right.
495+
right_significant_is_emoji = Some(false);
492496
continue;
493497
}
494498

@@ -502,6 +506,9 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
502506
}
503507

504508
if state != Start {
509+
if cat != wd::WC_Extend && cat != wd::WC_Format {
510+
right_significant_is_emoji = Some(is_emoji(ch));
511+
}
505512
continue;
506513
}
507514
} else if state == FormatExtend(AcceptNone) {
@@ -659,6 +666,10 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
659666
RequireHLetter if cat == wd::WC_Hebrew_Letter => HLetter, // rule WB7b
660667
_ => break, // backtrack will happen
661668
},
669+
};
670+
671+
if cat != wd::WC_Extend && cat != wd::WC_Format {
672+
right_significant_is_emoji = Some(is_emoji(ch));
662673
}
663674
}
664675

@@ -732,21 +743,6 @@ impl<'a> UWordBounds<'a> {
732743
None
733744
}
734745
}
735-
736-
#[inline]
737-
fn next_significant_is_emoji(&self, idx: usize) -> bool {
738-
use crate::tables::word as wd;
739-
let mut nidx = idx;
740-
while let Some(ncat) = self.get_next_cat(nidx) {
741-
nidx += self.string[nidx..].chars().next().unwrap().len_utf8();
742-
if ncat == wd::WC_Extend || ncat == wd::WC_Format {
743-
continue;
744-
}
745-
let nch = self.string[nidx..].chars().next().unwrap();
746-
return is_emoji(nch);
747-
}
748-
false
749-
}
750746
}
751747

752748
/// ASCII‑fast‑path word‑boundary iterator for strings that contain only ASCII characters.

0 commit comments

Comments
 (0)