@@ -468,6 +468,8 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
468468 let mut state = Start ;
469469 let mut savestate = Start ;
470470 let mut cat = wd:: WC_Any ;
471+ // Tracks whether the nearest non-(Extend|Format) char to the right is emoji.
472+ let mut right_significant_is_emoji: Option < bool > = None ;
471473
472474 let mut skipped_format_extend = false ;
473475
@@ -488,7 +490,9 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
488490 // Hebrew Letter immediately before it.
489491 // (2) Format and Extend char handling takes some gymnastics.
490492
491- if cat == wd:: WC_ZWJ && state != Zwj && self . next_significant_is_emoji ( idx) {
493+ if cat == wd:: WC_ZWJ && state != Zwj && right_significant_is_emoji == Some ( true ) {
494+ // For further-left chars, this ZWJ is now the nearest significant char on the right.
495+ right_significant_is_emoji = Some ( false ) ;
492496 continue ;
493497 }
494498
@@ -502,6 +506,9 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
502506 }
503507
504508 if state != Start {
509+ if cat != wd:: WC_Extend && cat != wd:: WC_Format {
510+ right_significant_is_emoji = Some ( is_emoji ( ch) ) ;
511+ }
505512 continue ;
506513 }
507514 } else if state == FormatExtend ( AcceptNone ) {
@@ -659,6 +666,10 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
659666 RequireHLetter if cat == wd:: WC_Hebrew_Letter => HLetter , // rule WB7b
660667 _ => break , // backtrack will happen
661668 } ,
669+ } ;
670+
671+ if cat != wd:: WC_Extend && cat != wd:: WC_Format {
672+ right_significant_is_emoji = Some ( is_emoji ( ch) ) ;
662673 }
663674 }
664675
@@ -732,21 +743,6 @@ impl<'a> UWordBounds<'a> {
732743 None
733744 }
734745 }
735-
736- #[ inline]
737- fn next_significant_is_emoji ( & self , idx : usize ) -> bool {
738- use crate :: tables:: word as wd;
739- let mut nidx = idx;
740- while let Some ( ncat) = self . get_next_cat ( nidx) {
741- nidx += self . string [ nidx..] . chars ( ) . next ( ) . unwrap ( ) . len_utf8 ( ) ;
742- if ncat == wd:: WC_Extend || ncat == wd:: WC_Format {
743- continue ;
744- }
745- let nch = self . string [ nidx..] . chars ( ) . next ( ) . unwrap ( ) ;
746- return is_emoji ( nch) ;
747- }
748- false
749- }
750746}
751747
752748/// ASCII‑fast‑path word‑boundary iterator for strings that contain only ASCII characters.
0 commit comments