@@ -468,7 +468,11 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
468468 let mut state = Start ;
469469 let mut savestate = Start ;
470470 let mut cat = wd:: WC_Any ;
471- // Tracks whether the nearest non-(Extend|Format) char to the right is emoji.
471+
472+ // WB3c is context-sensitive (ZWJ + Extended_Pictographic),
473+ // while WB4 collapses Extend/Format and would otherwise hide that context.
474+ // We therefore keep this context outside the main state machine:
475+ // whether the nearest non-(Extend|Format) char to the right is emoji.
472476 let mut right_significant_is_emoji: bool = false ;
473477
474478 let mut skipped_format_extend = false ;
@@ -490,10 +494,14 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
490494 // Hebrew Letter immediately before it.
491495 // (2) Format and Extend char handling takes some gymnastics.
492496
497+ // Reverse-direction WB3c check: when we encounter ZWJ and the nearest
498+ // significant right-side char is emoji, do not break here.
493499 if cat == wd:: WC_ZWJ && state != Zwj && right_significant_is_emoji {
494500 continue ;
495501 }
496502
503+ // Keep the right-side WB3c context up to date as we move left.
504+ // Ignore Extend/Format here to mirror WB4 collapsing behavior.
497505 if cat != wd:: WC_Extend && cat != wd:: WC_Format {
498506 right_significant_is_emoji = is_emoji ( ch) ;
499507 }
@@ -665,7 +673,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
665673 RequireHLetter if cat == wd:: WC_Hebrew_Letter => HLetter , // rule WB7b
666674 _ => break , // backtrack will happen
667675 } ,
668- } ;
676+ }
669677 }
670678
671679 if let FormatExtend ( t) = state {
0 commit comments