Skip to content

Commit 3a391bf

Browse files
committed
doc: Add comments explaining connection to word boundary rules
1 parent c9ec06a commit 3a391bf

1 file changed

Lines changed: 10 additions & 2 deletions

File tree

src/word.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,11 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
468468
let mut state = Start;
469469
let mut savestate = Start;
470470
let mut cat = wd::WC_Any;
471-
// Tracks whether the nearest non-(Extend|Format) char to the right is emoji.
471+
472+
// WB3c is context-sensitive (ZWJ + Extended_Pictographic),
473+
// while WB4 collapses Extend/Format and would otherwise hide that context.
474+
// We therefore keep this context outside the main state machine:
475+
// whether the nearest non-(Extend|Format) char to the right is emoji.
472476
let mut right_significant_is_emoji: bool = false;
473477

474478
let mut skipped_format_extend = false;
@@ -490,10 +494,14 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
490494
// Hebrew Letter immediately before it.
491495
// (2) Format and Extend char handling takes some gymnastics.
492496

497+
// Reverse-direction WB3c check: when we encounter ZWJ and the nearest
498+
// significant right-side char is emoji, do not break here.
493499
if cat == wd::WC_ZWJ && state != Zwj && right_significant_is_emoji {
494500
continue;
495501
}
496502

503+
// Keep the right-side WB3c context up to date as we move left.
504+
// Ignore Extend/Format here to mirror WB4 collapsing behavior.
497505
if cat != wd::WC_Extend && cat != wd::WC_Format {
498506
right_significant_is_emoji = is_emoji(ch);
499507
}
@@ -665,7 +673,7 @@ impl<'a> DoubleEndedIterator for UWordBounds<'a> {
665673
RequireHLetter if cat == wd::WC_Hebrew_Letter => HLetter, // rule WB7b
666674
_ => break, // backtrack will happen
667675
},
668-
};
676+
}
669677
}
670678

671679
if let FormatExtend(t) = state {

0 commit comments

Comments
 (0)