Skip to content

Commit 13862d8

Browse files
authored
feat: Support Unicode 17.0.0 (#157)
* feat: Support Unicode 17.0.0
* feat: Store right_significant_is_emoji state instead
* fix: Move check for right_significant_is_emoji above state machine
* doc: Add comments explaining connection to word boundary rules
1 parent 1441d3d commit 13862d8

5 files changed

Lines changed: 3211 additions & 3177 deletions

File tree

scripts/unicode.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
# these are the surrogate codepoints, which are not valid rust characters
5555
surrogate_codepoints = (0xd800, 0xdfff)
5656

57-
UNICODE_VERSION = (16, 0, 0)
57+
UNICODE_VERSION = (17, 0, 0)
5858

5959
UNICODE_VERSION_NUMBER = "%s.%s.%s" %UNICODE_VERSION
6060

src/grapheme.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -800,13 +800,14 @@ impl GraphemeCursor {
800800
self.cat_after = self.cat_before.take();
801801
self.state = GraphemeState::Unknown;
802802
if let Some(incb_linker_count) = self.incb_linker_count {
803-
self.ris_count = if incb_linker_count > 0 && crate::tables::is_incb_linker(ch) {
804-
Some(incb_linker_count - 1)
805-
} else if crate::tables::derived_property::InCB_Extend(ch) {
806-
Some(incb_linker_count)
807-
} else {
808-
None
809-
};
803+
self.incb_linker_count =
804+
if incb_linker_count > 0 && crate::tables::is_incb_linker(ch) {
805+
Some(incb_linker_count - 1)
806+
} else if crate::tables::derived_property::InCB_Extend(ch) {
807+
Some(incb_linker_count)
808+
} else {
809+
None
810+
};
810811
}
811812
if let Some(ris_count) = self.ris_count {
812813
self.ris_count = if ris_count > 0 {

0 commit comments

Comments
 (0)