Skip to content

GraphemeCursor::next_boundary() returns incorrect boundary #115

@noib3

Description

@noib3

The grapheme cluster boundaries of "🇷🇸🇮🇴" should be at byte offsets 8 and 16, but when I feed GraphemeCursor the individual regional indicator symbol (RIS) code points one chunk at a time, I get 8 and 12 instead. Am I using the API incorrectly, or is this a bug?

use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};

// Reproduction: drives a single `GraphemeCursor` across "🇷🇸🇮🇴", handing it one
// code point per chunk, and checks each intermediate result. The four code
// points are passed at byte offsets 0, 4, 8 and 12. Every `match` accepts
// exactly one outcome and hits `unreachable!()` on anything else, so the
// program documents the precise call/response sequence being reported.
// Expected boundaries are 8 and 16; the final step observes 12 instead of 16.
fn main() {
    let s = "🇷🇸🇮🇴";

    // Cursor starts at byte offset 0 over a string of `s.len()` bytes.
    // NOTE(review): the third argument is presumably the "extended grapheme
    // clusters" flag — confirm against the crate documentation.
    let mut cursor = GraphemeCursor::new(0, s.len(), true);

    // First boundary search: flag pair 🇷🇸 (chunks at offsets 0 and 4).

    // Only the first code point is supplied; the cursor asks for the next chunk.
    match cursor.next_boundary("🇷", 0) {
        Err(GraphemeIncomplete::NextChunk) => {}
        _ => unreachable!(),
    }

    // With the chunk at offset 4, the cursor requests the text that ends at
    // offset 4, so the preceding chunk ("🇷" at offset 0) is supplied as context.
    match cursor.next_boundary("🇸", 4) {
        Err(GraphemeIncomplete::PreContext(4)) => {
            cursor.provide_context("🇷", 0);
        }
        _ => unreachable!(),
    }

    // Retry with the same chunk after providing context; the cursor now asks
    // for the following chunk.
    match cursor.next_boundary("🇸", 4) {
        Err(GraphemeIncomplete::NextChunk) => {}
        _ => unreachable!(),
    }

    // With the chunk at offset 8, the cursor requests context ending at
    // offset 8 ("🇸" at offset 4)...
    match cursor.next_boundary("🇮", 8) {
        Err(GraphemeIncomplete::PreContext(8)) => {
            cursor.provide_context("🇸", 4);
        }
        _ => unreachable!(),
    }

    // ...and then context ending at offset 4 ("🇷" at offset 0).
    match cursor.next_boundary("🇮", 8) {
        Err(GraphemeIncomplete::PreContext(4)) => {
            cursor.provide_context("🇷", 0);
        }
        _ => unreachable!(),
    }

    // With all requested context supplied, the first boundary is reported at
    // byte offset 8, which matches the expected value.
    match cursor.next_boundary("🇮", 8) {
        Ok(Some(8)) => {}
        _ => unreachable!(),
    }

    // Second boundary search: flag pair 🇮🇴 (chunks at offsets 8 and 12),
    // continuing with the same cursor.

    // Only the chunk at offset 8 is supplied; the cursor asks for the next chunk.
    match cursor.next_boundary("🇮", 8) {
        Err(GraphemeIncomplete::NextChunk) => {}
        _ => unreachable!(),
    }

    // With the chunk at offset 12, the cursor requests context step by step,
    // moving backwards: first the text ending at offset 12...
    match cursor.next_boundary("🇴", 12) {
        Err(GraphemeIncomplete::PreContext(12)) => {
            cursor.provide_context("🇮", 8);
        }
        _ => unreachable!(),
    }

    // ...then the text ending at offset 8...
    match cursor.next_boundary("🇴", 12) {
        Err(GraphemeIncomplete::PreContext(8)) => {
            cursor.provide_context("🇸", 4);
        }
        _ => unreachable!(),
    }

    // ...then the text ending at offset 4, which reaches the start of the string.
    match cursor.next_boundary("🇴", 12) {
        Err(GraphemeIncomplete::PreContext(4)) => {
            cursor.provide_context("🇷", 0);
        }
        _ => unreachable!(),
    }

    // Final query. The expected boundary is the end of the string (16).
    // The `Ok(Some(12))` arm is the result actually observed in this report
    // and panics to make the discrepancy visible.
    // NOTE(review): whether this call sequence is a valid use of the API or
    // exposes a library bug is the open question of the report.
    match cursor.next_boundary("🇴", 12) {
        Ok(Some(16)) => {}
        Ok(Some(12)) => panic!("this should be 16"),
        _ => unreachable!(),
    }
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions