Skip to content

Commit a5d5b60

Browse files
committed
add a surrogate codepoint case
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
1 parent 04fcf60 commit a5d5b60

File tree

1 file changed

+10
-0
lines changed
  • datafusion/functions/src/string

1 file changed

+10
-0
lines changed

datafusion/functions/src/string/chr.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,16 @@ mod tests {
207207
"requested character too large for encoding"
208208
);
209209

210+
// invalid Unicode code point (a surrogate code point)
211+
// link: <https://learn.microsoft.com/en-us/globalization/encoding/unicode-standard#surrogate-pairs>
212+
let input = Arc::new(Int64Array::from(vec![0xD800 + 1]));
213+
let result = chr(&[input]);
214+
assert!(result.is_err());
215+
assert_contains!(
216+
result.err().unwrap().to_string(),
217+
"requested character too large for encoding"
218+
);
219+
210220
// negative input
211221
let input = Arc::new(Int64Array::from(vec![i64::MIN + 2i64])); // will be 2 if cast to u32
212222
let result = chr(&[input]);

0 commit comments

Comments
 (0)