Skip to content

Commit c549fac

Browse files
feat(stdlib): Add Char.encodedLength (#2238)
Co-authored-by: Oscar Spencer <oscar.spen@gmail.com>
1 parent bdb7f7c commit c549fac

File tree

3 files changed

+111
-0
lines changed

3 files changed

+111
-0
lines changed

compiler/test/stdlib/char.test.gr

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,20 @@ assert Char.code(Char.pred(Char.fromCode(0xE000))) == 0xD7FF
4040
assert Char.toString('f') == "f"
4141
assert Char.toString('💯') == "💯"
4242

43+
// encodedLength: UTF8 and UTF32 results are byte counts, UTF16 results are 16-bit code units
44+
assert Char.encodedLength(Char.UTF8, 'a') == 1
45+
assert Char.encodedLength(Char.UTF8, '©') == 2
46+
assert Char.encodedLength(Char.UTF8, '☃') == 3
47+
assert Char.encodedLength(Char.UTF8, '🌾') == 4
48+
assert Char.encodedLength(Char.UTF16, 'a') == 1
49+
assert Char.encodedLength(Char.UTF16, '©') == 1
50+
assert Char.encodedLength(Char.UTF16, '☃') == 1
51+
assert Char.encodedLength(Char.UTF16, '🌾') == 2
52+
assert Char.encodedLength(Char.UTF32, 'a') == 4
53+
assert Char.encodedLength(Char.UTF32, '©') == 4
54+
assert Char.encodedLength(Char.UTF32, '☃') == 4
55+
assert Char.encodedLength(Char.UTF32, '🌾') == 4
56+
4357
// issue #927
4458
let chars = [> '\u{1F3F4}', '\u{E0067}']
4559
let mut charPosition = 0

stdlib/char.gr

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,42 @@ provide let toString = (char: Char) => {
172172
WasmI32.toGrain(string): String
173173
}
174174

175+
/**
176+
* The Unicode encodings in which `encodedLength` can measure a character.
177+
*
178+
* @since v0.7.0
179+
*/
180+
provide enum Encoding {
181+
UTF8,
182+
UTF16,
183+
UTF32,
184+
}
185+
186+
/**
187+
* Returns the length of a character when encoded in the given encoding, measured in bytes for UTF8 and UTF32 and in 16-bit code units for UTF16.
188+
*
189+
* @param encoding: The encoding to check
190+
* @param char: The character
191+
* @returns The number of bytes (UTF8, UTF32) or 16-bit code units (UTF16) needed to encode the character
192+
*
193+
* @example Char.encodedLength(Char.UTF8, 'a') == 1
194+
* @example Char.encodedLength(Char.UTF8, '🌾') == 4
195+
* @example Char.encodedLength(Char.UTF16, '©') == 1
196+
*
197+
* @since v0.7.0
198+
*/
199+
@unsafe
200+
provide let encodedLength = (encoding, char: Char) => {
201+
let usv = untagChar(char)
202+
let utf8ByteCount = usvEncodeLength(usv)
203+
let utf8ByteCount = tagSimpleNumber(utf8ByteCount)
204+
match (encoding) {
205+
UTF32 => 4,
206+
UTF16 => if (utf8ByteCount == 4) 2 else 1, // a 4-byte UTF-8 character lies outside the BMP and needs a surrogate pair (2 code units)
207+
UTF8 => utf8ByteCount,
208+
}
209+
}
210+
175211
/**
176212
* Checks if the first character is less than the second character by Unicode scalar value.
177213
*

stdlib/char.md

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,27 @@ from "char" include Char
2727
'🌾'
2828
```
2929

30+
## Types
31+
32+
Type declarations included in the Char module.
33+
34+
### Char.**Encoding**
35+
36+
<details disabled>
37+
<summary tabindex="-1">Added in <code>next</code></summary>
38+
No other changes yet.
39+
</details>
40+
41+
```grain
42+
enum Encoding {
43+
UTF8,
44+
UTF16,
45+
UTF32,
46+
}
47+
```
48+
49+
The Unicode encodings in which `encodedLength` can measure a character.
50+
3051
## Values
3152

3253
Functions and constants included in the Char module.
@@ -285,6 +306,46 @@ Char.toString('a') == "a"
285306
Char.toString('🌾') == "🌾"
286307
```
287308

309+
### Char.**encodedLength**
310+
311+
<details disabled>
312+
<summary tabindex="-1">Added in <code>next</code></summary>
313+
No other changes yet.
314+
</details>
315+
316+
```grain
317+
encodedLength : (encoding: Encoding, char: Char) => Number
318+
```
319+
320+
Returns the length of a character when encoded in the given encoding, measured in bytes for UTF8 and UTF32 and in 16-bit code units for UTF16.
321+
322+
Parameters:
323+
324+
|param|type|description|
325+
|-----|----|-----------|
326+
|`encoding`|`Encoding`|The encoding to check|
327+
|`char`|`Char`|The character|
328+
329+
Returns:
330+
331+
|type|description|
332+
|----|-----------|
333+
|`Number`|The number of bytes (UTF8, UTF32) or 16-bit code units (UTF16) needed to encode the character|
334+
335+
Examples:
336+
337+
```grain
338+
Char.encodedLength(Char.UTF8, 'a') == 1
339+
```
340+
341+
```grain
342+
Char.encodedLength(Char.UTF8, '🌾') == 4
343+
```
344+
345+
```grain
346+
Char.encodedLength(Char.UTF16, '©') == 1
347+
```
348+
288349
### Char.**(<)**
289350

290351
<details disabled>

0 commit comments

Comments
 (0)