Skip to content

Commit 4305e82

Browse files
feat(stdlib): Add forEachChar, forEachChari, map and mapi to String module (#1864)
Co-authored-by: Oscar Spencer <oscar@grain-lang.org>
1 parent d822c87 commit 4305e82

File tree

3 files changed

+281
-2
lines changed

3 files changed

+281
-2
lines changed

compiler/test/stdlib/string.test.gr

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,39 @@ assert String.decode(bytes, String.UTF16_LE) == "¢"
477477
Array.mapi((c, i) => (c, i), codes)
478478
}
479479

480+
// char iteration tests
481+
// conveniently reusing data from `explode` tests
482+
{
483+
let mut tmp = []
484+
String.forEachChar(char => {
485+
tmp = [char, ...tmp]
486+
}, emojis)
487+
assert Array.reverse(Array.fromList(tmp)) == String.explode(emojis)
488+
}
489+
490+
{
491+
let mut tmp = []
492+
String.forEachChari((char, idx) => {
493+
tmp = [(char, idx), ...tmp]
494+
}, emojis)
495+
assert Array.reverse(Array.fromList(tmp)) ==
496+
Array.mapi((c, i) => (c, i), String.explode(emojis))
497+
}
498+
499+
// String.map
500+
assert String.map(c => 'a', "") == ""
501+
assert String.map(c => 'a', "Hello world") == "aaaaaaaaaaa"
502+
assert String.map(c => c, "Hello world") == "Hello world"
503+
504+
// String.mapi
505+
assert String.mapi((char, index) => String.charAt(0, toString(index)), "") == ""
506+
assert String.mapi(
507+
(char, index) => String.charAt(0, toString(index)),
508+
"Hello world"
509+
) ==
510+
"01234567891"
511+
assert String.mapi((char, index) => char, "Hello world") == "Hello world"
512+
480513
// String.trimStart
481514
assert String.trimStart("t test") == "t test"
482515
assert String.trimStart(" test") == "test"

stdlib/string.gr

Lines changed: 140 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1953,7 +1953,6 @@ provide let forEachCodePoint = (fn: Number => Void, str: String) => {
19531953
let mut ptr = strPtr + 8n
19541954
let end = ptr + byteSize
19551955

1956-
let mut idx = 0n
19571956
while (ptr < end) {
19581957
let byte = WasmI32.load8U(ptr, 0n)
19591958
let codePointByteCount = if ((byte & 0x80n) == 0x00n) {
@@ -1977,7 +1976,6 @@ provide let forEachCodePoint = (fn: Number => Void, str: String) => {
19771976
fn(tagSimpleNumber(codePoint))
19781977

19791978
ptr += codePointByteCount
1980-
idx += 1n
19811979
}
19821980

19831981
ignore(str)
@@ -2040,6 +2038,146 @@ provide let forEachCodePointi = (fn: (Number, Number) => Void, str: String) => {
20402038
void
20412039
}
20422040

2041+
/**
2042+
* Iterates over Unicode characters in a string.
2043+
*
2044+
* @param fn: The iterator function
2045+
* @param str: The string to iterate
2046+
*
2047+
* @example String.forEachChar(print, "Hello world")
2048+
*
2049+
* @since v0.6.5
2050+
*/
2051+
@unsafe
2052+
provide let forEachChar = (fn: Char => Void, str: String) => {
2053+
use WasmI32.{ (+), (-), (&), (>>>), ltU as (<), leU as (<=), (==) }
2054+
2055+
let strPtr = WasmI32.fromGrain(str)
2056+
2057+
let byteSize = WasmI32.load(strPtr, 4n)
2058+
2059+
let mut ptr = strPtr + 8n
2060+
let end = ptr + byteSize
2061+
2062+
while (ptr < end) {
2063+
let byte = WasmI32.load8U(ptr, 0n)
2064+
let codePointByteCount = if ((byte & 0x80n) == 0x00n) {
2065+
1n
2066+
} else if ((byte & 0xF0n) == 0xF0n) {
2067+
4n
2068+
} else if ((byte & 0xE0n) == 0xE0n) {
2069+
3n
2070+
} else {
2071+
2n
2072+
}
2073+
2074+
// Note that even if up to 4 bytes are needed to represent Unicode
2075+
// codepoints, this doesn't mean 32 bits. The highest allowed code point is
2076+
// 0x10FFFF and it should not change in future versions of Unicode. This
2077+
// means no more than 21 bits are necessary to represent a code point and
2078+
// thus we can use Grain's "simple" numbers that hold up to 31 bits and
2079+
// avoid heap allocations. `getCodePoint` will throw
2080+
// MalformedUnicode exception for values exceeding this limit.
2081+
let codePoint = getCodePoint(ptr)
2082+
fn(tagChar(codePoint))
2083+
2084+
ptr += codePointByteCount
2085+
}
2086+
void
2087+
}
2088+
2089+
/**
2090+
* Iterates over Unicode characters in a string. This is the same as
2091+
* `forEachChar`, but provides the character's index in the string
2092+
* as the second argument to the iterator function.
2093+
*
2094+
* @param fn: The iterator function
2095+
* @param str: The string to iterate
2096+
*
2097+
* @example String.forEachChari((char, index) => print((char, index)), "Hello world")
2098+
*
2099+
* @since v0.6.5
2100+
*/
2101+
@unsafe
2102+
provide let forEachChari = (fn: (Char, Number) => Void, str: String) => {
2103+
use WasmI32.{ (+), (-), (&), (>>>), ltU as (<), leU as (<=), (==) }
2104+
2105+
let strPtr = WasmI32.fromGrain(str)
2106+
2107+
let byteSize = WasmI32.load(strPtr, 4n)
2108+
2109+
let mut ptr = strPtr + 8n
2110+
let end = ptr + byteSize
2111+
2112+
let mut idx = 0n
2113+
while (ptr < end) {
2114+
let byte = WasmI32.load8U(ptr, 0n)
2115+
let codePointByteCount = if ((byte & 0x80n) == 0x00n) {
2116+
1n
2117+
} else if ((byte & 0xF0n) == 0xF0n) {
2118+
4n
2119+
} else if ((byte & 0xE0n) == 0xE0n) {
2120+
3n
2121+
} else {
2122+
2n
2123+
}
2124+
2125+
// Note that even if up to 4 bytes are needed to represent Unicode
2126+
// codepoints, this doesn't mean 32 bits. The highest allowed code point is
2127+
// 0x10FFFF and it should not change in future versions of Unicode. This
2128+
// means no more than 21 bits are necessary to represent a code point and
2129+
// thus we can use Grain's "simple" numbers that hold up to 31 bits and
2130+
// avoid heap allocations. `getCodePoint` will throw
2131+
// MalformedUnicode exception for values exceeding this limit.
2132+
let codePoint = getCodePoint(ptr)
2133+
fn(tagChar(codePoint), tagSimpleNumber(idx))
2134+
2135+
ptr += codePointByteCount
2136+
idx += 1n
2137+
}
2138+
void
2139+
}
2140+
2141+
/**
2142+
* Builds a new string by mapping Unicode characters.
2143+
*
2144+
* @param fn: The mapping function
2145+
* @param str: The string to map
2146+
*
2147+
* @example assert String.map((c) => 'a', "Hello world") == "aaaaaaaaaaa"
2148+
*
2149+
* @since v0.6.5
2150+
*/
2151+
provide let map = (fn: Char => Char, str: String) => {
2152+
let chars = explode(str)
2153+
let arrLen = arrayLength(chars)
2154+
for (let mut i = 0; i < arrLen; i += 1) {
2155+
chars[i] = fn(chars[i])
2156+
}
2157+
implode(chars)
2158+
}
2159+
2160+
/**
2161+
* Builds a new string by mapping Unicode characters. This is the same as
2162+
* `map`, but provides the character's index in the string
2163+
* as the second argument to the mapping function.
2164+
*
2165+
* @param fn: The mapping function
2166+
* @param str: The string to map
2167+
*
2168+
* @example assert String.mapi((char, index) => String.charAt(0, toString(index)), "Hello world") == "01234567891"
2169+
*
2170+
* @since v0.6.5
2171+
*/
2172+
provide let mapi = (fn: (Char, Number) => Char, str: String) => {
2173+
let chars = explode(str)
2174+
let arrLen = arrayLength(chars)
2175+
for (let mut i = 0; i < arrLen; i += 1) {
2176+
chars[i] = fn(chars[i], i)
2177+
}
2178+
implode(chars)
2179+
}
2180+
20432181
@unsafe
20442182
let trimString = (stringPtr: WasmI32, byteLength: WasmI32, fromEnd: Bool) => {
20452183
use WasmI32.{ (+), (-), (*), (>>>), ltU as (<), (==), (!=) }

stdlib/string.md

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,114 @@ Examples:
911911
String.forEachCodePointi((codepoint, index) => print((codepoint, index)), "Hello world")
912912
```
913913

914+
### String.**forEachChar**
915+
916+
<details disabled>
917+
<summary tabindex="-1">Added in <code>next</code></summary>
918+
No other changes yet.
919+
</details>
920+
921+
```grain
922+
forEachChar : (fn: (Char => Void), str: String) => Void
923+
```
924+
925+
Iterates over Unicode characters in a string.
926+
927+
Parameters:
928+
929+
|param|type|description|
930+
|-----|----|-----------|
931+
|`fn`|`Char => Void`|The iterator function|
932+
|`str`|`String`|The string to iterate|
933+
934+
Examples:
935+
936+
```grain
937+
String.forEachChar(print, "Hello world")
938+
```
939+
940+
### String.**forEachChari**
941+
942+
<details disabled>
943+
<summary tabindex="-1">Added in <code>next</code></summary>
944+
No other changes yet.
945+
</details>
946+
947+
```grain
948+
forEachChari : (fn: ((Char, Number) => Void), str: String) => Void
949+
```
950+
951+
Iterates over Unicode characters in a string. This is the same as
952+
`forEachChar`, but provides the character's index in the string
953+
as the second argument to the iterator function.
954+
955+
Parameters:
956+
957+
|param|type|description|
958+
|-----|----|-----------|
959+
|`fn`|`(Char, Number) => Void`|The iterator function|
960+
|`str`|`String`|The string to iterate|
961+
962+
Examples:
963+
964+
```grain
965+
String.forEachChari((char, index) => print((char, index)), "Hello world")
966+
```
967+
968+
### String.**map**
969+
970+
<details disabled>
971+
<summary tabindex="-1">Added in <code>next</code></summary>
972+
No other changes yet.
973+
</details>
974+
975+
```grain
976+
map : (fn: (Char => Char), str: String) => String
977+
```
978+
979+
Builds a new string by mapping Unicode characters.
980+
981+
Parameters:
982+
983+
|param|type|description|
984+
|-----|----|-----------|
985+
|`fn`|`Char => Char`|The mapping function|
986+
|`str`|`String`|The string to map|
987+
988+
Examples:
989+
990+
```grain
991+
assert String.map((c) => 'a', "Hello world") == "aaaaaaaaaaa"
992+
```
993+
994+
### String.**mapi**
995+
996+
<details disabled>
997+
<summary tabindex="-1">Added in <code>next</code></summary>
998+
No other changes yet.
999+
</details>
1000+
1001+
```grain
1002+
mapi : (fn: ((Char, Number) => Char), str: String) => String
1003+
```
1004+
1005+
Builds a new string by mapping Unicode characters. This is the same as
1006+
`map`, but provides the character's index in the string
1007+
as the second argument to the mapping function.
1008+
1009+
Parameters:
1010+
1011+
|param|type|description|
1012+
|-----|----|-----------|
1013+
|`fn`|`(Char, Number) => Char`|The mapping function|
1014+
|`str`|`String`|The string to map|
1015+
1016+
Examples:
1017+
1018+
```grain
1019+
assert String.mapi((char, index) => String.charAt(0, toString(index)), "Hello world") == "01234567891"
1020+
```
1021+
9141022
### String.**trimStart**
9151023

9161024
<details disabled>

0 commit comments

Comments
 (0)