Skip to content

Commit 58c9a51

Browse files
authored
feat(compiler)!: Change Char internal representation (#1622)
1 parent 43082f5 commit 58c9a51

File tree

11 files changed

+16
-17
lines changed

11 files changed

+16
-17
lines changed

compiler/src/codegen/compcore.re

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,7 +1728,7 @@ let allocate_bytes_uninitialized = (wasm_mod, env, size) => {
17281728
let create_char = (wasm_mod, env, char) => {
17291729
let uchar = List.hd @@ Utf8.decodeUtf8String(char);
17301730
let uchar_int: int = Utf8__Uchar.toInt(uchar);
1731-
let grain_char = uchar_int lsl 3 lor 0b010;
1731+
let grain_char = uchar_int lsl 8 lor 0b010;
17321732
Expression.Const.make(wasm_mod, const_int32(grain_char));
17331733
};
17341734

@@ -2342,7 +2342,7 @@ let compile_prim1 = (wasm_mod, env, p1, arg, loc): Expression.t => {
23422342
wasm_mod,
23432343
Op.shl_int32,
23442344
compiled_arg,
2345-
Expression.Const.make(wasm_mod, const_int32(0x3)),
2345+
Expression.Const.make(wasm_mod, const_int32(0x8)),
23462346
),
23472347
Expression.Const.make(wasm_mod, const_int32(0b10)),
23482348
)
@@ -2351,7 +2351,7 @@ let compile_prim1 = (wasm_mod, env, p1, arg, loc): Expression.t => {
23512351
wasm_mod,
23522352
Op.shr_s_int32,
23532353
compiled_arg,
2354-
Expression.Const.make(wasm_mod, const_int32(0x3)),
2354+
Expression.Const.make(wasm_mod, const_int32(0x8)),
23552355
)
23562356
| Not =>
23572357
/* Flip the first bit */

compiler/test/__snapshots__/chars.200d9e1a.0.snapshot

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ chars › char4
3333
(block $cleanup_locals.2 (result i32)
3434
(local.set $0
3535
(block $compile_block.1 (result i32)
36-
(i32.const 522)
36+
(i32.const 16642)
3737
)
3838
)
3939
(local.get $0)

compiler/test/__snapshots__/chars.259e330c.0.snapshot

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ chars › char2
3333
(block $cleanup_locals.2 (result i32)
3434
(local.set $0
3535
(block $compile_block.1 (result i32)
36-
(i32.const 522)
36+
(i32.const 16642)
3737
)
3838
)
3939
(local.get $0)

compiler/test/__snapshots__/chars.27fb7f30.0.snapshot

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ chars › char8
3333
(block $cleanup_locals.2 (result i32)
3434
(local.set $0
3535
(block $compile_block.1 (result i32)
36-
(i32.const 80194)
36+
(i32.const 2566146)
3737
)
3838
)
3939
(local.get $0)

compiler/test/__snapshots__/chars.51010573.0.snapshot

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ chars › char7
3333
(block $cleanup_locals.2 (result i32)
3434
(local.set $0
3535
(block $compile_block.1 (result i32)
36-
(i32.const 1022450)
36+
(i32.const 32718338)
3737
)
3838
)
3939
(local.get $0)

compiler/test/__snapshots__/chars.7e0f68db.0.snapshot

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ chars › char6
3333
(block $cleanup_locals.2 (result i32)
3434
(local.set $0
3535
(block $compile_block.1 (result i32)
36-
(i32.const 1025402)
36+
(i32.const 32812802)
3737
)
3838
)
3939
(local.get $0)

compiler/test/__snapshots__/chars.af4b3613.0.snapshot

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ chars › char5
3333
(block $cleanup_locals.2 (result i32)
3434
(local.set $0
3535
(block $compile_block.1 (result i32)
36-
(i32.const 522)
36+
(i32.const 16642)
3737
)
3838
)
3939
(local.get $0)

compiler/test/__snapshots__/chars.e1cac8cd.0.snapshot

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ chars › char3
3333
(block $cleanup_locals.2 (result i32)
3434
(local.set $0
3535
(block $compile_block.1 (result i32)
36-
(i32.const 522)
36+
(i32.const 16642)
3737
)
3838
)
3939
(local.get $0)

docs/contributor/data_representations.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ To avoid overwriting data, we shift simple numbers to the left by 1 bit, then se
3636

3737
### Chars
3838

39-
The Grain `Char` type is represented as a tagged Unicode scalar value. They're similar to simple numbers, though they use a full 3-bit tag, `0b010`. Unicode scalar values exist in the range 0x0-10FFFF, which means it only takes 21 bits to store a USV—that leaves plenty of room for our 3-bit tag, and we cover the full spectrum of USVs. To tag a USV, we shift the value left by 3 bits and set the last 3 bits to our tag value, `0b010`. This means that Grain chars are stored as `8n + 2`, where `n` is the USV. Just like numbers, there are some tricks we can do to avoid untagging and retagging when manipulating chars. For example, the successor of a char can be found by just adding 8: `8(n + 1) + 2` is `(8n + 2) + 8`.
39+
The Grain `Char` type is represented as a tagged Unicode scalar value. They're similar to simple numbers, though they use a full 3-bit tag, `0b010`. Unicode scalar values exist in the range 0x0-10FFFF, which means it only takes 21 bits to store a USV—even after an 8-bit shift that is only 29 of the 32 bits in an `i32`, which leaves plenty of room for our 3-bit tag, and we cover the full spectrum of USVs. To tag a USV, we shift the value left by 8 bits and set the last 3 bits to our tag value, `0b010` (bits 3 through 7 are left as zero). This means that Grain chars are stored as `(2^8)n + 2`, where `n` is the USV. Just like numbers, there are some tricks we can do to avoid untagging and retagging when manipulating chars. For example, the successor of a char can be found by just adding 256: `(2^8)(n + 1) + 2` is `((2^8)n + 2) + 2^8`.
4040

4141
## Structure of Heap-Allocated Data
4242

stdlib/char.gr

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010

1111
import WasmI32 from "runtime/unsafe/wasmi32"
1212
import Errors from "runtime/unsafe/errors"
13-
import Tags from "runtime/unsafe/tags"
1413
import {
1514
tagSimpleNumber,
1615
tagChar,
@@ -92,10 +91,10 @@ export let fromCode = (usv: Number) => {
9291

9392
// Here we use a math trick to avoid fully untagging and retagging.
9493
// Simple numbers are represented as 2n + 1 and chars are represented as
95-
// 8n + 2. Quick reminder that shifting left is the equivalent of multiplying
96-
// by 2, and that _GRAIN_CHAR_TAG_TYPE is equal to 2:
97-
// 4(2n + 1) - 2 = 8n + 2
98-
let char = (usv << 2n) - Tags._GRAIN_CHAR_TAG_TYPE
94+
// (2^8)n + 2. Quick reminder that shifting left by k bits is the equivalent of
95+
// multiplying by 2^k:
96+
// 2^7(2n + 1) - (2^7 - 2) = (2^8)n + 2, where 2^7 - 2 = 126
97+
let char = (usv << 7n) - 126n
9998

10099
WasmI32.toGrain(char): Char
101100
}

0 commit comments

Comments
 (0)