Skip to content

Commit 925ab31

Browse files
committed
Merge remote-tracking branch 'origin/main' into version-resolution
# Conflicts: # src/cli/cli_args.zig
2 parents 3c22c88 + d66ca78 commit 925ab31

85 files changed

Lines changed: 11566 additions & 4691 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/ci_zig_nix.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ jobs:
3333
- uses: cachix/install-nix-action@02a151ada4993995686f9ed4f1be7cfbb229e56f # ratchet:cachix/install-nix-action@v31
3434
with:
3535
nix_path: nixpkgs=channel:nixos-25.05
36+
extra_nix_config: |
37+
min-free = 1073741824
38+
max-free = 5368709120
39+
# When free space drops below min-free (1 GB), nix GCs until max-free (5 GB) is available. This avoids "No space left on device" on CI.
3640
3741
- name: Build inside a nix dev shell
3842
uses: ./.github/actions/flaky-retry

build.zig

Lines changed: 177 additions & 31 deletions
Large diffs are not rendered by default.

ci/benchmarks_zig/run_fx_benchmarks.sh

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,69 @@ preflight_benchmark() {
228228
return 2
229229
}
230230

231+
# Determine whether `roc build <file>` produces a byte-identical executable for
232+
# the two binaries. A confirmed build slowdown whose output executable is
233+
# byte-identical is definitionally a false positive: the compiler produced the
234+
# same program, so the timing difference is measurement or binary-layout noise
235+
# (e.g. a larger compiler binary with slightly different code locality) rather
236+
# than real work.
237+
#
238+
# roc embeds nothing version-specific into the linked executable (the compiler
239+
# version only reaches the DWARF producer string in the intermediate bitcode,
240+
# which is stripped before linking), and its linker is deterministic, so two
241+
# compiler builds that do the same work emit byte-identical executables.
242+
#
243+
# Each binary builds in an isolated working directory and cache so their outputs
244+
# cannot clobber each other; --no-cache bypasses cache reads but still emits the
245+
# executable. Returns 0 only when both executables are present and byte-identical;
246+
# any uncertainty (a build failure or a missing executable) returns 1 so we fail
247+
# safe toward the normal slowdown failure.
248+
build_executable_output_identical() {
    # Arguments:
    #   $1 - baseline (main) roc binary
    #   $2 - PR roc binary
    #   $3 - source file to build
    #   $4 - extra `roc build` arguments as one whitespace-separated string
    #        (may be empty)
    # Returns 0 only when both builds succeed and emit byte-identical
    # executables; returns 1 on any failure or uncertainty.
    local main_roc="$1"
    local pr_roc="$2"
    local fx_file="$3"
    local roc_extra_args="$4"

    # roc resolves a platform's relative path against the source file's location
    # and writes the executable into the current directory, so we pass an
    # absolute source path and build from a scratch directory per binary.
    #
    # The directory is resolved in its own assignment: the exit status of an
    # assignment is that of the LAST command substitution, so combining the
    # `cd` and `basename` substitutions in one word would hide a failed `cd`.
    local fx_dir abs_fx
    fx_dir=$(cd "$(dirname "$fx_file")" && pwd) || return 1
    abs_fx="$fx_dir/$(basename "$fx_file")"

    local work
    work=$(mktemp -d) || return 1
    local main_dir="$work/main" pr_dir="$work/pr"

    local -a extra_arg_array=()
    if [ -n "$roc_extra_args" ]; then
        read -r -a extra_arg_array <<< "$roc_extra_args"
    fi

    # Shell status convention: 0 means identical. Start pessimistic so every
    # failure path below falls through to the single cleanup at the end.
    local status=1
    local main_exe="" pr_exe=""

    # Guard the array expansion so an empty extra-args list does not trip
    # `set -u` on older bash (e.g. macOS's bash 3.2).
    if mkdir -p "$main_dir" "$pr_dir" \
        && ( cd "$main_dir" && XDG_CACHE_HOME="$main_dir/.cache" "$main_roc" build "$abs_fx" --no-cache "${extra_arg_array[@]+"${extra_arg_array[@]}"}" >/dev/null 2>&1 ) \
        && ( cd "$pr_dir" && XDG_CACHE_HOME="$pr_dir/.cache" "$pr_roc" build "$abs_fx" --no-cache "${extra_arg_array[@]+"${extra_arg_array[@]}"}" >/dev/null 2>&1 ); then
        # The executable is the only regular, user-executable file written
        # directly into each scratch directory (the cache lives in a subdirectory).
        main_exe=$(find "$main_dir" -maxdepth 1 -type f -perm -u+x | head -1)
        pr_exe=$(find "$pr_dir" -maxdepth 1 -type f -perm -u+x | head -1)
        if [ -n "$main_exe" ] && [ -n "$pr_exe" ] && cmp -s "$main_exe" "$pr_exe"; then
            status=0
        fi
    fi

    # Single cleanup point; `:?` refuses to run if the path is somehow empty.
    rm -rf -- "${work:?}"
    return "$status"
}
293+
231294
# Run hyperfine benchmark and return percentage change via global variable
232295
# Returns 0 on success, 1 on failure
233296
# Sets BENCH_PCT_CHANGE on success
@@ -428,6 +491,16 @@ benchmark_file() {
428491
local confirm_is_slower
429492
confirm_is_slower=$(awk "BEGIN {print ($confirm_pct_change > 4 && $confirm_abs_delta_ms > 5) ? 1 : 0}")
430493
if [ "$confirm_is_slower" = "1" ]; then
494+
# A build slowdown whose output executable is byte-identical is a
495+
# definitional false positive (the same program cannot have cost
496+
# more to produce), so pass it without a human override.
497+
if [ "$roc_subcommand" = "build" ] && \
498+
build_executable_output_identical "$MAIN_ROC" "$PR_ROC" "$fx_file" "$roc_extra_args"; then
499+
echo " IDENTICAL OUTPUT: $display_name produces a byte-identical executable on both binaries; treating the timing difference as a false positive (measurement or binary-layout noise), not a regression."
500+
echo ""
501+
return 0
502+
fi
503+
431504
echo " SLOWER EXECUTION CONFIRMED in $display_name (${pct_change}% / ${abs_delta_ms} ms then ${confirm_pct_change}% / ${confirm_abs_delta_ms} ms)"
432505
SLOWER_DETECTED=1
433506
SLOWER_FILES+=("$display_name")

design.md

Lines changed: 42 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,12 +1110,14 @@ that checked module. The same checked function template may therefore produce
11101110
many Monotype bodies, and the same checked nested lambda site may produce many
11111111
nested Monotype functions, each with a different monomorphic function type.
11121112

1113-
An instantiation context owns stage-local type cells addressed by
1114-
`(checked module id, checked type id)`. The address is the checked identity of
1115-
the type variable/content in the current specialization. It is not a structural
1116-
digest, source name, runtime layout, object symbol, or generated procedure id.
1117-
Cells begin unresolved. As the specialization is lowered, explicit evidence from
1118-
checked data constrains those cells:
1113+
Each specialization owns an instantiation graph: union-find nodes with
1114+
explicit row-extension links, created by instantiating checked types on first
1115+
touch. Instantiation contexts cache nodes by `(checked module id, checked type
1116+
id)`. The address is the checked identity of the type variable/content in the
1117+
current specialization. It is not a structural digest, source name, runtime
1118+
layout, object symbol, or generated procedure id. Nodes begin unresolved. As
1119+
the specialization is lowered, explicit evidence from checked data unifies
1120+
those nodes:
11191121

11201122
- the requested root function/value type constrains the checked root type;
11211123
- lambda and closure expected function types constrain the nested function
@@ -1222,8 +1224,8 @@ encounter the same checked type under better evidence and try to assign a
12221224
different Monotype type. That is not a valid compiler state; it is evidence that
12231225
the stage was not lowering from one constrained specialization graph. The
12241226
instantiation model makes the intended data flow explicit, so the first
1225-
constraint and every later constraint meet in the same cell before the final
1226-
Monotype body is emitted.
1227+
constraint and every later constraint meet in the same graph node before the
1228+
final Monotype body is emitted.
12271229

12281230
An unconstrained checked type variable that remains open after checking lowers
12291231
to the empty tag union in Monotype. This is not a default choice. It records the
@@ -1232,19 +1234,33 @@ can still be represented as `List([ ])` because they contain no elements, and
12321234
code that would need an actual element value must have constrained the element
12331235
type earlier or must be unreachable at runtime.
12341236

1235-
During Monotype construction, an open checked variable is represented by a
1236-
stage-local type cell. The cell starts as the empty tag union, and it may be
1237-
completed with a concrete type while the same Monotype body is still being
1238-
constructed if call-site arguments, expected lambda types, numeric literals, or
1239-
checked type relations provide concrete evidence. This is ordinary type solving
1240-
inside one stage. Once Monotype IR is output, no open cell remains and no
1241-
later stage may change a type.
1242-
1243-
Monotype type cells are addressed by the owning checked module id and the exact
1244-
checked type id. They are not addressed by `TypeDigest`. A digest can identify
1245-
closed structural type content for specialization and comparison, but it cannot
1246-
distinguish two different open checked variables with the same shape. Treating
1247-
those variables as the same cell is a compiler bug.
1237+
During Monotype construction, an open checked variable is an unresolved graph
1238+
node carrying the variable's numeric and row defaults. Unification resolves it
1239+
when call-site arguments, expected lambda types, numeric literals, or checked
1240+
type relations provide concrete evidence; defaults apply only at
1241+
materialization. A Monotype is a materialized view of a solved node: it is
1242+
reserved at a stable id and its content is refilled in place when its node
1243+
gains evidence, so every holder of the id observes the solved type. This is
1244+
ordinary type solving inside one stage. Once Monotype IR is output, no
1245+
unresolved node remains reachable and no later stage may change a type.
1246+
1247+
A Monotype imported into another specialization's graph is a finished
1248+
snapshot, never a refreshable view: a specialization that needs more than its
1249+
requested type is a unification conflict, not a silent rewrite of another
1250+
specialization's final type. Procedure template body requests therefore defer
1251+
to the end of the requesting specialization, when its types are final and the
1252+
specialization key is stable. Nested functions are the exception: they share
1253+
the requester's graph, and an inferred local procedure's body pins signature
1254+
variables the requester's remaining body relies on, so nested bodies lower at
1255+
their request site.
1256+
1257+
Instantiation graph nodes are cached by the owning checked module id and the
1258+
exact checked type id. They are not cached by `TypeDigest`. A digest can
1259+
identify closed structural type content for specialization and comparison, but
1260+
it cannot distinguish two different open checked variables with the same shape.
1261+
Treating those variables as the same node is a compiler bug. Type digests are
1262+
alias-transparent and encode recursive back references, so structurally equal
1263+
types digest equally regardless of alias spelling or knot-tying ids.
12481264

12491265
Generated helper code for an empty tag union, such as an inspector requested
12501266
only because a container type mentions the empty tag union, has an unreachable
@@ -2740,12 +2756,11 @@ that value kind must be added explicitly here with a checked cache rule.
27402756
- function values
27412757

27422758
Compile-time evaluation failures are owned by checking finalization because the
2743-
module has not been output yet. User-written compile-time crashes, exhausted
2744-
compile-time limits, invalid compile-time host interaction, and unsupported
2745-
compile-time operations become checking diagnostics attached to the checked root
2746-
being finalized. OOM remains OOM. A post-check invariant failure while lowering
2747-
or interpreting a compile-time root is still a compiler bug, not a user-facing
2748-
diagnostic.
2759+
module has not been output yet. User-written compile-time crashes, invalid
2760+
compile-time host interaction, and unsupported compile-time operations become
2761+
checking diagnostics attached to the checked root being finalized. OOM remains
2762+
OOM. A post-check invariant failure while lowering or interpreting a
2763+
compile-time root is still a compiler bug, not a user-facing diagnostic.
27492764

27502765
While storing an eval result, the builder may reserve a `ConstNodeId` before
27512766
storing its children so repeated references to the same acyclic runtime value

docs/langref/iterators.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Iterators

docs/langref/loops.md

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,37 @@ Loops let you run the same code multiple times, in...well, in a loop.
44

55
## `for` Loops
66

7-
A `for` loop iterates over each item in a list:
7+
A `for` loop lets you run code on each item in an [iterator](iterators). For example:
88

99
```roc
1010
var $sum = 0
11+
12+
for n in 1.to(5) {
13+
$sum = $sum + n
14+
}
15+
```
16+
17+
> The `to` method returns a [range](numbers#ranges) which is an `Iter` of the number in question. For example, [`I64.to`](builtins/I64#to) returns `Iter(I64)`, and so `n` in this example would be an `I64`.
18+
19+
A loop body only includes statements; it does not have a final expression. The loop itself evaluates to `{}`.
20+
21+
### Iterating over types that have `iter`
22+
23+
`for` can also be used on types that have an `iter` method, as long as that method returns an [`Iter`](builtins/Iter). For example, [`List`](builtins/List) has [`List.iter`](builtins/List#iter), so you can do a `for` loop over a list:
24+
25+
```roc
26+
var $sum = 0
27+
1128
for n in [1, 2, 3, 4] {
1229
$sum = $sum + n
1330
}
1431
```
1532

16-
The item can be destructured inline:
33+
At runtime, this `[1, 2, 3, 4]` code snippet is exactly as efficient as the earlier `1.to(5)` one. In one case, `1.to(5)` will be evaluated to an `Iter` at compile time, and in the other, `[1, 2, 3, 4].iter()` will be evaluated at compile time to an identical `Iter`. By the time either program actually runs, they will have the same memory contents and will be executing the same instructions.
34+
35+
### Pattern matching in `for`
36+
37+
Whatever you put between `for` and `in` is treated as a [pattern](pattern-matching), meaning (for example) that the item can be destructured inline:
1738

1839
```roc
1940
var $total = 0
@@ -22,7 +43,7 @@ for (x, y) in [(1, 2), (3, 4), (5, 6)] {
2243
}
2344
```
2445

25-
Use `_` if you don't want to name the item:
46+
As usual, you can nest patterns as much as you like, and can use `_` if you don't want to name a pattern:
2647

2748
```roc
2849
var $count = 0
@@ -31,7 +52,16 @@ for _ in items {
3152
}
3253
```
3354

34-
A loop body only includes statements; it does not have a final expression. The loop itself evaluates to `{}`.
55+
Just like with [assignments](statements#assignments), the pattern you use here must be [exhaustive](pattern-matching#exhaustiveness). For example, the following would give an exhaustiveness error because the loop body couldn't know what value to use for `amount_to_add` if the item was ever `Err` at runtime:
56+
57+
```roc
58+
var $count = 0
59+
for Ok(amount_to_add) in items {
60+
$count = $count + amount_to_add
61+
}
62+
```
63+
64+
If you can't write an exhaustive pattern-match, you can name the entire iterator item and then use [`match`](pattern-matching#match) on it inside the loop body.
3565

3666
## `while` Loops
3767

0 commit comments

Comments
 (0)