Skip to content

Commit 5eb408b

Browse files
committed
Merge remote-tracking branch 'origin/main' into investigate-9591-pr9592
2 parents 3b0bb69 + d66ca78 commit 5eb408b

62 files changed

Lines changed: 4717 additions & 673 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

build.zig

Lines changed: 132 additions & 31 deletions
Large diffs are not rendered by default.

ci/benchmarks_zig/run_fx_benchmarks.sh

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,69 @@ preflight_benchmark() {
228228
return 2
229229
}
230230

231+
# Determine whether `roc build <file>` produces a byte-identical executable for
232+
# the two binaries. A confirmed build slowdown whose output executable is
233+
# byte-identical is definitionally a false positive: the compiler produced the
234+
# same program, so the timing difference is measurement or binary-layout noise
235+
# (e.g. a larger compiler binary with slightly different code locality) rather
236+
# than real work.
237+
#
238+
# roc embeds nothing version-specific into the linked executable (the compiler
239+
# version only reaches the DWARF producer string in the intermediate bitcode,
240+
# which is stripped before linking), and its linker is deterministic, so two
241+
# compiler builds that do the same work emit byte-identical executables.
242+
#
243+
# Each binary builds in an isolated working directory and cache so their outputs
244+
# cannot clobber each other; --no-cache bypasses cache reads but still emits the
245+
# executable. Returns 0 only when both executables are present and byte-identical;
246+
# any uncertainty (a build failure or a missing executable) returns 1 so we fail
247+
# safe toward the normal slowdown failure.
248+
# Build one source file with one roc binary inside a scratch directory and
# print the path of the single executable the build left there.
#
# Arguments:
#   $1   roc binary to invoke
#   $2   existing scratch directory; used as the working directory and as the
#        parent of the per-build cache (XDG_CACHE_HOME="$2/.cache")
#   $3   absolute path of the source file to build
#   $4.. extra arguments appended to `roc build` (may be absent)
#
# Returns 0 and prints the executable path only when the build succeeds and
# exactly one executable regular file sits directly in the scratch directory.
# Zero or several candidates are ambiguous, so they return 1.
_build_sole_executable() {
    local roc_bin="$1"
    local out_dir="$2"
    local src="$3"
    shift 3

    # ${1+"$@"} expands to nothing when no extra arguments remain, which keeps
    # `set -u` quiet on older bash (e.g. macOS's bash 3.2).
    ( cd "$out_dir" && XDG_CACHE_HOME="$out_dir/.cache" "$roc_bin" build "$src" --no-cache ${1+"$@"} >/dev/null 2>&1 ) || return 1

    local candidates
    candidates=$(find "$out_dir" -maxdepth 1 -type f -perm -u+x) || return 1
    [ -n "$candidates" ] || return 1

    # find's output order is unspecified, so picking "the first" of several
    # executables could compare unrelated files. Refuse instead of guessing.
    case "$candidates" in
        *$'\n'*) return 1 ;;
    esac

    printf '%s\n' "$candidates"
}

# Report whether `roc build` of one source file yields a byte-identical
# executable under two roc binaries.
#
# Arguments:
#   $1  main roc binary
#   $2  PR roc binary
#   $3  source file to build (relative or absolute)
#   $4  extra `roc build` arguments as one whitespace-separated string
#       (may be empty)
#
# Returns 0 only when both builds succeed, each leaves exactly one executable,
# and the two executables compare equal with `cmp`. Every other outcome
# (unresolvable source directory, scratch-dir failure, build failure, missing or
# ambiguous executable, differing bytes) returns 1. The scratch directory is
# removed on every path after it has been created.
build_executable_output_identical() {
    local main_roc="$1"
    local pr_roc="$2"
    local fx_file="$3"
    local roc_extra_args="${4-}"

    # roc resolves a platform's relative path against the source file's location
    # and writes the executable into the current directory, so each build gets
    # an absolute source path and its own scratch directory.
    #
    # The directory is resolved in its own assignment: when an assignment holds
    # several command substitutions its status is that of the last one, so
    # folding `basename` into the same assignment would hide a failed `cd`.
    local fx_dir
    fx_dir=$(cd "$(dirname "$fx_file")" && pwd) || return 1
    local abs_fx
    abs_fx="$fx_dir/$(basename "$fx_file")"

    local work
    work=$(mktemp -d) || return 1
    local main_dir="$work/main" pr_dir="$work/pr"

    local -a extra_arg_array=()
    if [ -n "$roc_extra_args" ]; then
        read -r -a extra_arg_array <<< "$roc_extra_args"
    fi

    # Single exit path: `result` stays 1 unless every step succeeds, and the
    # scratch directory is removed exactly once below.
    local result=1
    local main_exe pr_exe
    if mkdir -p "$main_dir" "$pr_dir" \
        && main_exe=$(_build_sole_executable "$main_roc" "$main_dir" "$abs_fx" "${extra_arg_array[@]+"${extra_arg_array[@]}"}") \
        && pr_exe=$(_build_sole_executable "$pr_roc" "$pr_dir" "$abs_fx" "${extra_arg_array[@]+"${extra_arg_array[@]}"}") \
        && cmp -s "$main_exe" "$pr_exe"; then
        result=0
    fi

    rm -rf "$work"
    return "$result"
}
293+
231294
# Run hyperfine benchmark and return percentage change via global variable
232295
# Returns 0 on success, 1 on failure
233296
# Sets BENCH_PCT_CHANGE on success
@@ -428,6 +491,16 @@ benchmark_file() {
428491
local confirm_is_slower
429492
confirm_is_slower=$(awk "BEGIN {print ($confirm_pct_change > 4 && $confirm_abs_delta_ms > 5) ? 1 : 0}")
430493
if [ "$confirm_is_slower" = "1" ]; then
494+
# A build slowdown whose output executable is byte-identical is a
495+
# definitional false positive (the same program cannot have cost
496+
# more to produce), so pass it without a human override.
497+
if [ "$roc_subcommand" = "build" ] && \
498+
build_executable_output_identical "$MAIN_ROC" "$PR_ROC" "$fx_file" "$roc_extra_args"; then
499+
echo " IDENTICAL OUTPUT: $display_name produces a byte-identical executable on both binaries; treating the timing difference as a false positive (measurement or binary-layout noise), not a regression."
500+
echo ""
501+
return 0
502+
fi
503+
431504
echo " SLOWER EXECUTION CONFIRMED in $display_name (${pct_change}% / ${abs_delta_ms} ms then ${confirm_pct_change}% / ${confirm_abs_delta_ms} ms)"
432505
SLOWER_DETECTED=1
433506
SLOWER_FILES+=("$display_name")

src/backend/dev/LirCodeGen.zig

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,16 @@ pub const BuiltinFn = enum {
268268
dec_from_str,
269269
float_from_str,
270270

271+
// Hasher operations
272+
dict_pseudo_seed,
273+
hasher_finish,
274+
hasher_write_u64,
275+
hasher_write_u128,
276+
hasher_write_f32_bits,
277+
hasher_write_f64_bits,
278+
hasher_write_bytes,
279+
hasher_write_str,
280+
271281
/// Get the exported symbol name for object file linking.
272282
pub fn symbolName(self: BuiltinFn) []const u8 {
273283
return switch (self) {
@@ -372,6 +382,16 @@ pub const BuiltinFn = enum {
372382
.int_from_str => "roc_builtins_int_from_str",
373383
.dec_from_str => "roc_builtins_dec_from_str",
374384
.float_from_str => "roc_builtins_float_from_str",
385+
386+
// Hasher operations
387+
.dict_pseudo_seed => "roc_builtins_dict_pseudo_seed",
388+
.hasher_finish => "roc_builtins_hasher_finish",
389+
.hasher_write_u64 => "roc_builtins_hasher_write_u64",
390+
.hasher_write_u128 => "roc_builtins_hasher_write_u128",
391+
.hasher_write_f32_bits => "roc_builtins_hasher_write_f32_bits",
392+
.hasher_write_f64_bits => "roc_builtins_hasher_write_f64_bits",
393+
.hasher_write_bytes => "roc_builtins_hasher_write_bytes",
394+
.hasher_write_str => "roc_builtins_hasher_write_str",
375395
};
376396
}
377397
};
@@ -3592,6 +3612,26 @@ pub fn LirCodeGen(comptime target: RocTarget) type {
35923612
return .{ .general_reg = result_reg };
35933613
},
35943614

3615+
.dict_pseudo_seed,
3616+
.hasher_finish,
3617+
.hasher_write_bool,
3618+
.hasher_write_u8,
3619+
.hasher_write_u16,
3620+
.hasher_write_u32,
3621+
.hasher_write_u64,
3622+
.hasher_write_u128,
3623+
.hasher_write_i8,
3624+
.hasher_write_i16,
3625+
.hasher_write_i32,
3626+
.hasher_write_i64,
3627+
.hasher_write_i128,
3628+
.hasher_write_f32,
3629+
.hasher_write_f64,
3630+
.hasher_write_dec,
3631+
.hasher_write_bytes,
3632+
.hasher_write_str,
3633+
=> return try self.generateHasherLowLevel(ll, args),
3634+
35953635
.u8_to_str => {
35963636
const value_loc = try self.emitValueLocal(args[0]);
35973637
return self.callIntToStr(value_loc, 1, false);
@@ -3953,6 +3993,196 @@ pub fn LirCodeGen(comptime target: RocTarget) type {
39533993
}
39543994
}
39553995

3996+
/// Translate a hasher write op into the integer value of the matching
/// `builtins.hash.HasherDomain` tag. Only the ops listed below have a
/// mapping; any other op is unreachable.
fn hasherDomain(op: lir.LowLevel) u8 {
    const Domain = builtins.hash.HasherDomain;
    const domain = switch (op) {
        // Unsigned integers and bool.
        .hasher_write_bool => Domain.bool,
        .hasher_write_u8 => Domain.u8,
        .hasher_write_u16 => Domain.u16,
        .hasher_write_u32 => Domain.u32,
        .hasher_write_u64 => Domain.u64,
        .hasher_write_u128 => Domain.u128,
        // Signed integers.
        .hasher_write_i8 => Domain.i8,
        .hasher_write_i16 => Domain.i16,
        .hasher_write_i32 => Domain.i32,
        .hasher_write_i64 => Domain.i64,
        .hasher_write_i128 => Domain.i128,
        // Non-integer payloads.
        .hasher_write_dec => Domain.dec,
        .hasher_write_bytes => Domain.bytes,
        else => unreachable,
    };
    return @intFromEnum(domain);
}
4014+
4015+
/// Width value (1, 2, 4 or 8) passed alongside the value for the hasher
/// write ops that go through the u64 writer. Any other op is unreachable.
fn hasherWidth(op: lir.LowLevel) u8 {
    const width: u8 = switch (op) {
        .hasher_write_u64, .hasher_write_i64 => 8,
        .hasher_write_u32, .hasher_write_i32 => 4,
        .hasher_write_u16, .hasher_write_i16 => 2,
        .hasher_write_u8, .hasher_write_i8, .hasher_write_bool => 1,
        else => unreachable,
    };
    return width;
}
4033+
4034+
/// Produce a general register holding the hasher state stored in `local`.
///
/// A stack-resident value is loaded as a 64-bit word into a freshly
/// allocated temporary; every other location is handed to
/// `ensureInGeneralReg`.
fn hasherStateReg(self: *Self, local: LocalId) Allocator.Error!GeneralReg {
    const state_loc = try self.emitValueLocal(local);
    switch (state_loc) {
        .stack => |slot| {
            const loaded = try self.allocTempGeneral();
            try self.codegen.emitLoadStack(.w64, loaded, slot.offset);
            return loaded;
        },
        else => return try self.ensureInGeneralReg(state_loc),
    }
}
4045+
4046+
/// Copy the 64-bit value in `ret_reg_0` into a newly allocated temporary
/// general register and return that register as the value's location.
fn scalarRetReg(self: *Self) Allocator.Error!ValueLocation {
    const dest = try self.allocTempGeneral();
    try self.codegen.emit.movRegReg(.w64, dest, ret_reg_0);
    return ValueLocation{ .general_reg = dest };
}
4051+
4052+
/// Emit a call to the `roc_builtins_hasher_write_u64` wrapper.
///
/// Arguments are passed in this order: seed register, `domain` as an
/// immediate, value register, `width` as an immediate. Both input
/// registers are released after the call, and the result is returned via
/// `scalarRetReg`.
fn callHasherWriteU64(
    self: *Self,
    seed_reg: GeneralReg,
    value_reg: GeneralReg,
    domain: u8,
    width: u8,
) Allocator.Error!ValueLocation {
    var call = try Builder.init(&self.codegen.emit, &self.codegen.stack_offset);
    try call.addRegArg(seed_reg);
    try call.addImmArg(@intCast(domain));
    try call.addRegArg(value_reg);
    try call.addImmArg(@intCast(width));
    try self.callBuiltin(&call, @intFromPtr(&dev_wrappers.roc_builtins_hasher_write_u64), .hasher_write_u64);

    // Seed and value may share one register; release it only once.
    self.codegen.freeGeneral(seed_reg);
    const value_is_distinct = value_reg != seed_reg;
    if (value_is_distinct) {
        self.codegen.freeGeneral(value_reg);
    }

    return try self.scalarRetReg();
}
4069+
4070+
/// Emit a call to the float-bits hasher wrapper: the f32 variant when
/// `is_f32` is true, the f64 variant otherwise.
///
/// Arguments are the seed register followed by the register holding the
/// float's bits. Both registers are released after the call, and the
/// result is returned via `scalarRetReg`.
fn callHasherWriteBits(self: *Self, seed_reg: GeneralReg, bits_reg: GeneralReg, comptime is_f32: bool) Allocator.Error!ValueLocation {
    var call = try Builder.init(&self.codegen.emit, &self.codegen.stack_offset);
    try call.addRegArg(seed_reg);
    try call.addRegArg(bits_reg);

    // `is_f32` is comptime-known, so only one of these calls is compiled in.
    if (is_f32) {
        try self.callBuiltin(&call, @intFromPtr(&dev_wrappers.roc_builtins_hasher_write_f32_bits), .hasher_write_f32_bits);
    } else {
        try self.callBuiltin(&call, @intFromPtr(&dev_wrappers.roc_builtins_hasher_write_f64_bits), .hasher_write_f64_bits);
    }

    // Seed and bits may share one register; release it only once.
    self.codegen.freeGeneral(seed_reg);
    if (bits_reg != seed_reg) {
        self.codegen.freeGeneral(bits_reg);
    }

    return try self.scalarRetReg();
}
4083+
4084+
/// Emit a call to the `roc_builtins_hasher_write_u128` wrapper.
///
/// Arguments are passed in this order: seed register, `domain` as an
/// immediate, low-half register, high-half register. All three registers
/// are released after the call, and the result is returned via
/// `scalarRetReg`.
fn callHasherWriteU128(self: *Self, seed_reg: GeneralReg, parts: I128Parts, domain: u8) Allocator.Error!ValueLocation {
    var call = try Builder.init(&self.codegen.emit, &self.codegen.stack_offset);
    try call.addRegArg(seed_reg);
    try call.addImmArg(@intCast(domain));
    try call.addRegArg(parts.low);
    try call.addRegArg(parts.high);
    try self.callBuiltin(&call, @intFromPtr(&dev_wrappers.roc_builtins_hasher_write_u128), .hasher_write_u128);

    self.codegen.freeGeneral(seed_reg);
    self.codegen.freeGeneral(parts.low);
    self.codegen.freeGeneral(parts.high);

    return try self.scalarRetReg();
}
4096+
4097+
/// Generate code for the hasher-related low-level ops.
///
/// `ll.op` selects the operation and `args` holds its operand locals:
/// none for `dict_pseudo_seed`, the hasher state for `hasher_finish`, and
/// the hasher state followed by the value for every `hasher_write_*` op.
/// An unexpected argument count, or an op not handled here, is
/// unreachable. Every path returns the result copied out of `ret_reg_0`
/// by `scalarRetReg`.
fn generateHasherLowLevel(self: *Self, ll: anytype, args: []const LocalId) Allocator.Error!ValueLocation {
    // Arity is fixed per op; a mismatch is treated as unreachable.
    const expected_arg_count: usize = switch (ll.op) {
        .dict_pseudo_seed => 0,
        .hasher_finish => 1,
        else => 2,
    };
    if (args.len != expected_arg_count) unreachable;

    switch (ll.op) {
        .dict_pseudo_seed => {
            // No operands: call the wrapper with an empty argument list.
            var call = try Builder.init(&self.codegen.emit, &self.codegen.stack_offset);
            try self.callBuiltin(&call, @intFromPtr(&dev_wrappers.roc_builtins_dict_pseudo_seed), .dict_pseudo_seed);
            return try self.scalarRetReg();
        },

        .hasher_finish => {
            const state_reg = try self.hasherStateReg(args[0]);
            var call = try Builder.init(&self.codegen.emit, &self.codegen.stack_offset);
            try call.addRegArg(state_reg);
            try self.callBuiltin(&call, @intFromPtr(&dev_wrappers.roc_builtins_hasher_finish), .hasher_finish);
            self.codegen.freeGeneral(state_reg);
            return try self.scalarRetReg();
        },

        // Bool and integers up to 64 bits all go through the u64 writer,
        // distinguished by the domain and width immediates.
        .hasher_write_bool,
        .hasher_write_u8,
        .hasher_write_i8,
        .hasher_write_u16,
        .hasher_write_i16,
        .hasher_write_u32,
        .hasher_write_i32,
        .hasher_write_u64,
        .hasher_write_i64,
        => {
            const state_reg = try self.hasherStateReg(args[0]);
            const operand_loc = try self.emitValueLocal(args[1]);
            const operand_reg = try self.ensureInGeneralReg(operand_loc);
            const domain = hasherDomain(ll.op);
            const width = hasherWidth(ll.op);
            return try self.callHasherWriteU64(state_reg, operand_reg, domain, width);
        },

        .hasher_write_f32 => {
            const state_reg = try self.hasherStateReg(args[0]);
            const operand_loc = try self.emitValueLocal(args[1]);
            const raw_bits_reg = try self.materializeF32BitsInGeneralReg(operand_loc);
            return try self.callHasherWriteBits(state_reg, raw_bits_reg, true);
        },

        .hasher_write_f64 => {
            const state_reg = try self.hasherStateReg(args[0]);
            const operand_loc = try self.emitValueLocal(args[1]);
            const raw_bits_reg = try self.ensureInGeneralReg(operand_loc);
            return try self.callHasherWriteBits(state_reg, raw_bits_reg, false);
        },

        // 128-bit payloads are split into low/high halves. Only u128 is
        // requested as unsigned; i128 and dec are requested as signed.
        .hasher_write_u128,
        .hasher_write_i128,
        .hasher_write_dec,
        => {
            const state_reg = try self.hasherStateReg(args[0]);
            const operand_loc = try self.emitValueLocal(args[1]);
            const halves = if (ll.op == .hasher_write_u128)
                try self.getI128Parts(operand_loc, .unsigned)
            else
                try self.getI128Parts(operand_loc, .signed);
            return try self.callHasherWriteU128(state_reg, halves, hasherDomain(ll.op));
        },

        .hasher_write_bytes => {
            const state_reg = try self.hasherStateReg(args[0]);
            const list_loc = try self.emitValueLocal(args[1]);
            const list_base = try self.ensureOnStack(list_loc, roc_list_size);

            // Pass the seed, the bytes domain, and the list words at frame
            // offsets +0 and +8 (presumably data pointer and length;
            // confirm against the wrapper's signature).
            var call = try Builder.init(&self.codegen.emit, &self.codegen.stack_offset);
            try call.addRegArg(state_reg);
            try call.addImmArg(@intCast(@intFromEnum(builtins.hash.HasherDomain.bytes)));
            try call.addMemArg(frame_ptr, list_base);
            try call.addMemArg(frame_ptr, list_base + 8);
            try self.callBuiltin(&call, @intFromPtr(&dev_wrappers.roc_builtins_hasher_write_bytes), .hasher_write_bytes);
            self.codegen.freeGeneral(state_reg);
            return try self.scalarRetReg();
        },

        .hasher_write_str => {
            const state_reg = try self.hasherStateReg(args[0]);
            const str_loc = try self.emitValueLocal(args[1]);
            const str_base = try self.ensureOnStack(str_loc, roc_str_size);

            // The string words are passed in the order +0, +16, +8, not in
            // memory order (NOTE(review): looks like this matches the
            // wrapper's parameter order; confirm).
            var call = try Builder.init(&self.codegen.emit, &self.codegen.stack_offset);
            try call.addRegArg(state_reg);
            try call.addMemArg(frame_ptr, str_base);
            try call.addMemArg(frame_ptr, str_base + 16);
            try call.addMemArg(frame_ptr, str_base + 8);
            try self.callBuiltin(&call, @intFromPtr(&dev_wrappers.roc_builtins_hasher_write_str), .hasher_write_str);
            self.codegen.freeGeneral(state_reg);
            return try self.scalarRetReg();
        },

        else => unreachable,
    }
}
4185+
39564186
// ── Helper methods for calling C wrapper builtins ──
39574187

39584188
/// Call a C wrapper: fn(out, str_f0, str_f1, str_f2, roc_ops) -> void

0 commit comments

Comments
 (0)