diff --git a/vlib/v2/gen/arm64/arm64.v b/vlib/v2/gen/arm64/arm64.v index 46985429b4830a..6c6ee56399df4f 100644 --- a/vlib/v2/gen/arm64/arm64.v +++ b/vlib/v2/gen/arm64/arm64.v @@ -134,7 +134,7 @@ fn (mut g Gen) gen_func(func mir.Function) { // This is needed for functions like __v_init_consts that are called but have no body g.curr_offset = g.macho.text_data.len sym_name := '_' + func.name - g.macho.add_symbol(sym_name, u64(g.curr_offset), false, 1) + g.macho.add_symbol(sym_name, u64(g.curr_offset), true, 1) g.emit(0xd65f03c0) // ret return } diff --git a/vlib/v2/gen/arm64/linker.v b/vlib/v2/gen/arm64/linker.v index c4912889c14cba..9e6ee027ae4b48 100644 --- a/vlib/v2/gen/arm64/linker.v +++ b/vlib/v2/gen/arm64/linker.v @@ -50,15 +50,61 @@ const bind_opcode_do_bind = 0x90 const bind_type_pointer = 1 const bind_symbol_flags_weak_import = 0x01 -// Libc symbols that should ALWAYS resolve to the external system library, -// never to local V wrappers. This prevents infinite recursion where -// V's malloc() wrapper calls C.malloc() which would otherwise resolve -// back to the V wrapper. -const force_external_syms = ['_malloc', '_free', '_calloc', '_realloc', '_exit', '_abort', '_memcpy', - '_memmove', '_memset', '_memcmp', '___stdoutp', '___stderrp', '_puts', '_printf', '_write', - '_read', '_open', '_close', '_fwrite', '_fflush', '_fopen', '_fclose', '_putchar', '_sprintf', - '_snprintf', '_fprintf', '_sscanf', '_mmap', '_munmap', '_getcwd', '_access', '_readlink', - '_getenv', '_strlen'] +// C library symbols that should resolve to the external system library via GOT/stubs. +// This is the exhaustive whitelist of all C functions referenced by V's builtins. +// Only symbols in this list are allowed through GOT/stubs — all other undefined +// symbols are rejected to prevent V-mangled names from leaking into dyld binding. 
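+// For example, '_malloc' is let through so it can bind to libc, while a V-mangled name such as '_main__foo' ('__' marks V name mangling; the name here is illustrative) is rejected.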
+const force_external_syms = [ + // Memory allocation + '_malloc', '_free', '_calloc', '_realloc', + // Memory operations + '_memcpy', '_memmove', '_memset', '_memcmp', '_memchr', '_mprotect', + // Memory mapping + '_mmap', '_munmap', + // String operations + '_strlen', '_strcmp', '_strncmp', '_strchr', '_strrchr', '_strdup', '_strerror', + '_strcpy', '_strncpy', '_strcat', '_strncat', '_strstr', + '_strcasecmp', '_strncasecmp', '_strtol', '_strtod', '_strtoul', '_atoi', '_atof', + // Character classification + '_tolower', '_toupper', '_isdigit', '_isspace', + // Standard I/O + '_printf', '_fprintf', '_sprintf', '_snprintf', '_sscanf', '_dprintf', + '_puts', '_fputs', '_fflush', '_putchar', '_getchar', + '_fopen', '_fclose', '_fread', '_fwrite', '_fseek', '_ftell', '_fgets', '_rewind', + '_fileno', '_setbuf', '_popen', '_pclose', + // File descriptor I/O + '_open', '_close', '_read', '_write', '_pipe', '_dup2', '_isatty', '_fcntl', '_ioctl', + // File system + '_stat', '_lstat', '_mkdir', '_rmdir', '_remove', '_rename', '_chdir', '_getcwd', + '_access', '_readlink', '_chmod', '_opendir', '_closedir', '_readdir', '_realpath', '_unlink', + // Process management + '_exit', '_abort', '__exit', '_fork', '_waitpid', '_execve', '_execvp', '_getpid', '_kill', + '_system', '_atexit', + // Signal handling + '_signal', '_raise', '_sigaction', '_sigemptyset', + // Environment + '_getenv', '_setenv', '_unsetenv', + // Time + '_gettimeofday', '_sleep', '_usleep', '_mktime', + // System + '_sysconf', '_sysctl', '_sysctlbyname', '_qsort', '_rand', '_srand', + // Stdio globals + '___stdoutp', '___stderrp', '___stdinp', + // macOS Grand Central Dispatch + '_dispatch_semaphore_create', '_dispatch_semaphore_signal', '_dispatch_semaphore_wait', + '_dispatch_time', '_dispatch_release', + // POSIX threads + '_pthread_self', '_pthread_mutex_init', '_pthread_mutex_lock', '_pthread_mutex_unlock', + '_pthread_mutex_destroy', '_pthread_rwlock_init', '_pthread_rwlock_rdlock', + '_pthread_rwlock_wrlock', '_pthread_rwlock_unlock', '_pthread_rwlockattr_init', + '_pthread_rwlockattr_setpshared', '_pthread_condattr_init', '_pthread_condattr_setpshared', + '_pthread_condattr_destroy', '_pthread_cond_init', '_pthread_cond_signal', + '_pthread_cond_wait', '_pthread_cond_timedwait', '_pthread_cond_destroy', + // Backtrace + '_backtrace', '_backtrace_symbols', '_backtrace_symbols_fd', + // macOS specific + '_proc_pidpath', +] pub struct Linker { macho &MachOObject @@ -121,23 +167,8 @@ pub fn (mut l Linker) link(output_path string, entry_name string) { } } - // Second pass: collect truly external symbols (undefined and not locally defined) - for sym in l.macho.symbols { - if sym.type_ == 0x01 { // N_UNDF | N_EXT - if sym.name !in defined_syms && sym.name !in l.extern_syms { - // Skip internal V symbols (contain '__' = V name mangling) - // Only system library symbols (libc) should go through GOT/stubs - if sym.name.contains('__') && sym.name !in force_external_syms { - continue - } - l.extern_syms << sym.name - l.sym_to_got[sym.name] = l.extern_syms.len - 1 - } - } - } - - // Add force_external symbols that are referenced (either as undefined OR defined locally) - // These need stubs so that internal calls go to libc, not to local V wrappers + // Second pass: collect external symbols — ONLY allow force_external_syms through GOT/stubs. + // This prevents V-mangled names from leaking into dyld binding. 
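+	// A whitelisted name gets a stub even when a local V wrapper with the same name is defined, so calls like C.malloc() reach libc instead of recursing back into V's malloc() wrapper.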
for sym in l.macho.symbols { if sym.name in force_external_syms && sym.name !in l.extern_syms { l.extern_syms << sym.name diff --git a/vlib/v2/gen/x64/asm.v b/vlib/v2/gen/x64/asm.v index e85b2682f9196d..be78294bad4204 100644 --- a/vlib/v2/gen/x64/asm.v +++ b/vlib/v2/gen/x64/asm.v @@ -512,6 +512,313 @@ fn asm_call_r10(mut g Gen) { g.emit(0xD2) // ModRM: call *r10 } +// === Sized Loads === + +// movzx rax, byte [rcx] (load 1 byte, zero-extend to 64-bit) +fn asm_movzx_rax_byte_mem_rcx(mut g Gen) { + g.emit(0x48) + g.emit(0x0F) + g.emit(0xB6) + g.emit(0x01) +} + +// movzx rax, word [rcx] (load 2 bytes, zero-extend to 64-bit) +fn asm_movzx_rax_word_mem_rcx(mut g Gen) { + g.emit(0x48) + g.emit(0x0F) + g.emit(0xB7) + g.emit(0x01) +} + +// mov eax, [rcx] (load 4 bytes, zero-extend to 64-bit) +fn asm_mov_eax_mem_rcx(mut g Gen) { + g.emit(0x8B) + g.emit(0x01) +} + +// === Load reg from [base + disp] === + +// mov reg, [base + disp] (generic, handles REX for any reg/base pair) +fn asm_load_reg_base_disp(mut g Gen, reg Reg, base Reg, disp int) { + reg_hw := g.map_reg(int(reg)) + base_hw := g.map_reg(int(base)) + mut rex := u8(0x48) + if reg_hw >= 8 { + rex |= 4 // REX.R + } + if base_hw >= 8 { + rex |= 1 // REX.B + } + g.emit(rex) + g.emit(0x8B) + + rm := base_hw & 7 + reg_bits := reg_hw & 7 + needs_sib := rm == 4 + + if disp == 0 && rm != 5 { + g.emit(reg_bits << 3 | rm) + if needs_sib { + g.emit(0x24) + } + } else if disp >= -128 && disp <= 127 { + g.emit(0x40 | (reg_bits << 3) | rm) + if needs_sib { + g.emit(0x24) + } + g.emit(u8(disp)) + } else { + g.emit(0x80 | (reg_bits << 3) | rm) + if needs_sib { + g.emit(0x24) + } + g.emit_u32(u32(disp)) + } +} + +// mov [base + disp], reg (generic store, handles REX for any reg/base pair) +fn asm_store_base_disp_reg(mut g Gen, base Reg, disp int, reg Reg) { + reg_hw := g.map_reg(int(reg)) + base_hw := g.map_reg(int(base)) + mut rex := u8(0x48) + if reg_hw >= 8 { + rex |= 4 // REX.R + } + if base_hw >= 8 { + rex |= 1 // REX.B + } + g.emit(rex) + g.emit(0x89) + + rm := base_hw & 7 + reg_bits := reg_hw & 7 + needs_sib := rm == 4 + + if disp == 0 && rm != 5 { + g.emit(reg_bits << 3 | rm) + if needs_sib { + g.emit(0x24) + } + } else if disp >= -128 && disp <= 127 { + g.emit(0x40 | (reg_bits << 3) | rm) + if needs_sib { + g.emit(0x24) + } + g.emit(u8(disp)) + } else { + g.emit(0x80 | (reg_bits << 3) | rm) + if needs_sib { + g.emit(0x24) + } + g.emit_u32(u32(disp)) + } +} + +// lea reg, [rbp + disp] (generic, handles REX) +fn asm_lea_reg_rbp_disp(mut g Gen, reg Reg, disp int) { + hw_reg := g.map_reg(int(reg)) + mut rex := u8(0x48) + if hw_reg >= 8 { + rex |= 4 // REX.R + } + g.emit(rex) + g.emit(0x8D) + if disp >= -128 && disp <= 127 { + g.emit(0x45 | ((hw_reg & 7) << 3)) // ModRM 01 = disp8 + g.emit(u8(disp)) + } else { + g.emit(0x85 | ((hw_reg & 7) << 3)) // ModRM 10 = disp32 + g.emit_u32(u32(disp)) + } +} + +// === Shift by immediate === + +// shr rax, imm8 +fn asm_shr_rax_imm8(mut g Gen, imm u8) { + g.emit(0x48) + g.emit(0xC1) + g.emit(0xE8) // /5 for SHR + g.emit(imm) +} + +// === Bitwise with immediate === + +// and rax, imm32 (sign-extended) +fn asm_and_rax_imm32(mut g Gen, imm u32) { + g.emit(0x48) + g.emit(0x25) + g.emit_u32(imm) +} + +// === Sign/Zero Extension === + +// movsx rax, al (sign-extend byte to qword) +fn asm_movsx_rax_al(mut g Gen) { + g.emit(0x48) + g.emit(0x0F) + g.emit(0xBE) + g.emit(0xC0) +} + +// movsx rax, ax (sign-extend word to qword) +fn asm_movsx_rax_ax(mut g Gen) { + g.emit(0x48) + g.emit(0x0F) + g.emit(0xBF) + g.emit(0xC0) +} + +// 
movsxd rax, eax (sign-extend dword to qword) +fn asm_movsxd_rax_eax(mut g Gen) { + g.emit(0x48) + g.emit(0x63) + g.emit(0xC0) +} + +// movzx rax, al (zero-extend byte to qword) - already in setcc, but standalone +fn asm_movzx_rax_al(mut g Gen) { + g.emit(0x48) + g.emit(0x0F) + g.emit(0xB6) + g.emit(0xC0) +} + +// movzx rax, ax (zero-extend word to qword) +fn asm_movzx_rax_ax(mut g Gen) { + g.emit(0x48) + g.emit(0x0F) + g.emit(0xB7) + g.emit(0xC0) +} + +// mov eax, eax (zero-extend dword to qword - implicitly zeros upper 32 bits) +fn asm_mov_eax_eax(mut g Gen) { + g.emit(0x89) + g.emit(0xC0) +} + +// === SSE2 Floating-Point === + +// movq xmm0, rax +fn asm_movq_xmm0_rax(mut g Gen) { + g.emit(0x66) + g.emit(0x48) + g.emit(0x0F) + g.emit(0x6E) + g.emit(0xC0) +} + +// movq rax, xmm0 +fn asm_movq_rax_xmm0(mut g Gen) { + g.emit(0x66) + g.emit(0x48) + g.emit(0x0F) + g.emit(0x7E) + g.emit(0xC0) +} + +// movq xmm1, rcx +fn asm_movq_xmm1_rcx(mut g Gen) { + g.emit(0x66) + g.emit(0x48) + g.emit(0x0F) + g.emit(0x6E) + g.emit(0xC9) +} + +// addsd xmm0, xmm1 +fn asm_addsd_xmm0_xmm1(mut g Gen) { + g.emit(0xF2) + g.emit(0x0F) + g.emit(0x58) + g.emit(0xC1) +} + +// subsd xmm0, xmm1 +fn asm_subsd_xmm0_xmm1(mut g Gen) { + g.emit(0xF2) + g.emit(0x0F) + g.emit(0x5C) + g.emit(0xC1) +} + +// mulsd xmm0, xmm1 +fn asm_mulsd_xmm0_xmm1(mut g Gen) { + g.emit(0xF2) + g.emit(0x0F) + g.emit(0x59) + g.emit(0xC1) +} + +// divsd xmm0, xmm1 +fn asm_divsd_xmm0_xmm1(mut g Gen) { + g.emit(0xF2) + g.emit(0x0F) + g.emit(0x5E) + g.emit(0xC1) +} + +// movsd xmm2, xmm0 +fn asm_movsd_xmm2_xmm0(mut g Gen) { + g.emit(0xF2) + g.emit(0x0F) + g.emit(0x10) + g.emit(0xD0) +} + +// divsd xmm2, xmm1 +fn asm_divsd_xmm2_xmm1(mut g Gen) { + g.emit(0xF2) + g.emit(0x0F) + g.emit(0x5E) + g.emit(0xD1) +} + +// roundsd xmm2, xmm2, 3 (truncate toward zero) +fn asm_roundsd_xmm2_xmm2_trunc(mut g Gen) { + g.emit(0x66) + g.emit(0x0F) + g.emit(0x3A) + g.emit(0x0B) + g.emit(0xD2) + g.emit(0x03) // truncate mode +} + +// mulsd xmm2, xmm1 +fn asm_mulsd_xmm2_xmm1(mut g Gen) { + g.emit(0xF2) + g.emit(0x0F) + g.emit(0x59) + g.emit(0xD1) +} + +// subsd xmm0, xmm2 +fn asm_subsd_xmm0_xmm2(mut g Gen) { + g.emit(0xF2) + g.emit(0x0F) + g.emit(0x5C) + g.emit(0xC2) +} + +// cvttsd2si rax, xmm0 (double to signed int64, truncate) +fn asm_cvttsd2si_rax_xmm0(mut g Gen) { + g.emit(0xF2) + g.emit(0x48) + g.emit(0x0F) + g.emit(0x2C) + g.emit(0xC0) +} + +// cvtsi2sd xmm0, rax (signed int64 to double) +fn asm_cvtsi2sd_xmm0_rax(mut g Gen) { + g.emit(0xF2) + g.emit(0x48) + g.emit(0x0F) + g.emit(0x2A) + g.emit(0xC0) +} + // === Special === // ud2 (undefined instruction - trap) diff --git a/vlib/v2/gen/x64/x64.v b/vlib/v2/gen/x64/x64.v index a1287aa8fde8e2..7ad7e85d0afcf3 100644 --- a/vlib/v2/gen/x64/x64.v +++ b/vlib/v2/gen/x64/x64.v @@ -74,6 +74,14 @@ pub fn (mut g Gen) gen() { } fn (mut g Gen) gen_func(func mir.Function) { + if func.blocks.len == 0 { + // Emit a minimal stub: just a ret instruction + // This is needed for functions like __v_init_consts that are called but have no body + g.curr_offset = g.elf.text_data.len + g.elf.add_symbol(func.name, u64(g.curr_offset), true, 1) + g.emit(0xc3) // ret + return + } g.curr_offset = g.elf.text_data.len g.stack_map = map[int]int{} g.alloca_offsets = map[int]int{} @@ -595,12 +603,413 @@ fn (mut g Gen) gen_instr(val_id int) { g.load_val_to_reg(0, src_id) g.store_reg_to_val(0, dest_id) } - .bitcast { + .bitcast, .trunc, .sext, .zext { + // For x64: all registers are 64-bit, so integer type conversions + // are mostly just copies. 
Truncation is made explicit by masking to the target width. + // Sign/zero extension matters for 8/16/32-bit values. if instr.operands.len > 0 { g.load_val_to_reg(0, instr.operands[0]) + if op == .sext { + // Sign-extend based on source type width + src_typ := g.mod.values[instr.operands[0]].typ + src_size := g.type_size(src_typ) + match src_size { + 1 { asm_movsx_rax_al(mut g) } + 2 { asm_movsx_rax_ax(mut g) } + 4 { asm_movsxd_rax_eax(mut g) } + else {} + } + } else if op == .zext { + src_typ := g.mod.values[instr.operands[0]].typ + src_size := g.type_size(src_typ) + match src_size { + 1 { asm_movzx_rax_al(mut g) } + 2 { asm_movzx_rax_ax(mut g) } + 4 { asm_mov_eax_eax(mut g) } + else {} + } + } else if op == .trunc { + // Truncation: mask to target width + dst_size := g.type_size(g.mod.values[val_id].typ) + match dst_size { + 1 { asm_and_rax_imm32(mut g, 0xFF) } + 2 { asm_and_rax_imm32(mut g, 0xFFFF) } + 4 { asm_mov_eax_eax(mut g) } + else {} + } + } g.store_reg_to_val(0, val_id) } } + .fadd, .fsub, .fmul, .fdiv, .frem { + // Floating-point operations using SSE2 + g.load_val_to_reg(0, instr.operands[0]) // LHS bits -> RAX + asm_movq_xmm0_rax(mut g) // RAX -> xmm0 + g.load_val_to_reg(1, instr.operands[1]) // RHS bits -> RCX + asm_movq_xmm1_rcx(mut g) // RCX -> xmm1 + + match op { + .fadd { asm_addsd_xmm0_xmm1(mut g) } + .fsub { asm_subsd_xmm0_xmm1(mut g) } + .fmul { asm_mulsd_xmm0_xmm1(mut g) } + .fdiv { asm_divsd_xmm0_xmm1(mut g) } + .frem { + // xmm0 = xmm0 - trunc(xmm0/xmm1) * xmm1 + asm_movsd_xmm2_xmm0(mut g) + asm_divsd_xmm2_xmm1(mut g) + asm_roundsd_xmm2_xmm2_trunc(mut g) + asm_mulsd_xmm2_xmm1(mut g) + asm_subsd_xmm0_xmm2(mut g) + } + else {} + } + + asm_movq_rax_xmm0(mut g) // xmm0 -> RAX + g.store_reg_to_val(0, val_id) + } + .fptosi { + // Float to signed integer conversion + g.load_val_to_reg(0, instr.operands[0]) + asm_movq_xmm0_rax(mut g) + asm_cvttsd2si_rax_xmm0(mut g) + g.store_reg_to_val(0, val_id) + } + .sitofp { + // Signed integer to float conversion + g.load_val_to_reg(0, instr.operands[0]) + asm_cvtsi2sd_xmm0_rax(mut g) + asm_movq_rax_xmm0(mut g) + g.store_reg_to_val(0, val_id) + } + .fptoui { + // For now, reuse the signed conversion; incorrect for values that do not fit in a signed 64-bit integer + g.load_val_to_reg(0, instr.operands[0]) + asm_movq_xmm0_rax(mut g) + asm_cvttsd2si_rax_xmm0(mut g) + g.store_reg_to_val(0, val_id) + } + .uitofp { + // For now, reuse the signed conversion; incorrect for u64 values with the top bit set + g.load_val_to_reg(0, instr.operands[0]) + asm_cvtsi2sd_xmm0_rax(mut g) + asm_movq_rax_xmm0(mut g) + g.store_reg_to_val(0, val_id) + } + .inline_string_init { + // Create string struct by value: { str, len, is_lit } + // operands: [str_ptr, len, is_lit] + str_ptr_id := instr.operands[0] + len_id := instr.operands[1] + is_lit_id := instr.operands[2] + + // stack_map[val_id] points to the 8-byte pointer slot. + // The 24-byte struct data lives right above it (at +8).
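+ // Frame layout written below (base_offset = g.stack_map[val_id]): + // [rbp+base_offset+0] pointer slot, + // [rbp+base_offset+8] str, + // [rbp+base_offset+16] len, + // [rbp+base_offset+24] is_lit.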
+ base_offset := g.stack_map[val_id] + struct_offset := base_offset + 8 + + // Store str field (offset 0) + g.load_val_to_reg(0, str_ptr_id) + asm_store_rbp_disp_reg(mut g, struct_offset, rax) + + // Store len field (offset 8) + g.load_val_to_reg(1, len_id) + asm_store_rbp_disp_reg(mut g, struct_offset + 8, rcx) + + // Store is_lit field (offset 16) + g.load_val_to_reg(0, is_lit_id) + asm_store_rbp_disp_reg(mut g, struct_offset + 16, rax) + + // Return pointer to struct + asm_lea_reg_rbp_disp(mut g, rax, struct_offset) + g.store_reg_to_val(0, val_id) + } + .extractvalue { + // Extract element from tuple/struct + // operands: [tuple_val, index] + tuple_id := instr.operands[0] + idx_val := g.mod.values[instr.operands[1]] + idx := idx_val.name.int() + tuple_val := g.mod.values[tuple_id] + mut tuple_is_large_agg := false + mut field_byte_off := idx * 8 + mut field_elem_size := 8 + if tuple_val.typ > 0 && tuple_val.typ < g.mod.type_store.types.len { + tuple_typ := g.mod.type_store.types[tuple_val.typ] + tuple_is_large_agg = g.type_size(tuple_val.typ) > 16 + && tuple_typ.kind in [.struct_t, .array_t] + if tuple_typ.kind == .struct_t && idx >= 0 { + field_byte_off = g.struct_field_offset_bytes(tuple_val.typ, idx) + if idx < tuple_typ.fields.len { + field_elem_size = g.type_size(tuple_typ.fields[idx]) + if field_elem_size <= 0 { + field_elem_size = 8 + } + } + } + } + + // Get tuple's stack location and load from offset + if tuple_offset := g.stack_map[tuple_id] { + if tuple_is_large_agg && idx >= 0 + && g.large_aggregate_stack_value_is_pointer(tuple_id) { + // Load pointer to aggregate, then load field from it + asm_load_reg_rbp_disp(mut g, rcx, tuple_offset) + if field_elem_size > 8 { + // Multi-word struct field: copy all words + if dst_offset := g.stack_map[val_id] { + num_words := (field_elem_size + 7) / 8 + for w in 0 .. num_words { + asm_load_reg_base_disp(mut g, rax, rcx, field_byte_off + w * 8) + asm_store_rbp_disp_reg(mut g, dst_offset + w * 8, rax) + } + } else { + asm_load_reg_base_disp(mut g, rax, rcx, field_byte_off) + g.store_reg_to_val(0, val_id) + } + } else { + asm_load_reg_base_disp(mut g, rax, rcx, field_byte_off) + g.store_reg_to_val(0, val_id) + } + } else if field_elem_size > 8 { + // Multi-word struct field stored inline + if dst_offset := g.stack_map[val_id] { + src_offset := tuple_offset + field_byte_off + num_words := (field_elem_size + 7) / 8 + for w in 0 .. num_words { + asm_load_reg_rbp_disp(mut g, rax, src_offset + w * 8) + asm_store_rbp_disp_reg(mut g, dst_offset + w * 8, rax) + } + } else { + field_offset := tuple_offset + field_byte_off + asm_load_reg_rbp_disp(mut g, rax, field_offset) + g.store_reg_to_val(0, val_id) + } + } else if field_elem_size in [1, 2, 4] { + // Use sized load to avoid reading adjacent packed fields + field_offset := tuple_offset + field_byte_off + asm_lea_reg_rbp_disp(mut g, rcx, field_offset) + match field_elem_size { + 1 { asm_movzx_rax_byte_mem_rcx(mut g) } + 2 { asm_movzx_rax_word_mem_rcx(mut g) } + 4 { asm_mov_eax_mem_rcx(mut g) } + else {} + } + g.store_reg_to_val(0, val_id) + } else { + field_offset := tuple_offset + field_byte_off + asm_load_reg_rbp_disp(mut g, rax, field_offset) + g.store_reg_to_val(0, val_id) + } + } else if reg := g.reg_map[tuple_id] { + // Register-allocated tuple + if tuple_is_large_agg && idx >= 0 { + if field_elem_size > 8 { + if dst_offset := g.stack_map[val_id] { + num_words := (field_elem_size + 7) / 8 + for w in 0 .. 
num_words { + asm_load_reg_base_disp(mut g, rax, Reg(reg), field_byte_off + w * 8) + asm_store_rbp_disp_reg(mut g, dst_offset + w * 8, rax) + } + } else { + asm_load_reg_base_disp(mut g, rax, Reg(reg), field_byte_off) + g.store_reg_to_val(0, val_id) + } + } else { + asm_load_reg_base_disp(mut g, rax, Reg(reg), field_byte_off) + g.store_reg_to_val(0, val_id) + } + } else if idx == 0 { + if reg != 0 { + asm_mov_reg_reg(mut g, rax, Reg(reg)) + } + // Mask to field width for sub-8-byte fields + if field_elem_size in [1, 2, 4] { + mask := (u32(1) << u32(field_elem_size * 8)) - 1 + asm_and_rax_imm32(mut g, mask) + } + g.store_reg_to_val(0, val_id) + } else { + // Higher indices packed in same register - shift then mask + g.load_val_to_reg(0, tuple_id) + if field_byte_off > 0 && field_byte_off < 8 { + asm_shr_rax_imm8(mut g, u8(field_byte_off * 8)) + } + if field_elem_size in [1, 2, 4] { + mask := (u32(1) << u32(field_elem_size * 8)) - 1 + asm_and_rax_imm32(mut g, mask) + } + g.store_reg_to_val(0, val_id) + } + } else { + // Tuple not in stack_map or reg_map - fallback + g.load_val_to_reg(0, tuple_id) + g.store_reg_to_val(0, val_id) + } + } + .struct_init { + // Create struct from field values: operands are field values in order + result_offset := g.stack_map[val_id] + struct_typ := g.mod.type_store.types[instr.typ] + struct_size := g.type_size(instr.typ) + num_chunks := if struct_size > 0 { (struct_size + 7) / 8 } else { 1 } + + // Zero-initialize the entire struct first + asm_xor_reg_reg(mut g, rax) + for i in 0 .. num_chunks { + asm_store_rbp_disp_reg(mut g, result_offset + i * 8, rax) + } + + // Store each field value at its proper offset + for fi, field_id in instr.operands { + mut field_off := fi * 8 + if struct_typ.kind == .struct_t && fi >= 0 && fi < struct_typ.fields.len { + field_off = g.struct_field_offset_bytes(instr.typ, fi) + } + + mut field_typ_id := ssa.TypeID(0) + if struct_typ.kind == .struct_t && fi >= 0 && fi < struct_typ.fields.len { + field_typ_id = struct_typ.fields[fi] + } else if field_id > 0 && field_id < g.mod.values.len { + field_typ_id = g.mod.values[field_id].typ + } + mut field_size := if field_typ_id > 0 { g.type_size(field_typ_id) } else { 8 } + if field_size <= 0 { + field_size = 8 + } + + // Skip zero constants (already zeroed above) + field_val := g.mod.values[field_id] + if field_val.kind == .constant && field_val.name == '0' { + continue + } + + if field_size <= 8 { + g.load_val_to_reg(0, field_id) + asm_store_rbp_disp_reg(mut g, result_offset + field_off, rax) + } else { + // Multi-word field (nested struct) + field_chunks := (field_size + 7) / 8 + if field_offset := g.stack_map[field_id] { + mut src_ptr_reg := r10 + if field_size > 16 + && g.large_aggregate_stack_value_is_pointer(field_id) { + asm_load_reg_rbp_disp(mut g, r10, field_offset) + } else { + asm_lea_reg_rbp_disp(mut g, r10, field_offset) + } + for w in 0 .. 
field_chunks { + asm_load_reg_base_disp(mut g, rax, src_ptr_reg, w * 8) + asm_store_rbp_disp_reg(mut g, result_offset + field_off + w * 8, + rax) + } + } else { + // Fallback: store first word + g.load_val_to_reg(0, field_id) + asm_store_rbp_disp_reg(mut g, result_offset + field_off, rax) + } + } + } + } + .insertvalue { + // Insert element into tuple/struct + // operands: [tuple_val, elem_val, index] + tuple_id := instr.operands[0] + elem_id := instr.operands[1] + idx_val := g.mod.values[instr.operands[2]] + idx := idx_val.name.int() + + // Get result's stack location + result_offset := g.stack_map[val_id] + tuple_typ := g.mod.type_store.types[instr.typ] + tuple_size := g.type_size(instr.typ) + num_chunks := if tuple_size > 0 { (tuple_size + 7) / 8 } else { 1 } + mut elem_off := idx * 8 + if tuple_typ.kind == .struct_t && idx >= 0 && idx < tuple_typ.fields.len { + elem_off = g.struct_field_offset_bytes(instr.typ, idx) + } + + // Copy existing tuple data if not undef. + tuple_val := g.mod.values[tuple_id] + if !(tuple_val.kind == .constant && tuple_val.name == 'undef') { + mut copied_tuple := false + if tuple_offset := g.stack_map[tuple_id] { + if tuple_size > 16 + && g.large_aggregate_stack_value_is_pointer(tuple_id) { + asm_load_reg_rbp_disp(mut g, r10, tuple_offset) + for i in 0 .. num_chunks { + asm_load_reg_base_disp(mut g, rax, r10, i * 8) + asm_store_rbp_disp_reg(mut g, result_offset + i * 8, rax) + } + } else { + for i in 0 .. num_chunks { + asm_load_reg_rbp_disp(mut g, rax, tuple_offset + i * 8) + asm_store_rbp_disp_reg(mut g, result_offset + i * 8, rax) + } + } + copied_tuple = true + } else if src_reg := g.reg_map[tuple_id] { + for i in 0 .. num_chunks { + asm_load_reg_base_disp(mut g, rax, Reg(src_reg), i * 8) + asm_store_rbp_disp_reg(mut g, result_offset + i * 8, rax) + } + copied_tuple = true + } + if !copied_tuple { + // Deterministic fallback: zero out + asm_xor_reg_reg(mut g, rax) + for i in 0 .. num_chunks { + asm_store_rbp_disp_reg(mut g, result_offset + i * 8, rax) + } + } + } else { + // Start from zeroed storage for `insertvalue(undef, ...)` + asm_xor_reg_reg(mut g, rax) + for i in 0 .. num_chunks { + asm_store_rbp_disp_reg(mut g, result_offset + i * 8, rax) + } + } + + // Store the new element at the specified index + mut elem_typ_id := ssa.TypeID(0) + if tuple_typ.kind == .struct_t && idx >= 0 && idx < tuple_typ.fields.len { + elem_typ_id = tuple_typ.fields[idx] + } else if elem_id > 0 && elem_id < g.mod.values.len { + elem_typ_id = g.mod.values[elem_id].typ + } + mut elem_size := if elem_typ_id > 0 { g.type_size(elem_typ_id) } else { 8 } + if elem_size <= 0 { + elem_size = 8 + } + if elem_size <= 8 { + g.load_val_to_reg(0, elem_id) + asm_store_rbp_disp_reg(mut g, result_offset + elem_off, rax) + } else { + elem_chunks := (elem_size + 7) / 8 + mut copied_elem := false + if elem_offset := g.stack_map[elem_id] { + if elem_size > 16 + && g.large_aggregate_stack_value_is_pointer(elem_id) { + asm_load_reg_rbp_disp(mut g, r10, elem_offset) + } else { + asm_lea_reg_rbp_disp(mut g, r10, elem_offset) + } + for i in 0 .. elem_chunks { + asm_load_reg_base_disp(mut g, rax, r10, i * 8) + asm_store_rbp_disp_reg(mut g, result_offset + elem_off + i * 8, + rax) + } + copied_elem = true + } else if src_reg := g.reg_map[elem_id] { + for i in 0 .. 
elem_chunks { + asm_load_reg_base_disp(mut g, rax, Reg(src_reg), i * 8) + asm_store_rbp_disp_reg(mut g, result_offset + elem_off + i * 8, + rax) + } + copied_elem = true + } + if !copied_elem { + // Best effort fallback: store first word, clear the rest. + g.load_val_to_reg(0, elem_id) + asm_store_rbp_disp_reg(mut g, result_offset + elem_off, rax) + asm_xor_reg_reg(mut g, rax) + for i in 1 .. elem_chunks { + asm_store_rbp_disp_reg(mut g, result_offset + elem_off + i * 8, + rax) + } + } + } + } .phi { // Phi nodes are eliminated by optimization (converted to assignments) // but the instructions remain in the block. We ignore them here. @@ -962,3 +1371,96 @@ fn (mut g Gen) allocate_registers(func mir.Function) { } g.used_regs.sort() } + +fn (g Gen) type_align(typ_id ssa.TypeID) int { + if typ_id > 0 && typ_id < g.mod.type_store.types.len { + typ := g.mod.type_store.types[typ_id] + if typ.kind == .array_t { + return g.type_align(typ.elem_type) + } + } + size := g.type_size(typ_id) + if size >= 8 { + return 8 + } + if size >= 4 { + return 4 + } + if size >= 2 { + return 2 + } + return 1 +} + +fn (g Gen) struct_field_offset_bytes(struct_typ_id ssa.TypeID, field_idx int) int { + if struct_typ_id <= 0 || struct_typ_id >= g.mod.type_store.types.len { + return field_idx * 8 + } + typ := g.mod.type_store.types[struct_typ_id] + if typ.kind != .struct_t || field_idx < 0 || field_idx >= typ.fields.len { + return field_idx * 8 + } + mut offset := 0 + for i, field_typ in typ.fields { + align := g.type_align(field_typ) + if align > 1 && offset % align != 0 { + offset = (offset + align - 1) & ~(align - 1) + } + if i == field_idx { + return offset + } + field_size := g.type_size(field_typ) + offset += if field_size > 0 { field_size } else { 8 } + } + return field_idx * 8 +} + +fn (g &Gen) large_struct_stack_value_is_pointer(val_id int) bool { + if val_id <= 0 || val_id >= g.mod.values.len { + return false + } + val := g.mod.values[val_id] + if val.typ <= 0 || val.typ >= g.mod.type_store.types.len { + return false + } + val_typ := g.mod.type_store.types[val.typ] + if val_typ.kind != .struct_t || g.type_size(val.typ) <= 16 { + return false + } + if val.kind == .string_literal { + return false + } + if val.kind == .instruction { + instr := g.mod.instrs[val.index] + op := g.selected_opcode(instr) + return op !in [.call, .call_sret, .inline_string_init, .insertvalue, .struct_init, + .extractvalue, .assign, .phi, .bitcast, .load] + } + return false +} + +fn (g &Gen) large_aggregate_stack_value_is_pointer(val_id int) bool { + if val_id <= 0 || val_id >= g.mod.values.len { + return false + } + val := g.mod.values[val_id] + if val.typ <= 0 || val.typ >= g.mod.type_store.types.len { + return false + } + val_typ := g.mod.type_store.types[val.typ] + if g.type_size(val.typ) <= 16 { + return false + } + if val_typ.kind == .struct_t { + return g.large_struct_stack_value_is_pointer(val_id) + } + if val_typ.kind == .array_t { + if val.kind == .instruction { + instr := g.mod.instrs[val.index] + op := g.selected_opcode(instr) + return op !in [.call, .call_sret, .insertvalue, .extractvalue, .assign, .phi, .bitcast, + .load] + } + } + return false +} diff --git a/vlib/v2/ssa/builder.v b/vlib/v2/ssa/builder.v index 4881e341c597b7..09a0bc78598b9e 100644 --- a/vlib/v2/ssa/builder.v +++ b/vlib/v2/ssa/builder.v @@ -608,8 +608,8 @@ fn (mut b Builder) build_fn(decl ast.FnDecl) { return } - // Skip functions without a body (e.g., extern declarations) or non-main-module functions - if decl.stmts.len == 0 || b.cur_module != 'main' { + // 
Skip functions without a body (e.g., extern declarations) + if decl.stmts.len == 0 { // Emit a minimal function body (entry + ret) so backends have a valid function b.cur_func = func_idx entry := b.mod.add_block(func_idx, 'entry')