diff --git a/examples/tee-grate/Cargo.toml b/examples/tee-grate/Cargo.toml new file mode 100644 index 0000000..0ea2e0f --- /dev/null +++ b/examples/tee-grate/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "tee-grate" +version = "0.1.0" +edition = "2024" + +[dependencies] +grate-rs = { git = "https://github.com/Lind-Project/lind-wasm-example-grates", branch= "main", subdir = "lib/grate-rs" } +fdtables = { git = "https://github.com/Lind-Project/lind-wasm", branch = "main", subdir = "src/fdtables" } +libc = "0.2" diff --git a/examples/tee-grate/src/main.rs b/examples/tee-grate/src/main.rs new file mode 100644 index 0000000..b0ca14b --- /dev/null +++ b/examples/tee-grate/src/main.rs @@ -0,0 +1,644 @@ +//! Tee grate — duplicates syscalls across two independent handler chains. +//! +//! Usage: +//! tee-grate [--buffer-limit <bytes>] \ +//! %{ <secondary-grate> [secondary-args...] %} <primary-chain...> +//! +//! Everything inside `%{ ... %}` is the secondary grate chain. Everything after +//! `%}` is the primary chain (the normal grate composition continues). +//! +//! Example: +//! tee-grate %{ imfs-grate %} strace-grate cage_binary args... +//! +//! The tee grate interposes on register_handler (1001), exec (59), fork (57), +//! and exit (60). When clamped grates register handlers, tee captures both the +//! primary and secondary registrations, allocates alt syscall numbers, and +//! installs its own dispatch handler that calls both at runtime. 
+ +mod tee; + +use core::ffi::{c_char, c_int, c_void}; +use std::ffi::CString; +use std::ptr; + +use grate_rs::constants::*; +use grate_rs::{SyscallHandler, copy_data_between_cages, getcageid, register_handler}; + +use tee::*; + +// ===================================================================== +// POSIX FFI — use grate-rs public ffi module +// ===================================================================== + +use grate_rs::ffi::{fork, execv, waitpid, mmap, munmap, sem_t, sem_init, sem_destroy, sem_post, sem_wait}; +use grate_rs::constants::mman::*; + +// ===================================================================== +// CLI parsing +// ===================================================================== + +struct TeeConfig { + /// The full exec chain passed to the first child. + /// Contains: [secondary-grates..., "%}", primary-chain...] + exec_chain: Vec, + /// Maximum bytes for secondary buffer. + buffer_limit: usize, +} + +/// Parse argv using the clamping syntax (matches namespace-grate). +/// +/// Expected: tee-grate [--buffer-limit ] %{ secondary... %} primary-chain... +/// +/// After parsing: +/// exec_chain = ["secondary-grate", ..., "%}", "primary-grate", ..., "cage", "args"] +fn parse_argv(args: Vec) -> Result { + let mut buffer_limit = DEFAULT_SECONDARY_BUFFER_LIMIT; + let mut i = 0; + + // Parse tee-grate options before %{. 
+ while i < args.len() { + match args[i].as_str() { + "--buffer-limit" => { + i += 1; + if i >= args.len() { return Err("--buffer-limit requires an argument".into()); } + buffer_limit = args[i].parse().map_err(|_| "--buffer-limit must be a number")?; + i += 1; + } + "%{" => { + i += 1; // consume %{ + break; + } + other => { + if let Some(val) = other.strip_prefix("--buffer-limit=") { + buffer_limit = val.parse().map_err(|_| "--buffer-limit must be a number")?; + i += 1; + } else { + return Err(format!("unexpected argument before %{{: {}", other)); + } + } + } + } + + if i >= args.len() { + return Err("missing %{ ... %} block".into()); + } + + // Everything from here is the exec chain (secondary args, %}, primary chain). + // Passed as-is to the first clamped grate — the %} boundary is detected + // at exec time by exec_handler. + let exec_chain: Vec = args[i..].to_vec(); + + if !exec_chain.contains(&"%}".to_string()) { + return Err("missing %} in command line".into()); + } + + Ok(TeeConfig { + exec_chain, + buffer_limit, + }) +} + +// ===================================================================== +// Lifecycle handlers +// ===================================================================== + +/// Handler for syscall 1001 (register_handler). +/// +/// When a clamped grate calls register_handler(cage, syscall, grate_id, handler_ptr), +/// tee intercepts it to: +/// 1. Determine if the registering grate is primary or secondary (by grate_id) +/// 2. Allocate an alt syscall number for the handler +/// 3. Register the handler at the alt number on the tee grate's cage +/// 4. Store the route: (cage, syscall) → primary_alt / secondary_alt +/// 5. 
Register the tee dispatch handler on the target cage (if not already) +/// +/// Register handler args as received by the 3i dispatch: +/// arg1 = target_cage, arg1cage = syscall_nr +/// arg2 (unused), arg2cage = grate_id +/// arg3 = handler_fn_ptr +pub extern "C" fn register_handler_handler( + _cageid: u64, + target_cage: u64, + syscall_nr: u64, + _arg2: u64, + grate_id: u64, + handler_fn_ptr: u64, + _arg3cage: u64, + _arg4: u64, + _arg4cage: u64, + _arg5: u64, + _arg5cage: u64, + _arg6: u64, + _arg6cage: u64, +) -> i32 { + let tee_cage = with_tee(|s| s.tee_cage_id); + + // After interception phase ends, pass through registrations unchanged. + if !with_tee(|s| s.intercepting) { + return do_syscall( + grate_id, SYS_REGISTER_HANDLER, + &[target_cage, 0, handler_fn_ptr, 0, 0, 0], + &[syscall_nr, grate_id, 0, 0, 0, 0], + ); + } + + println!( + "[tee-grate] intercept register_handler: cage={}, syscall={}, grate={}", + target_cage, syscall_nr, grate_id + ); + + // Step 1: Allocate alt and record which stack (primary/secondary) this belongs to. + let alt_nr = with_tee(|s| s.record_registration(target_cage, syscall_nr, grate_id)); + + // Step 2: Register the clamped handler at the alt number on tee's cage. + let ret = do_syscall( + grate_id, SYS_REGISTER_HANDLER, + &[tee_cage, 0, handler_fn_ptr, 0, 0, 0], + &[alt_nr, grate_id, 0, 0, 0, 0], + ); + if ret != 0 { + eprintln!("[tee-grate] failed to register alt handler: ret={}", ret); + return ret; + } + + // Step 3: If we haven't yet registered the tee dispatch handler on the target + // cage for this syscall, do so now. 
+ let already = with_tee(|s| s.is_handler_registered(target_cage, syscall_nr)); + if !already { + if let Some(tee_handler) = get_tee_handler(syscall_nr) { + match register_handler(target_cage, syscall_nr, tee_cage, tee_handler) { + Ok(_) => { + with_tee(|s| s.mark_handler_registered(target_cage, syscall_nr)); + } + Err(e) => { + eprintln!("[tee-grate] failed to register tee handler: {:?}", e); + return -1; + } + } + } else { + // No tee handler for this syscall — pass through the registration directly. + eprintln!( + "[tee-grate] no tee handler for syscall {} — passing through", + syscall_nr + ); + return do_syscall( + grate_id, SYS_REGISTER_HANDLER, + &[target_cage, 0, handler_fn_ptr, 0, 0, 0], + &[syscall_nr, grate_id, 0, 0, 0, 0], + ); + } + } + + 0 +} + +/// Handler for syscall 59 (exec). +/// +/// Detects %} boundary and stops register_handler interception. +pub extern "C" fn exec_handler( + _cageid: u64, + arg1: u64, arg1cage: u64, + arg2: u64, arg2cage: u64, + arg3: u64, arg3cage: u64, + arg4: u64, arg4cage: u64, + arg5: u64, arg5cage: u64, + arg6: u64, arg6cage: u64, +) -> i32 { + let tee_cage = with_tee(|s| s.tee_cage_id); + + // Read the exec path from cage memory to check for %}. + let mut buf = vec![0u8; 256]; + if copy_data_between_cages( + tee_cage, arg1cage, + arg1, arg1cage, + buf.as_mut_ptr() as u64, tee_cage, + 256, 0, + ).is_err() { + panic!("[tee-grate] Unable to read the execve path"); + } + + let len = buf.iter().position(|&b| b == 0).unwrap_or(256); + let path = String::from_utf8_lossy(&buf[..len]); + + if path == "%}" { + println!("[tee-grate] detected %}} boundary — stopping register_handler interception"); + with_tee(|s| s.intercepting = false); + + // argv[] pointers are 8-byte wide in the Lind runtime. 
+ const PTR_SIZE: usize = 8; + let argv1_addr = arg2 + PTR_SIZE as u64; + + let mut real_ptr = [0u8; PTR_SIZE]; + match copy_data_between_cages( + tee_cage, arg2cage, + argv1_addr, arg2cage, + real_ptr.as_mut_ptr() as u64, tee_cage, + 8, 0, + ) { + Ok(_) => {} + Err(_) => { + println!("Invalid command line arguments detected."); + return -2; + } + } + let real_path = u64::from_le_bytes(real_ptr); + + return do_syscall( + arg2cage, SYS_EXEC, + &[real_path, argv1_addr, arg3, arg4, arg5, arg6], + &[arg2cage, arg2cage, arg3cage, arg4cage, arg5cage, arg6cage], + ); + } else { + return do_syscall( + arg1cage, SYS_EXEC, + &[arg1, arg2, arg3, arg4, arg5, arg6], + &[arg1cage, arg2cage, arg3cage, arg4cage, arg5cage, arg6cage], + ); + } +} + +/// Handler for syscall 57 (fork). +/// +/// Forwards fork, clones tee state to child, registers lifecycle handlers. +pub extern "C" fn fork_handler( + _cageid: u64, + arg1: u64, arg1cage: u64, + arg2: u64, arg2cage: u64, + arg3: u64, arg3cage: u64, + arg4: u64, arg4cage: u64, + arg5: u64, arg5cage: u64, + arg6: u64, arg6cage: u64, +) -> i32 { + let child_cage_id = do_syscall( + arg1cage, SYS_CLONE, + &[arg1, arg2, arg3, arg4, arg5, arg6], + &[arg1cage, arg2cage, arg3cage, arg4cage, arg5cage, arg6cage], + ) as u64; + + // Clone tee route state and fd table from parent to child. + with_tee(|s| { + if s.is_managed(arg1cage) { + s.clone_cage_state(arg1cage, child_cage_id); + } + }); + let _ = fdtables::copy_fdtable_for_cage(arg1cage, child_cage_id); + + // Register lifecycle handlers on the child. + register_lifecycle_handlers(child_cage_id); + + child_cage_id as i32 +} + +/// Handler for syscall 60 (exit). +/// +/// Cleans up tee state for the exiting cage, then forwards. 
+pub extern "C" fn exit_handler( + _cageid: u64, + arg1: u64, arg1cage: u64, + arg2: u64, arg2cage: u64, + arg3: u64, arg3cage: u64, + arg4: u64, arg4cage: u64, + arg5: u64, arg5cage: u64, + arg6: u64, arg6cage: u64, +) -> i32 { + with_tee(|s| { s.remove_cage_state(arg1cage); }); + fdtables::remove_cage_from_fdtable(arg1cage); + + do_syscall( + arg1cage, SYS_EXIT, + &[arg1, arg2, arg3, arg4, arg5, arg6], + &[arg1cage, arg2cage, arg3cage, arg4cage, arg5cage, arg6cage], + ) +} + +/// Register the four lifecycle handlers on a cage. +fn register_lifecycle_handlers(cage_id: u64) { + let tee_cage = with_tee(|s| s.tee_cage_id); + + let handlers: &[(u64, SyscallHandler)] = &[ + (SYS_REGISTER_HANDLER, register_handler_handler), + (SYS_EXEC, exec_handler), + (SYS_CLONE, fork_handler), + (SYS_EXIT, exit_handler), + ]; + + for &(syscall_nr, handler) in handlers { + if let Err(e) = register_handler(cage_id, syscall_nr, tee_cage, handler) { + eprintln!( + "[tee-grate] failed to register lifecycle handler {} on cage {}: {:?}", + syscall_nr, cage_id, e + ); + } + } +} + +// ===================================================================== +// Tee dispatch handlers +// +// Each handler calls tee_dispatch() which forwards to both primary and +// secondary, returning the primary's result. +// ===================================================================== + +/// Generate a tee dispatch handler for a given syscall number. +/// The handler extracts args into arrays and calls tee_dispatch(). +macro_rules! 
tee_handler { + ($name:ident, $nr:expr) => { + pub extern "C" fn $name( + _cageid: u64, + arg1: u64, arg1cage: u64, + arg2: u64, arg2cage: u64, + arg3: u64, arg3cage: u64, + arg4: u64, arg4cage: u64, + arg5: u64, arg5cage: u64, + arg6: u64, arg6cage: u64, + ) -> i32 { + tee_dispatch( + $nr, arg1cage, + [arg1, arg2, arg3, arg4, arg5, arg6], + [arg1cage, arg2cage, arg3cage, arg4cage, arg5cage, arg6cage], + ) + } + }; +} + +// Path-based syscalls — pure tee dispatch, no fd tracking needed. +tee_handler!(tee_stat, SYS_XSTAT); +tee_handler!(tee_access, SYS_ACCESS); +tee_handler!(tee_unlink, SYS_UNLINK); +tee_handler!(tee_mkdir, SYS_MKDIR); +tee_handler!(tee_rmdir, SYS_RMDIR); +tee_handler!(tee_rename, SYS_RENAME); +tee_handler!(tee_truncate, SYS_TRUNCATE); +tee_handler!(tee_chmod, SYS_CHMOD); +tee_handler!(tee_chdir, SYS_CHDIR); +tee_handler!(tee_readlink, SYS_READLINK); +tee_handler!(tee_unlinkat, SYS_UNLINKAT); +tee_handler!(tee_readlinkat, SYS_READLINKAT); + +// FD-based syscalls — pure tee dispatch, no fd tracking side effects. +tee_handler!(tee_read, SYS_READ); +tee_handler!(tee_write, SYS_WRITE); +tee_handler!(tee_pread, SYS_PREAD); +tee_handler!(tee_pwrite, SYS_PWRITE); +tee_handler!(tee_lseek, SYS_LSEEK); +tee_handler!(tee_fstat, SYS_FXSTAT); +tee_handler!(tee_fcntl, SYS_FCNTL); +tee_handler!(tee_ftruncate, SYS_FTRUNCATE); +tee_handler!(tee_fchmod, SYS_FCHMOD); +tee_handler!(tee_readv, SYS_READV); +tee_handler!(tee_writev, SYS_WRITEV); + +// ── FD-tracking handlers ───────────────────────────────────────────── +// +// open, close, dup, dup2, dup3 need to update fdtables after dispatch +// so that fork/exec/exit can propagate fd state correctly. + +/// open: tee dispatch, then record the returned fd in fdtables. 
+pub extern "C" fn tee_open( + _cageid: u64, arg1: u64, arg1cage: u64, arg2: u64, arg2cage: u64, + arg3: u64, arg3cage: u64, arg4: u64, arg4cage: u64, + arg5: u64, arg5cage: u64, arg6: u64, arg6cage: u64, +) -> i32 { + let ret = tee_dispatch( + SYS_OPEN, arg1cage, + [arg1, arg2, arg3, arg4, arg5, arg6], + [arg1cage, arg2cage, arg3cage, arg4cage, arg5cage, arg6cage], + ); + + if ret >= 0 { + let _ = fdtables::get_specific_virtual_fd( + arg1cage, ret as u64, 0, ret as u64, false, 0, + ); + } + + ret +} + +/// close: tee dispatch, then remove the fd from fdtables. +pub extern "C" fn tee_close( + _cageid: u64, arg1: u64, arg1cage: u64, arg2: u64, arg2cage: u64, + arg3: u64, arg3cage: u64, arg4: u64, arg4cage: u64, + arg5: u64, arg5cage: u64, arg6: u64, arg6cage: u64, +) -> i32 { + let ret = tee_dispatch( + SYS_CLOSE, arg1cage, + [arg1, arg2, arg3, arg4, arg5, arg6], + [arg1cage, arg2cage, arg3cage, arg4cage, arg5cage, arg6cage], + ); + + let _ = fdtables::close_virtualfd(arg1cage, arg1); + + ret +} + +/// dup: tee dispatch, then copy the fd entry in fdtables. +pub extern "C" fn tee_dup( + _cageid: u64, arg1: u64, arg1cage: u64, arg2: u64, arg2cage: u64, + arg3: u64, arg3cage: u64, arg4: u64, arg4cage: u64, + arg5: u64, arg5cage: u64, arg6: u64, arg6cage: u64, +) -> i32 { + let ret = tee_dispatch( + SYS_DUP, arg1cage, + [arg1, arg2, arg3, arg4, arg5, arg6], + [arg1cage, arg2cage, arg3cage, arg4cage, arg5cage, arg6cage], + ); + + if ret >= 0 { + let _ = fdtables::get_specific_virtual_fd( + arg1cage, ret as u64, 0, ret as u64, false, 0, + ); + } + + ret +} + +/// dup2: tee dispatch, then record the target fd in fdtables. 
+pub extern "C" fn tee_dup2( + _cageid: u64, arg1: u64, arg1cage: u64, arg2: u64, arg2cage: u64, + arg3: u64, arg3cage: u64, arg4: u64, arg4cage: u64, + arg5: u64, arg5cage: u64, arg6: u64, arg6cage: u64, +) -> i32 { + let ret = tee_dispatch( + SYS_DUP2, arg1cage, + [arg1, arg2, arg3, arg4, arg5, arg6], + [arg1cage, arg2cage, arg3cage, arg4cage, arg5cage, arg6cage], + ); + + if ret >= 0 { + let _ = fdtables::get_specific_virtual_fd( + arg1cage, arg2, 0, arg2, false, 0, + ); + } + + ret +} + +/// dup3: tee dispatch, then record the target fd in fdtables. +pub extern "C" fn tee_dup3( + _cageid: u64, arg1: u64, arg1cage: u64, arg2: u64, arg2cage: u64, + arg3: u64, arg3cage: u64, arg4: u64, arg4cage: u64, + arg5: u64, arg5cage: u64, arg6: u64, arg6cage: u64, +) -> i32 { + let ret = tee_dispatch( + SYS_DUP3, arg1cage, + [arg1, arg2, arg3, arg4, arg5, arg6], + [arg1cage, arg2cage, arg3cage, arg4cage, arg5cage, arg6cage], + ); + + if ret >= 0 { + let _ = fdtables::get_specific_virtual_fd( + arg1cage, arg2, 0, arg2, false, 0, + ); + } + + ret +} + +/// Map syscall number → tee dispatch handler function pointer. 
+fn get_tee_handler(syscall_nr: u64) -> Option { + match syscall_nr { + SYS_OPEN => Some(tee_open), + SYS_XSTAT => Some(tee_stat), + SYS_ACCESS => Some(tee_access), + SYS_UNLINK => Some(tee_unlink), + SYS_MKDIR => Some(tee_mkdir), + SYS_RMDIR => Some(tee_rmdir), + SYS_RENAME => Some(tee_rename), + SYS_TRUNCATE => Some(tee_truncate), + SYS_CHMOD => Some(tee_chmod), + SYS_CHDIR => Some(tee_chdir), + SYS_READLINK => Some(tee_readlink), + SYS_UNLINKAT => Some(tee_unlinkat), + SYS_READLINKAT => Some(tee_readlinkat), + SYS_READ => Some(tee_read), + SYS_WRITE => Some(tee_write), + SYS_CLOSE => Some(tee_close), + SYS_PREAD => Some(tee_pread), + SYS_PWRITE => Some(tee_pwrite), + SYS_LSEEK => Some(tee_lseek), + SYS_FXSTAT => Some(tee_fstat), + SYS_FCNTL => Some(tee_fcntl), + SYS_FTRUNCATE => Some(tee_ftruncate), + SYS_FCHMOD => Some(tee_fchmod), + SYS_READV => Some(tee_readv), + SYS_WRITEV => Some(tee_writev), + SYS_DUP => Some(tee_dup), + SYS_DUP2 => Some(tee_dup2), + SYS_DUP3 => Some(tee_dup3), + _ => None, + } +} + +// ===================================================================== +// Main +// ===================================================================== + +fn main() { + let args: Vec = std::env::args().skip(1).collect(); + + if args.is_empty() { + eprintln!("Usage: tee-grate [--buffer-limit ] %{{ %}} "); + std::process::exit(1); + } + + let config = match parse_argv(args) { + Ok(c) => c, + Err(e) => { + eprintln!("[tee-grate] argument error: {}", e); + std::process::exit(1); + } + }; + + println!( + "[tee-grate] exec_chain={:?}, buffer_limit={}", + config.exec_chain, config.buffer_limit + ); + + // Initialize global tee state. + let tee_cage_id = getcageid(); + *TEE_STATE.lock().unwrap() = Some(TeeState::new(tee_cage_id, config.buffer_limit)); + + // Prepare exec chain as C strings. 
+ let cstrings: Vec = config.exec_chain + .iter() + .map(|s| CString::new(s.as_str()).unwrap()) + .collect(); + let mut c_argv: Vec<*const c_char> = cstrings.iter().map(|s| s.as_ptr()).collect(); + c_argv.push(ptr::null()); + let path = c_argv[0]; + + // Allocate shared semaphore. + let sem: *mut sem_t = unsafe { + let ptr = mmap( + ptr::null_mut(), + std::mem::size_of::(), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANON, + -1, 0, + ); + if ptr == MAP_FAILED { + eprintln!("[tee-grate] mmap failed"); + std::process::exit(-1); + } + ptr as *mut sem_t + }; + + if unsafe { sem_init(sem, 1, 0) } < 0 { + eprintln!("[tee-grate] sem_init failed"); + std::process::exit(-1); + } + + // Fork the child cage. + let child_pid = unsafe { fork() }; + if child_pid < 0 { + eprintln!("[tee-grate] fork failed"); + std::process::exit(-1); + } + + if child_pid == 0 { + // ── Child: wait for parent to register lifecycle handlers, then exec. + unsafe { sem_wait(sem) }; + let ret = unsafe { execv(path, c_argv.as_ptr()) }; + if ret < 0 { + eprintln!("[tee-grate] execv failed"); + } + std::process::exit(-1); + } + + // ── Parent: tee grate process. + let child_cage_id = child_pid as u64; + + println!( + "[tee-grate] forked child cage {} (tee_cage={})", + child_cage_id, tee_cage_id + ); + + // Mark child as managed and init its fdtables entry. + with_tee(|s| { s.managed_cages.insert(child_cage_id, ()); }); + if !fdtables::check_cage_exists(child_cage_id) { + fdtables::init_empty_cage(child_cage_id); + } + + // Register lifecycle handlers on the child. + register_lifecycle_handlers(child_cage_id); + + // Signal child to proceed. + unsafe { sem_post(sem) }; + + // Wait for all children. + loop { + let mut status: i32 = 0; + let ret = unsafe { waitpid(-1, &mut status as *mut i32 as *mut c_int, 0) }; + if ret <= 0 { break; } + println!("[tee-grate] child {} exited with status {}", ret, status); + } + + // Cleanup. 
+ unsafe { + sem_destroy(sem); + munmap(sem as *mut c_void, std::mem::size_of::()); + } + + println!("[tee-grate] exiting"); + std::process::exit(0); +} diff --git a/examples/tee-grate/src/tee.rs b/examples/tee-grate/src/tee.rs new file mode 100644 index 0000000..4ad4849 --- /dev/null +++ b/examples/tee-grate/src/tee.rs @@ -0,0 +1,431 @@ +//! Tee grate core — interposition logic and dispatch. +//! +//! The tee grate duplicates every intercepted syscall across two independent +//! handler chains (primary and secondary). The primary's return value is +//! authoritative. The secondary is best-effort: errors are logged, never +//! propagated to the caller. +//! +//! # How it works +//! +//! The tee grate interposes on `register_handler` (syscall 1001). When a grate +//! in either the primary or secondary stack calls `register_handler`, tee +//! intercepts it, allocates alt syscall numbers, and registers its own dispatch +//! handler on the target cage. At dispatch time, the tee handler calls both the +//! primary and secondary handlers via `make_threei_call` and returns the +//! primary's result. +//! +//! # Dispatch model +//! +//! Synchronous: call primary, then secondary, return primary's result. +//! The WASM environment is single-threaded, so background threads aren't +//! practical. The bounded buffer (default 64KB) caps how much pointer-argument +//! data we pre-copy for the secondary path. +//! +//! # Primary-only syscalls +//! +//! Syscalls with process-level side effects are NOT duplicated: +//! fork, clone, execve, exit. These are forwarded only to the primary handler +//! (or passed through to kernel if no primary handler exists). 
+ +use std::collections::HashMap; +use std::sync::Mutex; + +use grate_rs::constants::*; +use grate_rs::make_threei_call; + +// ===================================================================== +// Constants +// ===================================================================== + +/// Default maximum bytes to copy for secondary pointer arguments. +pub const DEFAULT_SECONDARY_BUFFER_LIMIT: usize = 64 * 1024; + +/// Syscalls that must NOT be duplicated — forwarded to primary only. +/// These have process-level side effects (new cage, cage death, address +/// space replacement) that would break if executed twice. +pub const PRIMARY_ONLY_SYSCALLS: &[u64] = &[ + SYS_FORK, // 57 + SYS_CLONE, // 56 + SYS_EXEC, // 59 (execve) + SYS_EXIT, // 60 +]; + +/// Base for alt syscall numbers — well above Lind's 1001-1003 range. +const ALT_SYSCALL_BASE: u64 = 2000; + +// ===================================================================== +// Global state +// ===================================================================== + +/// Global tee state, accessible from extern "C" handler functions. +pub static TEE_STATE: Mutex> = Mutex::new(None); + +/// Access the global tee state. Panics if not initialized. +pub fn with_tee(f: F) -> R +where + F: FnOnce(&mut TeeState) -> R, +{ + let mut guard = TEE_STATE.lock().unwrap(); + f(guard.as_mut().expect("TeeState not initialized")) +} + +// ===================================================================== +// Route table +// +// For each (cage_id, syscall_nr), we store the alt syscall numbers for +// both the primary and secondary handlers. When the tee dispatch handler +// fires, it calls both. +// ===================================================================== + +/// A route entry for a single (cage, syscall) pair. +#[derive(Clone, Debug)] +pub struct TeeRoute { + /// Alt syscall number for the primary handler. + pub primary_alt: Option, + /// Alt syscall number for the secondary handler. 
+ pub secondary_alt: Option, + /// Whether we've already registered the tee dispatch handler + /// on the target cage for this syscall. + pub tee_handler_registered: bool, +} + +/// The complete tee grate state. +pub struct TeeState { + /// The tee grate's own cage ID. + pub tee_cage_id: u64, + + /// Route table: (cage_id, syscall_nr) → TeeRoute. + pub routes: HashMap<(u64, u64), TeeRoute>, + + /// Cage ID of the primary grate process. + /// Registrations from this grate_id go into primary_alt. + pub primary_grate_id: Option, + + /// Cage ID of the secondary grate process. + /// Registrations from this grate_id go into secondary_alt. + pub secondary_grate_id: Option, + + /// Whether we are still intercepting register_handler calls. + /// Set to false when the %} exec boundary is detected, meaning + /// both grate stacks have finished registering their handlers. + pub intercepting: bool, + + /// Next available alt syscall number. + pub next_alt: u64, + + /// Maximum bytes to copy for secondary pointer arguments. + pub secondary_buffer_limit: usize, + + /// Set of cage IDs managed by the tee grate. + pub managed_cages: HashMap, +} + +impl TeeState { + pub fn new(tee_cage_id: u64, secondary_buffer_limit: usize) -> Self { + TeeState { + tee_cage_id, + routes: HashMap::new(), + primary_grate_id: None, + secondary_grate_id: None, + intercepting: true, + next_alt: ALT_SYSCALL_BASE, + secondary_buffer_limit, + managed_cages: HashMap::new(), + } + } + + /// Allocate the next alt syscall number. + pub fn alloc_alt(&mut self) -> u64 { + let nr = self.next_alt; + self.next_alt += 1; + nr + } + + /// Record a handler registration from one of the tee'd grates. + /// + /// Determines whether the registering grate is primary or secondary based + /// on grate_id, allocates an alt syscall number, and stores the route. + /// + /// Primary/secondary is auto-assigned by order of first appearance: + /// the first grate_id we see becomes primary, the second becomes secondary. 
+ /// + /// Returns the alt syscall number that was allocated. + pub fn record_registration( + &mut self, + target_cage: u64, + syscall_nr: u64, + grate_id: u64, + ) -> u64 { + // Auto-assign primary/secondary based on order of first appearance. + let is_primary = if self.primary_grate_id == Some(grate_id) { + true + } else if self.secondary_grate_id == Some(grate_id) { + false + } else if self.primary_grate_id.is_none() { + self.primary_grate_id = Some(grate_id); + true + } else if self.secondary_grate_id.is_none() { + self.secondary_grate_id = Some(grate_id); + false + } else { + // More than two grates — treat extras as secondary. + eprintln!( + "[tee-grate] warning: unknown grate_id={}, treating as secondary", + grate_id + ); + false + }; + + let alt_nr = self.alloc_alt(); + + let route = self + .routes + .entry((target_cage, syscall_nr)) + .or_insert_with(|| TeeRoute { + primary_alt: None, + secondary_alt: None, + tee_handler_registered: false, + }); + + if is_primary { + route.primary_alt = Some(alt_nr); + } else { + route.secondary_alt = Some(alt_nr); + } + + // Track this cage. + self.managed_cages.insert(target_cage, ()); + + alt_nr + } + + /// Mark the tee dispatch handler as registered for a (cage, syscall). + pub fn mark_handler_registered(&mut self, target_cage: u64, syscall_nr: u64) { + if let Some(route) = self.routes.get_mut(&(target_cage, syscall_nr)) { + route.tee_handler_registered = true; + } + } + + /// Check if the tee handler is already registered for a (cage, syscall). + pub fn is_handler_registered(&self, target_cage: u64, syscall_nr: u64) -> bool { + self.routes + .get(&(target_cage, syscall_nr)) + .map(|r| r.tee_handler_registered) + .unwrap_or(false) + } + + /// Look up the route for a (cage, syscall). + pub fn get_route(&self, cage_id: u64, syscall_nr: u64) -> Option<&TeeRoute> { + self.routes.get(&(cage_id, syscall_nr)) + } + + /// Check if a cage is managed by the tee grate. 
+ pub fn is_managed(&self, cage_id: u64) -> bool { + self.managed_cages.contains_key(&cage_id) + } + + /// Clone route table entries from parent to child cage (on fork). + pub fn clone_cage_state(&mut self, parent: u64, child: u64) { + let parent_routes: Vec<_> = self + .routes + .iter() + .filter(|&(&(cid, _), _)| cid == parent) + .map(|(&(_, syscall_nr), route)| ((child, syscall_nr), route.clone())) + .collect(); + for (key, val) in parent_routes { + self.routes.insert(key, val); + } + self.managed_cages.insert(child, ()); + } + + /// Remove all state for a cage (on exit). + pub fn remove_cage_state(&mut self, cage_id: u64) { + self.routes.retain(|&(cid, _), _| cid != cage_id); + self.managed_cages.remove(&cage_id); + } +} + +// ===================================================================== +// Dispatch logic +// ===================================================================== + +/// Execute a syscall via make_threei_call. +/// +/// source_cage (tee grate) is used for handler table lookup. +/// calling_cage is the cage that made the syscall — used as operational target. +pub fn do_syscall(calling_cage: u64, nr: u64, args: &[u64; 6], arg_cages: &[u64; 6]) -> i32 { + let tee_cage = { + let guard = TEE_STATE.lock().unwrap(); + guard.as_ref().expect("TeeState not initialized").tee_cage_id + }; + match make_threei_call( + nr as u32, 0, tee_cage, calling_cage, + args[0], arg_cages[0], + args[1], arg_cages[1], + args[2], arg_cages[2], + args[3], arg_cages[3], + args[4], arg_cages[4], + args[5], arg_cages[5], + 0, + ) { + Ok(ret) => ret, + Err(_) => -1, + } +} + +/// Core tee dispatch: call primary, then secondary, return primary's result. +/// +/// For primary-only syscalls (fork, exec, exit, clone), the secondary is +/// skipped entirely. Secondary errors are logged to stderr and never +/// propagated to the caller. 
+pub fn tee_dispatch( + syscall_nr: u64, + cage_id: u64, + args: [u64; 6], + arg_cages: [u64; 6], +) -> i32 { + let (primary_alt, secondary_alt) = { + let guard = TEE_STATE.lock().unwrap(); + let state = guard.as_ref().expect("TeeState not initialized"); + let route = match state.get_route(cage_id, syscall_nr) { + Some(r) => r, + None => { + // No route — passthrough to kernel. + return do_syscall(cage_id, syscall_nr, &args, &arg_cages); + } + }; + (route.primary_alt, route.secondary_alt) + }; + + // ── Primary dispatch ──────────────────────────────────────────── + // Use the alt syscall if registered, otherwise passthrough the + // original syscall number (goes to kernel). + let primary_nr = primary_alt.unwrap_or(syscall_nr); + let primary_result = do_syscall(cage_id, primary_nr, &args, &arg_cages); + + // ── Secondary dispatch (best-effort) ──────────────────────────── + // Skip for syscalls with process-level side effects — executing + // fork/exec/exit twice would create duplicate cages or kill the + // wrong process. + if PRIMARY_ONLY_SYSCALLS.contains(&syscall_nr) { + return primary_result; + } + + if let Some(sec_alt) = secondary_alt { + // Call secondary with the same args. Each handler does its own + // copy_data_between_cages internally, so the two paths don't + // share any local buffers. + let sec_result = do_syscall(cage_id, sec_alt, &args, &arg_cages); + + // Log secondary errors but never propagate them. + if sec_result < 0 { + eprintln!( + "[tee-grate] secondary error: syscall={} ret={}", + syscall_nr, sec_result + ); + } + } + + // Always return the primary's result. 
+ primary_result +} + +// ===================================================================== +// Tests +// ===================================================================== + +#[cfg(test)] +mod tests { + use super::*; + + fn make_state() -> TeeState { + TeeState::new(100, DEFAULT_SECONDARY_BUFFER_LIMIT) + } + + #[test] + fn test_auto_assign_primary_secondary() { + let mut state = make_state(); + + // First grate to register becomes primary. + state.record_registration(10, SYS_OPEN, 200); + assert_eq!(state.primary_grate_id, Some(200)); + assert_eq!(state.secondary_grate_id, None); + + // Second grate becomes secondary. + state.record_registration(10, SYS_OPEN, 300); + assert_eq!(state.primary_grate_id, Some(200)); + assert_eq!(state.secondary_grate_id, Some(300)); + } + + #[test] + fn test_route_stores_both_alts() { + let mut state = make_state(); + + // Primary registers OPEN. + let primary_alt = state.record_registration(10, SYS_OPEN, 200); + + // Secondary registers OPEN. + let secondary_alt = state.record_registration(10, SYS_OPEN, 300); + + let route = state.get_route(10, SYS_OPEN).unwrap(); + assert_eq!(route.primary_alt, Some(primary_alt)); + assert_eq!(route.secondary_alt, Some(secondary_alt)); + } + + #[test] + fn test_primary_only_syscalls() { + // fork, clone, exec, exit should not be duplicated. + assert!(PRIMARY_ONLY_SYSCALLS.contains(&SYS_FORK)); + assert!(PRIMARY_ONLY_SYSCALLS.contains(&SYS_CLONE)); + assert!(PRIMARY_ONLY_SYSCALLS.contains(&SYS_EXEC)); + assert!(PRIMARY_ONLY_SYSCALLS.contains(&SYS_EXIT)); + + // Regular syscalls should not be in the list. 
+ assert!(!PRIMARY_ONLY_SYSCALLS.contains(&SYS_OPEN)); + assert!(!PRIMARY_ONLY_SYSCALLS.contains(&SYS_READ)); + assert!(!PRIMARY_ONLY_SYSCALLS.contains(&SYS_WRITE)); + } + + #[test] + fn test_clone_cage_state() { + let mut state = make_state(); + + state.record_registration(10, SYS_OPEN, 200); + state.record_registration(10, SYS_OPEN, 300); + state.record_registration(10, SYS_WRITE, 200); + + // Clone parent cage 10 to child cage 20. + state.clone_cage_state(10, 20); + + // Child should have the same routes. + assert!(state.get_route(20, SYS_OPEN).is_some()); + assert!(state.get_route(20, SYS_WRITE).is_some()); + assert!(state.is_managed(20)); + } + + #[test] + fn test_remove_cage_state() { + let mut state = make_state(); + + state.record_registration(10, SYS_OPEN, 200); + assert!(state.is_managed(10)); + + state.remove_cage_state(10); + assert!(!state.is_managed(10)); + assert!(state.get_route(10, SYS_OPEN).is_none()); + } + + #[test] + fn test_alt_allocation_is_unique() { + let mut state = make_state(); + + let a1 = state.alloc_alt(); + let a2 = state.alloc_alt(); + let a3 = state.alloc_alt(); + + assert_ne!(a1, a2); + assert_ne!(a2, a3); + assert_eq!(a1, ALT_SYSCALL_BASE); + assert_eq!(a2, ALT_SYSCALL_BASE + 1); + } +} diff --git a/examples/tee-grate/test/tee_test.c b/examples/tee-grate/test/tee_test.c new file mode 100644 index 0000000..25dd160 --- /dev/null +++ b/examples/tee-grate/test/tee_test.c @@ -0,0 +1,309 @@ +/* tee_test.c — Test binary for the tee grate. + * + * Exercises the tee grate's syscall duplication, primary-wins semantics, + * secondary isolation, fork behavior, and fd lifecycle. + * + * Expected invocation: + * lind-wasm tee-grate.cwasm --primary imfs-grate.cwasm \ + * --secondary imfs-grate.cwasm -- tee_test.cwasm + * + * Each test prints PASS/FAIL. Exit code 0 if all pass. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +static int tests_run = 0; +static int tests_passed = 0; + +#define CHECK(desc, cond) do { \ + tests_run++; \ + if (cond) { \ + printf(" PASS: %s\n", desc); \ + tests_passed++; \ + } else { \ + printf(" FAIL: %s (errno=%d)\n", desc, errno); \ + } \ +} while (0) + +/* ── Test 1: Primary return value is authoritative ─────────────────────── */ + +static void test_primary_wins(void) { + printf("\n[test_primary_wins]\n"); + + int fd = open("/tee_primary", O_CREAT | O_RDWR, 0644); + CHECK("open returns valid fd (primary wins)", fd >= 0); + + if (fd >= 0) { + const char *msg = "primary data"; + ssize_t nw = write(fd, msg, strlen(msg)); + CHECK("write returns correct count", nw == (ssize_t)strlen(msg)); + + lseek(fd, 0, SEEK_SET); + char buf[64] = {0}; + ssize_t nr = read(fd, buf, sizeof(buf) - 1); + CHECK("read returns correct count", nr == (ssize_t)strlen(msg)); + CHECK("read data matches written", memcmp(buf, msg, strlen(msg)) == 0); + + close(fd); + } +} + +/* ── Test 2: Secondary errors don't affect the caller ──────────────────── */ + +static void test_secondary_isolation(void) { + printf("\n[test_secondary_isolation]\n"); + + int ok = 1; + for (int i = 0; i < 10; i++) { + char path[64]; + snprintf(path, sizeof(path), "/tee_iso_%d", i); + + int fd = open(path, O_CREAT | O_WRONLY, 0644); + if (fd < 0) { ok = 0; break; } + + write(fd, "x", 1); + close(fd); + unlink(path); + } + + tests_run++; + if (ok) { + printf(" PASS: 10 create/write/close/unlink cycles with no errors\n"); + tests_passed++; + } else { + printf(" FAIL: secondary isolation broken\n"); + } +} + +/* ── Test 3: Fork is not duplicated ────────────────────────────────────── */ + +static void test_fork_not_duplicated(void) { + printf("\n[test_fork_not_duplicated]\n"); + + pid_t pid = fork(); + CHECK("fork succeeds", pid >= 0); + + if (pid < 0) return; + + if (pid == 0) { + _exit(42); + } + + int status = 0; + pid_t waited = 
waitpid(pid, &status, 0); + CHECK("waitpid returns the child pid", waited == pid); + CHECK("child exited with status 42", WIFEXITED(status) && WEXITSTATUS(status) == 42); +} + +/* ── Test 4: Large write data integrity ────────────────────────────────── */ + +static void test_large_write(void) { + printf("\n[test_large_write]\n"); + + char wbuf[3000]; + for (int i = 0; i < 3000; i++) + wbuf[i] = 'A' + (i % 26); + + int fd = open("/tee_large", O_CREAT | O_RDWR, 0644); + CHECK("create /tee_large", fd >= 0); + if (fd < 0) return; + + ssize_t nw = write(fd, wbuf, 3000); + CHECK("write 3000 bytes", nw == 3000); + + lseek(fd, 0, SEEK_SET); + + char rbuf[3000] = {0}; + ssize_t nr = read(fd, rbuf, 3000); + CHECK("read 3000 bytes back", nr == 3000); + CHECK("data matches", memcmp(rbuf, wbuf, 3000) == 0); + + close(fd); +} + +/* ── Test 5: Close and reopen ──────────────────────────────────────────── */ + +static void test_close_reopen(void) { + printf("\n[test_close_reopen]\n"); + + int fd1 = open("/tee_reopen", O_CREAT | O_WRONLY, 0644); + CHECK("create file", fd1 >= 0); + if (fd1 < 0) return; + + write(fd1, "hello", 5); + int ret = close(fd1); + CHECK("close succeeds", ret == 0); + + int fd2 = open("/tee_reopen", O_RDONLY); + CHECK("reopen for read", fd2 >= 0); + if (fd2 < 0) return; + + char buf[16] = {0}; + ssize_t nr = read(fd2, buf, sizeof(buf) - 1); + CHECK("read after reopen", nr == 5); + CHECK("data is 'hello'", memcmp(buf, "hello", 5) == 0); + + close(fd2); +} + +/* ── Test 6: Dup preserves fd across tee ───────────────────────────────── */ + +static void test_dup(void) { + printf("\n[test_dup]\n"); + + int fd = open("/tee_dup", O_CREAT | O_RDWR, 0644); + CHECK("create file", fd >= 0); + if (fd < 0) return; + + int fd2 = dup(fd); + CHECK("dup succeeds", fd2 >= 0); + + if (fd2 >= 0) { + write(fd2, "dup data", 8); + lseek(fd, 0, SEEK_SET); + + char buf[16] = {0}; + ssize_t nr = read(fd, buf, sizeof(buf) - 1); + CHECK("read through original after dup write", nr == 8); 
+ CHECK("data matches", memcmp(buf, "dup data", 8) == 0); + + close(fd2); + } + + close(fd); +} + +/* ── Test 7: Stdout passthrough ────────────────────────────────────────── */ + +static void test_stdout(void) { + printf("\n[test_stdout]\n"); + + const char *msg = " PASS: stdout passthrough works\n"; + ssize_t nw = write(1, msg, strlen(msg)); + tests_run++; + if (nw > 0) tests_passed++; +} + +/* ── Test 8: Fork child inherits tee routing ───────────────────────────── */ +/* Child should be able to use the tee'd grate stack just like the parent. */ + +static void test_fork_inherits_routing(void) { + printf("\n[test_fork_inherits_routing]\n"); + + pid_t pid = fork(); + if (pid == 0) { + /* Child: create a file through the tee'd grate. */ + int fd = open("/tee_fork_child_file", O_CREAT | O_WRONLY, 0644); + if (fd < 0) _exit(1); + ssize_t nw = write(fd, "child wrote this", 16); + close(fd); + _exit(nw == 16 ? 0 : 1); + } + + int status; + waitpid(pid, &status, 0); + CHECK("child created file through tee", + WIFEXITED(status) && WEXITSTATUS(status) == 0); + + /* Parent: verify the child's file. 
*/ + int fd = open("/tee_fork_child_file", O_RDONLY); + CHECK("parent can open child's file", fd >= 0); + if (fd >= 0) { + char buf[32] = {0}; + ssize_t nr = read(fd, buf, sizeof(buf)); + CHECK("child's file has correct data", + nr == 16 && memcmp(buf, "child wrote this", 16) == 0); + close(fd); + } +} + +/* ── Test 9: Multiple files open simultaneously ────────────────────────── */ + +static void test_multiple_open(void) { + printf("\n[test_multiple_open]\n"); + + int fd1 = open("/tee_multi_1", O_CREAT | O_RDWR | O_TRUNC, 0644); + int fd2 = open("/tee_multi_2", O_CREAT | O_RDWR | O_TRUNC, 0644); + int fd3 = open("/tee_multi_3", O_CREAT | O_RDWR | O_TRUNC, 0644); + + CHECK("3 files open simultaneously", fd1 >= 0 && fd2 >= 0 && fd3 >= 0); + + write(fd1, "one", 3); + write(fd2, "two_data", 8); + write(fd3, "three", 5); + + char buf[64] = {0}; + + lseek(fd1, 0, SEEK_SET); + ssize_t nr = read(fd1, buf, sizeof(buf)); + CHECK("fd1 correct", nr == 3 && memcmp(buf, "one", 3) == 0); + + lseek(fd2, 0, SEEK_SET); + nr = read(fd2, buf, sizeof(buf)); + CHECK("fd2 correct", nr == 8 && memcmp(buf, "two_data", 8) == 0); + + lseek(fd3, 0, SEEK_SET); + nr = read(fd3, buf, sizeof(buf)); + CHECK("fd3 correct", nr == 5 && memcmp(buf, "three", 5) == 0); + + close(fd1); close(fd2); close(fd3); +} + +/* ── Test 10: Rapid create/close cycles ────────────────────────────────── */ + +static void test_rapid_lifecycle(void) { + printf("\n[test_rapid_lifecycle]\n"); + + int ok = 1; + for (int i = 0; i < 50; i++) { + char path[64]; + snprintf(path, sizeof(path), "/tee_rapid_%d", i); + + int fd = open(path, O_CREAT | O_RDWR, 0644); + if (fd < 0) { ok = 0; break; } + + char data = (char)('A' + (i % 26)); + write(fd, &data, 1); + + lseek(fd, 0, SEEK_SET); + char buf = 0; + ssize_t nr = read(fd, &buf, 1); + if (nr != 1 || buf != data) { ok = 0; break; } + + close(fd); + unlink(path); + } + + tests_run++; + if (ok) { + printf(" PASS: 50 rapid create/write/read/close/unlink cycles\n"); + 
tests_passed++; + } else { + printf(" FAIL: rapid lifecycle broke\n"); + } +} + +/* ── Main ──────────────────────────────────────────────────────────────── */ + +int main(void) { + printf("=== tee grate test ===\n"); + + test_primary_wins(); + test_secondary_isolation(); + test_fork_not_duplicated(); + test_large_write(); + test_close_reopen(); + test_dup(); + test_stdout(); + test_fork_inherits_routing(); + test_multiple_open(); + test_rapid_lifecycle(); + + printf("\n=== results: %d/%d passed ===\n", tests_passed, tests_run); + return (tests_passed == tests_run) ? 0 : 1; +}