Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ lind-boot: build-dir
cargo build --manifest-path src/lind-boot/Cargo.toml --release
cp src/lind-boot/target/release/lind-boot $(LINDBOOT_BIN)

# lind-boot-perf: release build of lind-boot with the lind_perf Cargo feature turned on.
# The result is copied to the same LINDBOOT_BIN path that the plain lind-boot target
# writes, so whichever of the two targets ran last determines the installed binary.
.PHONY: lind-boot-perf
lind-boot-perf: build-dir
# Build lind-boot with low-overhead cycle counters enabled.
cargo build --manifest-path src/lind-boot/Cargo.toml --release --features lind_perf
cp src/lind-boot/target/release/lind-boot $(LINDBOOT_BIN)

.PHONY: lindfs
lindfs:
@for d in $(LINDFS_DIRS); do \
Expand Down
32 changes: 32 additions & 0 deletions scripts/run_microbench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash

# Compiles the syscall microbenchmarks under tests/benchmarks, moves the resulting
# .wasm files into lindfs/, and runs each of them with `lind-boot --perf`.
#
# Requires lind-boot to be built with the `lind_perf` feature.
# Use `make lind-boot-perf` for this.

set -euo pipefail

# Check if we need to re-exec with sudo
if [[ $EUID -ne 0 ]]; then
    # Not running as root, re-exec with sudo
    exec sudo -E "$0" "$@"
fi

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# The repository root is the parent of scripts/. Using dirname (rather than stripping
# the "scripts" suffix) avoids a trailing slash and the resulting "//" in derived paths.
REPO_ROOT="$(dirname -- "${SCRIPT_DIR}")"
BENCH_ROOT="${REPO_ROOT}/tests/benchmarks"

# compile_bench NAME [LIND_COMPILE_OPTION...]
#
# Compiles ${BENCH_ROOT}/NAME.c with lind_compile (any extra options are passed before
# the source path) and moves NAME.wasm into lindfs/.
#
# Compiler output stays hidden on success, as before. On failure it is printed and the
# script exits: a bare `cmd && mv ...` list would NOT trip `set -e` when `cmd` fails,
# and the benchmarks would then run against a missing or stale .wasm file.
compile_bench() {
    local name="$1"
    shift

    local output
    if ! output="$("${SCRIPT_DIR}/lind_compile" "$@" "${BENCH_ROOT}/${name}.c" 2>&1)"; then
        echo "error: failed to compile ${BENCH_ROOT}/${name}.c" >&2
        printf '%s\n' "${output}" >&2
        exit 1
    fi

    mv "${BENCH_ROOT}/${name}.wasm" "${REPO_ROOT}/lindfs/"
}

echo "Compiling Tests..."

compile_bench libc_syscall
compile_bench fdtables_syscall
compile_bench grate_syscall --compile-grate

echo -en "\nLIBC Test\t"
sudo lind-boot --perf libc_syscall.wasm

echo -en "\nFDTABLE Test\t"
sudo lind-boot --perf fdtables_syscall.wasm

echo -en "\nGRATE Test\t"
sudo lind-boot --perf grate_syscall.wasm libc_syscall.wasm
4 changes: 4 additions & 0 deletions src/fdtables/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,15 @@ categories = ["os", "filesystem"]
[dependencies]
libc = "0.2"
dashmap = { version = "5.1", features=["serde"] }
lind-perf = { path = "../lind-perf", optional = true }

[dependencies.lazy_static]
version = "1.0"
features = ["spin_no_std"]

[features]
lind_perf = ["dep:lind-perf"]

[dev-dependencies]
criterion = { version = "0.3", features = ["html_reports"]}

Expand Down
51 changes: 31 additions & 20 deletions src/fdtables/src/dashmaparrayglobal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
// Static DashMap. Let's see if having the FDTableEntries be a static
// array is any faster...

#[cfg(feature = "lind_perf")]
use crate::perf;
use crate::threei;

use dashmap::DashMap;
Expand Down Expand Up @@ -393,33 +395,42 @@ lazy_static! {

#[doc = include_str!("../docs/close_virtualfd.md")]
pub fn close_virtualfd(cageid:u64, virtfd:u64) -> Result<(),threei::RetVal> {
#[cfg(feature = "lind_perf")]
let _close_vfd_scope = perf::enabled::CLOSE_VIRTUALFD.scope();

let ret = (|| {
// Below condition checks if the virtualfd is out of bounds and if yes it throws an error
// Note that this assumes that all virtualfd numbers returned < FD_PER_PROCESS_MAX
if virtfd >= FD_PER_PROCESS_MAX {
return Err(threei::Errno::EBADFD as u64);
}

// Below condition checks if the virtualfd is out of bounds and if yes it throws an error
// Note that this assumes that all virtualfd numbers returned < FD_PER_PROCESS_MAX
if virtfd >= FD_PER_PROCESS_MAX {
return Err(threei::Errno::EBADFD as u64);
}
assert!(FDTABLE.contains_key(&cageid),"Unknown cageid in fdtable access");

assert!(FDTABLE.contains_key(&cageid),"Unknown cageid in fdtable access");
// derefing this so I don't hold a lock and deadlock close handlers
let mut myfdrow = *FDTABLE.get_mut(&cageid).unwrap();

// derefing this so I don't hold a lock and deadlock close handlers
let mut myfdrow = *FDTABLE.get_mut(&cageid).unwrap();

if myfdrow[virtfd as usize].is_some() {
let entry = myfdrow[virtfd as usize];

if myfdrow[virtfd as usize].is_some() {
let entry = myfdrow[virtfd as usize];
// Zero out this entry before calling the close handler...
myfdrow[virtfd as usize] = None;

// Zero out this entry before calling the close handler...
myfdrow[virtfd as usize] = None;
// Re-insert the modified myfdrow since I've been modifying a copy
FDTABLE.insert(cageid, myfdrow.clone());

// Re-insert the modified myfdrow since I've been modifying a copy
FDTABLE.insert(cageid, myfdrow.clone());

// always _decrement last as it may call the user handler...
_decrement_fdcount(entry.unwrap());
return Ok(());
}
Err(threei::Errno::EBADFD as u64)
// always _decrement last as it may call the user handler...
_decrement_fdcount(entry.unwrap());
return Ok(());
}
Err(threei::Errno::EBADFD as u64)
})();

#[cfg(feature = "lind_perf")]
std::hint::black_box(&_close_vfd_scope);

ret
}


Expand Down
4 changes: 4 additions & 0 deletions src/fdtables/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@
// This includes the specific implementation of the algorithm chosen.
include!("current_impl");

/// Enable lind_perf related features for benchmarking.
#[cfg(feature = "lind_perf")]
pub mod perf;

// This includes general constants and definitions for things that are
// needed everywhere, like FDTableEntry. I use the * import here to flatten
// the namespace so folks importing this have the symbols directly imported.
Expand Down
11 changes: 11 additions & 0 deletions src/fdtables/src/perf.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/// Performance counters for the `fdtables` crate, backed by `lind_perf`.
///
/// Only compiled when the `lind_perf` feature is enabled. The parent `perf` module is
/// already declared under the same `cfg` in `lib.rs`, so the attribute below repeats
/// that gate.
#[cfg(feature = "lind_perf")]
pub mod enabled {
use lind_perf::Counter;

/// Counter for `close_virtualfd`, registered under the name `"fdtables::close_virtualfd"`.
///
/// `close_virtualfd` takes a scope from this counter (`CLOSE_VIRTUALFD.scope()`) at the
/// top of the function and holds it until the function returns.
pub static CLOSE_VIRTUALFD: Counter = Counter::new("fdtables::close_virtualfd");

/// References to every counter defined in this module.
///
/// Keep this in sync when adding a counter above.
/// NOTE(review): presumably iterated by the benchmark driver to report or reset
/// counters — confirm against the consumer in lind-boot.
pub static ALL_COUNTERS: &[&Counter] = &[&CLOSE_VIRTUALFD];
}
37 changes: 37 additions & 0 deletions src/glibc/lind_syscall/lind_syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,40 @@ int copy_data_between_cages(uint64_t thiscage, uint64_t targetcage, uint64_t src
0 /* translate_errno=0: we want to return the raw result without errno translation */
);
}

// ---------------------------------------------------------------------------------------------------------------------

// Wrapper for LIBC_SYSCALL which is used to benchmark calls that end up calling the Linux kernel.
int libc_syscall() {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also mention in comment about scenarios it might be used?

return make_threei_call(
LIBC_SYSCALL,
0,
__lind_cageid,
__lind_cageid,
0, __lind_cageid,
0, __lind_cageid,
0, __lind_cageid,
0, __lind_cageid,
0, __lind_cageid,
0, __lind_cageid,
1
);
}

// Wrapper for FDTABLE_SYSCALL, which is used to benchmark calls that do not call the Linux kernel but instead
// perform some internal processing.
//
// Takes no arguments. Issues FDTABLE_SYSCALL through make_threei_call, passing __lind_cageid
// wherever a cage id is expected, -1 as the first syscall argument and 0 for the other five,
// and returns whatever make_threei_call returns.
// NOTE(review): the trailing 1 is presumably translate_errno=1 (copy_data_between_cages labels
// the same position translate_errno and passes 0) — confirm against make_threei_call.
int fdtable_syscall() {
return make_threei_call(
FDTABLE_SYSCALL,
0,
__lind_cageid,
__lind_cageid,
-1, __lind_cageid, // This syscall mimics close(-1), so the fd argument is fixed to -1 (an invalid fd).
0, __lind_cageid,
0, __lind_cageid,
0, __lind_cageid,
0, __lind_cageid,
0, __lind_cageid,
1
);
}
3 changes: 3 additions & 0 deletions src/glibc/lind_syscall/lind_syscall.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,7 @@ int copy_data_between_cages(uint64_t thiscage, uint64_t targetcage,
uint64_t destaddr, uint64_t destcage,
uint64_t len, uint64_t copytype);

/*
 * Benchmark-only wrappers around LIBC_SYSCALL and FDTABLE_SYSCALL; both are
 * defined in lind_syscall.c and take no arguments.
 *
 * The second prototype must match the definition's name, fdtable_syscall.
 * Declaring it under a different name leaves callers that include this header
 * without a prototype for the function that actually exists.
 */
int libc_syscall(void);
int fdtable_syscall(void);

#endif // _LIND_SYSCALL_H
4 changes: 4 additions & 0 deletions src/glibc/lind_syscall/lind_syscall_num.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,5 +121,9 @@
#define REGISTER_HANDLER_SYSCALL 1001
#define COPY_DATA_BETWEEN_CAGES_SYSCALL 1002

/* Special syscalls used for benchmarking */
#define LIBC_SYSCALL 2001
#define FDTABLE_SYSCALL 2002

#endif /* _LIND_SYSCALL_NUM_H */

8 changes: 8 additions & 0 deletions src/lind-boot/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ edition = "2024"
disable_signals = []
secure = ["typemap/secure"]
lind_debug = ["wasmtime-lind-common/lind_debug"]
lind_perf = [
"dep:lind-perf",
"threei/lind_perf",
"rawposix/lind_perf",
"wasmtime-lind-common/lind_perf",
]

[dependencies]
wasmtime-lind-common = { path = "../wasmtime/crates/lind-common" }
Expand All @@ -17,6 +23,7 @@ cage = { path = "../wasmtime/crates/cage" }
threei = { path = "../wasmtime/crates/threei" }
sysdefs = { path = "../wasmtime/crates/sysdefs" }
typemap = { path = "../wasmtime/crates/typemap" }
fdtables = { path = "../wasmtime/crates/fdtables" }
wasi-common = { path = "../wasmtime/crates/wasi-common", features = ["sync"] ,default-features = false }
wasmtime-lind-3i = { path = "../wasmtime/crates/lind-3i" }
wasmtime = { path = "../wasmtime/crates/wasmtime", features = ["cranelift", "pooling-allocator", "gc", "threads", "demangle", "addr2line"], default-features = false }
Expand All @@ -28,3 +35,4 @@ anyhow = { version = "1.0.66", default-features = false }
cap-std = { version = "3.4.2", default-features = false }
clap = { version = "4", features = ["derive"] }
cfg-if = "1.0"
lind-perf = { path = "../wasmtime/crates/lind-perf", optional = true }
10 changes: 10 additions & 0 deletions src/lind-boot/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@ pub struct CliOptions {
/// cause the environment variable `FOO` to be inherited.
#[arg(long = "env", number_of_values = 1, value_name = "NAME[=VAL]", value_parser = parse_env_var)]
pub vars: Vec<(String, Option<String>)>,

/// Run performance benchmark with CLOCK_GETTIME (requires the `lind_perf` feature)
#[cfg(feature = "lind_perf")]
#[arg(long)]
pub perf: bool,

/// Run performance benchmarks with TSC (requires the `lind_perf` feature)
#[cfg(feature = "lind_perf")]
#[arg(long)]
pub perftsc: bool,
}

pub fn parse_env_var(s: &str) -> Result<(String, Option<String>), String> {
Expand Down
11 changes: 10 additions & 1 deletion src/lind-boot/src/lind_wasmtime/execute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,17 @@ use sysdefs::constants::lind_platform_const::{RAWPOSIX_CAGEID, WASMTIME_CAGEID};
use threei::threei_const;
use wasi_common::sync::WasiCtxBuilder;
use wasmtime::{
AsContextMut, Engine, Func, InstantiateType, Linker, Module, Precompiled, Store, Val, ValType,
AsContextMut, Engine, Func, InstantiateType, Linker, Module, Store, Val, ValType,
WasmBacktraceDetails,
};
use wasmtime_lind_3i::{VmCtxWrapper, init_vmctx_pool, rm_vmctx, set_vmctx, set_vmctx_thread};
use wasmtime_lind_multi_process::{CAGE_START_ID, LindCtx, THREAD_START_ID};
use wasmtime_lind_utils::LindCageManager;
use wasmtime_wasi_threads::WasiThreadsCtx;

#[cfg(feature = "lind_perf")]
use crate::perf;

/// Boots the Lind + RawPOSIX + 3i runtime and executes the initial Wasm program
/// in the first cage.
///
Expand Down Expand Up @@ -376,6 +379,9 @@ fn load_main_module(
cageid: u64,
args: &[String],
) -> Result<Vec<Val>> {
#[cfg(feature = "lind_perf")]
let _perf_scope = perf::enabled::LOAD_MAIN_MODULE.scope();

// todo:
// I don't setup `epoch_handler` since it seems not being used by our previous implementation.
// Not sure if this is related to our thread exit problem
Expand Down Expand Up @@ -534,6 +540,9 @@ fn read_wasm_or_cwasm(engine: &Engine, path: &Path) -> Result<Module> {
/// This function takes a Wasm function (Func) and a list of string arguments, parses the
/// arguments into Wasm values based on expected types (ValType), and invokes the function
fn invoke_func(store: &mut Store<HostCtx>, func: Func, args: &[String]) -> Result<Vec<Val>> {
#[cfg(feature = "lind_perf")]
let _perf_scope = perf::enabled::INVOKE_FUNC.scope();

let ty = func.ty(&store);
if ty.params().len() > 0 {
eprintln!(
Expand Down
27 changes: 25 additions & 2 deletions src/lind-boot/src/lind_wasmtime/trampoline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ use wasmtime::{Caller, Instance};
use wasmtime_lind_3i::{VmCtxWrapper, get_vmctx, set_vmctx};
use wasmtime_lind_multi_process;

#[cfg(feature = "lind_perf")]
use crate::perf;

/// The callback function registered with 3i uses a unified Wasm entry
/// function as the single re-entry point into the Wasm executable.
///
Expand Down Expand Up @@ -45,12 +48,19 @@ pub extern "C" fn grate_callback_trampoline(
arg6: u64,
arg6cageid: u64,
) -> i32 {
#[cfg(feature = "lind_perf")]
let _trampoline_scope = perf::enabled::GRATE_CALLBACK_TRAMPOLINE.scope();

#[cfg(feature = "lind_perf")]
let _get_vmctx_scope = perf::enabled::TRAMPOLINE_GET_VMCTX.scope();
let vmctx_wrapper: VmCtxWrapper = match get_vmctx(cageid) {
Some(v) => v,
None => {
panic!("no VMContext found for cage_id {}", cageid);
}
};
#[cfg(feature = "lind_perf")]
drop(_get_vmctx_scope);

// Convert back to VMContext
let opaque: *mut VMOpaqueContext = vmctx_wrapper.as_ptr() as *mut VMOpaqueContext;
Expand All @@ -59,20 +69,26 @@ pub extern "C" fn grate_callback_trampoline(

// Re-enter Wasmtime using the stored vmctx pointer
let grate_ret = unsafe {
#[cfg(feature = "lind_perf")]
let _caller_scope = perf::enabled::TRAMPOLINE_CALLER_WITH.scope();
Caller::with(vmctx_raw, |caller: Caller<'_, HostCtx>| {
let Caller {
mut store,
caller: instance,
} = caller;

// Resolve the unified entry function once per call
#[cfg(feature = "lind_perf")]
let _get_entry_scope = perf::enabled::TRAMPOLINE_GET_PASS_FPTR_TO_WT.scope();
let entry_func = instance
.host_state()
.downcast_ref::<Instance>()
.ok_or_else(|| anyhow!("bad host_state Instance"))?
.get_export(&mut store, "pass_fptr_to_wt")
.and_then(|f| f.into_func())
.ok_or_else(|| anyhow!("missing export `pass_fptr_to_wt`"))?;
#[cfg(feature = "lind_perf")]
drop(_get_entry_scope);

let typed_func = entry_func.typed::<(
u64,
Expand All @@ -92,7 +108,9 @@ pub extern "C" fn grate_callback_trampoline(
), i32>(&mut store)?;

// Call the entry function with all arguments and in grate function pointer
typed_func.call(
#[cfg(feature = "lind_perf")]
let _call_scope = perf::enabled::TRAMPOLINE_TYPED_DISPATCH_CALL.scope();
let call_res = typed_func.call(
&mut store,
(
in_grate_fn_ptr_u64,
Expand All @@ -110,12 +128,17 @@ pub extern "C" fn grate_callback_trampoline(
arg6,
arg6cageid,
),
)
);
#[cfg(feature = "lind_perf")]
drop(_call_scope);
call_res
})
.unwrap_or(threei_const::GRATE_ERR)
};
// Push the vmctx back to the global pool
set_vmctx(cageid, vmctx_wrapper);
#[cfg(feature = "lind_perf")]
std::hint::black_box(&_trampoline_scope);
grate_ret
}

Expand Down
Loading
Loading