Skip to content

Commit b40e02f

Browse files
committed
Added the experimental syscall_overrides config option
This allows users to override the behaviour of emulated syscalls using Lua expressions.
1 parent 6255a8e commit b40e02f

7 files changed

Lines changed: 192 additions & 2 deletions

File tree

docs/shadow_config_spec.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ hosts:
8585
- [`experimental.socket_send_autotune`](#experimentalsocket_send_autotune)
8686
- [`experimental.socket_send_buffer`](#experimentalsocket_send_buffer)
8787
- [`experimental.strace_logging_mode`](#experimentalstrace_logging_mode)
88+
- [`experimental.syscall_overrides`](#experimentalsyscall_overrides)
8889
- [`experimental.unblocked_syscall_latency`](#experimentalunblocked_syscall_latency)
8990
- [`experimental.unblocked_vdso_latency`](#experimentalunblocked_vdso_latency)
9091
- [`experimental.use_cpu_pinning`](#experimentaluse_cpu_pinning)
@@ -425,6 +426,54 @@ Limitations:
425426
process may not actually see this return value. Instead the syscall may be
426427
restarted.
427428

429+
#### `experimental.syscall_overrides`
430+
431+
Default: {}
432+
Type: Object
433+
434+
Override the behaviour of emulated syscalls.
435+
436+
A map of syscall numbers to lists of [Lua expressions][lua]. For each syscall
437+
number listed, a list of expressions will be evaluated before running Shadow's
438+
syscall handler. Expressions are evaluated in order; if one returns a non-nil signed integer result, evaluation stops and that
439+
result will be used as the return value for the syscall. Otherwise if all
440+
expressions return nil, Shadow will perform the syscall as usual. Note that not
441+
all syscalls can be overridden. Specifically, syscalls that are handled by
442+
Shadow's shim (for example `gettimeofday`) cannot currently be overridden.
443+
444+
[lua]: https://www.lua.org/
445+
446+
The following variables are available in expressions:
447+
448+
- `args`: Array of syscall arguments cast as integers.
449+
- `host`: The hostname of the current host.
450+
- `process`: The name of the current process.
451+
- `pid`: The pid of the current process.
452+
- `tid`: The tid of the current thread.
453+
454+
Example:
455+
456+
```yaml
457+
experimental:
458+
syscall_overrides:
459+
# SYS_SETSOCKOPT = 54, SOL_IP = 0, IP_BIND_ADDRESS_NO_PORT = 24, IP_TTL = 2
460+
54:
461+
- # setsockopt(_, SOL_IP, IP_BIND_ADDRESS_NO_PORT, ...) -> 0 (Lua arrays are 1-indexed, so args[1] is the fd)
462+
"(args[2] == 0 and args[3] == 24) and 0 or nil"
463+
- # setsockopt(_, SOL_IP, IP_TTL, ...) -> 0
464+
"(args[2] == 0 and args[3] == 2) and 0 or nil"
465+
```
466+
467+
Expressions must be free of side effects: do not modify global state or anything
468+
else outside the expression. The Lua interpreter may be reused, so state left behind by one expression can still be visible in later evaluations.
469+
470+
To easily find the integer value for a given libc constant, you can search for
471+
the constant name within the libc crate's documentation. For example to find
472+
the integer value of `SO_REUSEADDR`, you can [search for
473+
`SO_REUSEADDR`][so_reuseaddr] at https://docs.rs/libc/latest/libc/.
474+
475+
[so_reuseaddr]: https://docs.rs/libc/latest/libc/constant.SO_REUSEADDR.html
476+
428477
#### `experimental.unblocked_syscall_latency`
429478

430479
Default: "1 microseconds"

src/Cargo.lock

Lines changed: 56 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/main/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ shadow-shim-helper-rs = { path = "../lib/shadow-shim-helper-rs" }
3232
lzma-rs = "0.3"
3333
memoffset = "0.9.0"
3434
merge = "0.1"
35+
mlua = { version = "0.9.9", features = ["lua54", "vendored"] }
3536
neli = "0.6.4"
3637
nix = { version = "0.27.1", features = ["feature", "ioctl", "mman", "net", "personality", "resource", "sched", "signal", "socket", "time", "uio", "user"] }
3738
shadow-pod = { path = "../lib/pod" }

src/main/core/configuration.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
//! that the configuration parsing does not become environment-dependent. If a configuration file
1212
//! parses on one system, it should parse successfully on other systems as well.
1313
14-
use std::collections::{BTreeMap, HashSet};
14+
use std::collections::{BTreeMap, HashMap, HashSet};
1515
use std::ffi::{CStr, CString, OsStr, OsString};
1616
use std::os::unix::ffi::OsStrExt;
1717
use std::str::FromStr;
@@ -483,6 +483,10 @@ pub struct ExperimentalOptions {
483483
#[clap(long, value_name = "bool")]
484484
#[clap(help = EXP_HELP.get("use_new_tcp").unwrap().as_str())]
485485
pub use_new_tcp: Option<bool>,
486+
487+
/// Override the behaviour of emulated syscalls.
488+
#[clap(skip)]
489+
pub syscall_overrides: Option<HashMap<u32, Vec<String>>>,
486490
}
487491

488492
impl ExperimentalOptions {
@@ -533,6 +537,7 @@ impl Default for ExperimentalOptions {
533537
scheduler: Some(Scheduler::ThreadPerCore),
534538
log_errors_to_tty: Some(true),
535539
use_new_tcp: Some(false),
540+
syscall_overrides: Some(HashMap::new()),
536541
}
537542
}
538543
}

src/main/core/manager.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use std::time::Duration;
99

1010
use anyhow::Context;
1111
use atomic_refcell::AtomicRefCell;
12+
use linux_api::syscall::SyscallNum;
1213
use log::warn;
1314
use rand::seq::SliceRandom;
1415
use rand_xoshiro::Xoshiro256PlusPlus;
@@ -37,6 +38,10 @@ use crate::utility;
3738
use crate::utility::childpid_watcher::ChildPidWatcher;
3839
use crate::utility::status_bar::Status;
3940

41+
// Per-thread Lua interpreter used to evaluate the user-provided
// `syscall_overrides` expressions. Each thread lazily creates its own
// interpreter and then reuses it, so Lua globals set during one evaluation
// remain set for the next one on that thread.
thread_local! {
42+
pub static LUA: mlua::Lua = mlua::Lua::new();
43+
}
44+
4045
pub struct Manager<'a> {
4146
manager_config: Option<ManagerConfig>,
4247
controller: &'a Controller<'a>,
@@ -611,6 +616,15 @@ impl<'a> Manager<'a> {
611616
use_new_tcp: self.config.experimental.use_new_tcp.unwrap(),
612617
use_mem_mapper: self.config.experimental.use_memory_manager.unwrap(),
613618
use_syscall_counters: self.config.experimental.use_syscall_counters.unwrap(),
619+
syscall_overrides: self
620+
.config
621+
.experimental
622+
.syscall_overrides
623+
.as_ref()
624+
.unwrap()
625+
.iter()
626+
.map(|(syscall, exps)| (SyscallNum::new(*syscall), exps.clone()))
627+
.collect(),
614628
};
615629

616630
Box::new(unsafe {

src/main/host/host.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! An emulated Linux system.
22
33
use std::cell::{Cell, Ref, RefCell, RefMut, UnsafeCell};
4-
use std::collections::BTreeMap;
4+
use std::collections::{BTreeMap, HashMap};
55
use std::ffi::{CStr, CString, OsString};
66
use std::net::{Ipv4Addr, SocketAddrV4};
77
use std::num::NonZeroU8;
@@ -12,6 +12,7 @@ use std::sync::{Arc, Mutex};
1212

1313
use atomic_refcell::AtomicRefCell;
1414
use linux_api::signal::{siginfo_t, Signal};
15+
use linux_api::syscall::SyscallNum;
1516
use log::{debug, trace};
1617
use logger::LogLevel;
1718
use once_cell::unsync::OnceCell;
@@ -86,6 +87,7 @@ pub struct HostParameters {
8687
pub use_new_tcp: bool,
8788
pub use_mem_mapper: bool,
8889
pub use_syscall_counters: bool,
90+
pub syscall_overrides: HashMap<SyscallNum, Vec<String>>,
8991
}
9092

9193
use super::cpu::Cpu;

src/main/host/syscall/handler/mod.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,69 @@ impl SyscallHandler {
345345
let syscall = SyscallNum::new(ctx.args.number.try_into().unwrap());
346346
let syscall_name = syscall.to_str().unwrap_or("unknown-syscall");
347347

348+
// apply any user-provided syscall overrides if applicable
349+
if let Some(overrides) = ctx.objs.host.params.syscall_overrides.get(&syscall) {
350+
let rv = crate::core::manager::LUA.with(|lua| {
351+
// make sure to set every global each time (and not conditionally) since stale
352+
// globals from the last syscall handler will still be set
353+
let globals = lua.globals();
354+
355+
// we don't know what types the args really represent (if anything), so we'll just treat
356+
// them as integers
357+
globals.set("args", args.args.map(i64::from)).unwrap();
358+
359+
// things that the expression could potentially want to know
360+
globals.set("host", ctx.objs.host.name()).unwrap();
361+
globals
362+
.set("process", &*ctx.objs.process.plugin_name())
363+
.unwrap();
364+
globals
365+
.set("pid", u64::from(ctx.objs.process.id()))
366+
.unwrap();
367+
globals.set("tid", u64::from(ctx.objs.thread.id())).unwrap();
368+
369+
// compile and run all of the expressions until we find one that returns an integer
370+
overrides
371+
.iter()
372+
.find_map(|s| match lua.load(s).eval::<Option<i64>>() {
373+
Ok(Some(x)) => Some(x.into()),
374+
Ok(None) => None,
375+
Err(e) => {
376+
warn_once_then_debug!(
377+
"A syscall override expression failed to execute; Ignoring"
378+
);
379+
log::debug!("Syscall override expression failed: {e}");
380+
None
381+
}
382+
})
383+
});
384+
385+
// if one of the expressions returned an integer, return it
386+
if let Some(rv) = rv {
387+
// the return value could be negative which is typically an error value, but we
388+
// wouldn't know for sure that it is an error so we'll just always use `Ok`
389+
let rv = Ok(rv);
390+
391+
log::debug!(
392+
"Applying a syscall override expression to syscall {} ({})",
393+
syscall_name,
394+
ctx.args.number,
395+
);
396+
397+
log_syscall_simple(
398+
ctx.objs.process,
399+
ctx.objs.process.strace_logging_options(),
400+
ctx.objs.thread.id(),
401+
syscall_name,
402+
"...",
403+
&rv,
404+
)
405+
.unwrap();
406+
407+
return rv;
408+
}
409+
}
410+
348411
macro_rules! handle {
349412
($f:ident) => {{
350413
SyscallHandlerFn::call(Self::$f, &mut ctx)

0 commit comments

Comments
 (0)