Skip to content

Commit 626119b

Browse files
committed
fix(sidecar): block bind mount sources at syscall level to prevent rootfs tampering
Enforce the bind source allowlist in the seccomp-notif supervisor, mirroring the OCI hook's checkMounts() logic. Build containers skip the OCI hooks but still inherit the seccomp filter, so enforcing the allowlist at the syscall level closes a possible rootfs tampering chain (podman build -v /:/hostroot). Update tests accordingly. Signed-off-by: Luca Di Maio <luca.dimaio1@gmail.com>
1 parent b429488 commit 626119b

File tree

3 files changed

+273
-6
lines changed

3 files changed

+273
-6
lines changed

container-images/sidecar/entrypoint/entrypoint_test.go

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,3 +451,65 @@ func TestReadPPID_Missing(t *testing.T) {
451451
t.Errorf("expected 0 for nonexistent PID, got %d", ppid)
452452
}
453453
}
454+
455+
// ---------------------------------------------------------------------------
456+
// Bind mount source allowlist tests
457+
// ---------------------------------------------------------------------------
458+
459+
func TestIsAllowedBindSource(t *testing.T) {
460+
workdir := "/home/user/project"
461+
tests := []struct {
462+
source string
463+
want bool
464+
}{
465+
// Workdir and children.
466+
{"/home/user/project", true},
467+
{"/home/user/project/src", true},
468+
{"/home/user/project/.git/hooks", true},
469+
470+
// Infrastructure storage.
471+
{"/var/lib/containers/storage/overlay/abc/merged", true},
472+
{"/var/run/containers/storage/abc", true},
473+
{"/var/cache/containers/blob-cache/sha256/abc", true},
474+
{"/run/containers/storage/abc", true},
475+
{"/run/credentials/gh", true},
476+
477+
// Device files.
478+
{"/dev/null", true},
479+
{"/dev/zero", true},
480+
{"/dev/shm/foo", false},
481+
482+
// Temp dirs.
483+
{"/tmp/buildah123", false},
484+
{"/var/tmp/foo", false},
485+
486+
// Special rootfs files.
487+
{"/sandbox-seal", true},
488+
{"/rename_exdev_shim.so", true},
489+
{"/empty", true},
490+
491+
// Empty source (remount).
492+
{"", true},
493+
494+
// ATTACK: rootfs paths that must be blocked.
495+
{"/", false},
496+
{"/etc", false},
497+
{"/etc/containers", false},
498+
{"/etc/containers/containers.conf", false},
499+
{"/usr", false},
500+
{"/usr/share/containers/oci/hooks.d", false},
501+
{"/usr/libexec/oci/hooks.d/security-policy", false},
502+
{"/bin", false},
503+
{"/home/user", false}, // parent of workdir, not under it
504+
{"/proc", false},
505+
{"/sys", false},
506+
{"/opt", false},
507+
{"/entrypoint", false},
508+
}
509+
for _, tt := range tests {
510+
got := isAllowedBindSource(tt.source, workdir)
511+
if got != tt.want {
512+
t.Errorf("isAllowedBindSource(%q, %q) = %v, want %v", tt.source, workdir, got, tt.want)
513+
}
514+
}
515+
}

container-images/sidecar/entrypoint/handlers.go

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,73 @@ import (
2121
// (IPT_SO_SET_REPLACE) and IPv6 (IP6T_SO_SET_REPLACE).
2222
const iptSOSetReplace = 64
2323

24+
// Bind mount source allowlists consulted by isAllowedBindSource.
var (
	// allowedBindSources holds directory prefixes; a bind mount source is
	// accepted when it is one of these paths or lies beneath one of them.
	allowedBindSources = []string{
		// Infra mounts used for namespace setup.
		"/proc/self",
		"/proc/thread-self",
		"/run/user/0",
		"/run/netns",
		"/dev/char",
		"/dev/pts",

		// Infra mounts for container storage, cache and logs.
		"/run/containers",
		"/var/cache/containers",
		"/var/lib/containers/storage",
		"/var/run/containers/storage",

		// Credential forwarding.
		"/run/credentials",
	}

	// allowedBindSourceFiles holds individual rootfs files that may be
	// bind-mounted into nested containers. Entries are matched exactly,
	// never as prefixes.
	allowedBindSourceFiles = []string{
		"/dev/full",
		"/dev/null",
		"/dev/random",
		"/dev/tty",
		"/dev/urandom",
		"/dev/zero",
		"/empty",
		"/rename_exdev_shim.so",
		"/sandbox-seal",
	}
)
56+
57+
// isAllowedBindSource checks whether a bind mount source is permitted.
58+
func isAllowedBindSource(source, workdir string) bool {
59+
if source == "" {
60+
return true
61+
}
62+
63+
if workdir != "" && isSubPath(workdir, source) {
64+
return true
65+
}
66+
67+
for _, prefix := range allowedBindSources {
68+
if isSubPath(prefix, source) {
69+
return true
70+
}
71+
}
72+
73+
return slices.Contains(allowedBindSourceFiles, source)
74+
}
75+
2476
// proc1Sensitive lists /proc/1 sub-paths that must never be opened from
2577
// sidecar processes. Defense-in-depth behind the /dev/null mask on
2678
// /proc/1/mem and the mount supervisor blocking unmounts.
2779
var proc1Sensitive = []string{
28-
"/proc/1/mem",
80+
"/proc/1/auxv",
81+
"/proc/1/cwd",
2982
"/proc/1/environ",
83+
"/proc/1/exe",
84+
"/proc/1/io",
3085
"/proc/1/maps",
86+
"/proc/1/mem",
87+
"/proc/1/pagemap",
3188
"/proc/1/root",
32-
"/proc/1/cwd",
33-
"/proc/1/exe",
3489
"/proc/1/stack",
3590
"/proc/1/syscall",
36-
"/proc/1/io",
37-
"/proc/1/auxv",
38-
"/proc/1/pagemap",
3991
}
4092

4193
// ---------------------------------------------------------------------------
@@ -123,6 +175,15 @@ func handleMount(
123175
return
124176
}
125177

178+
// Block bind mounts from disallowed sources. This is the syscall-level
179+
// equivalent of the OCI hook's checkMounts()
180+
if flags&unix.MS_BIND != 0 && flags&unix.MS_REMOUNT == 0 && !isAllowedBindSource(source, workdir) {
181+
resp.Error = -int32(unix.EPERM)
182+
logf("BLOCKED mount(MS_BIND) source=%s target=%s pid=%d bin=%s (source not allowed)",
183+
source, target, pid, exePath(pid))
184+
return
185+
}
186+
126187
// Block non-recursive bind mount where the source is the workdir or
127188
// an ancestor of it. A non-recursive bind of any path that contains
128189
// the workdir doesn't carry the /dev/null sub-mounts, exposing masked files.

pkg/sandbox/integration_test.go

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,6 +1197,150 @@ func TestSupervisor(t *testing.T) {
11971197
})
11981198
}
11991199

1200+
// ---------------------------------------------------------------------------
1201+
// Rootfs tampering: pentest escape chain — bind mount source allowlist.
1202+
// Validates that the seccomp-notif supervisor blocks the attack at the
1203+
// mount level, preventing access to sidecar config via -v /:/hostroot.
1204+
// Build containers skip OCI hooks; the supervisor is the only defense.
1205+
// ---------------------------------------------------------------------------
1206+
1207+
func TestRootfsTampering(t *testing.T) {
1208+
t.Parallel()
1209+
1210+
t.Run("run_bind_root_blocked", func(t *testing.T) {
1211+
t.Parallel()
1212+
// podman run -v /:/hostroot — exposes entire sidecar rootfs.
1213+
// The supervisor's bind source check must reject source "/".
1214+
out, err := sidecarExec(t, sidecarName,
1215+
innerRun([]string{"-v", "/:/hostroot"}, "cat", "/hostroot/etc/containers/containers.conf"))
1216+
requireFail(t, out, err)
1217+
})
1218+
1219+
t.Run("run_bind_etc_blocked", func(t *testing.T) {
1220+
t.Parallel()
1221+
// podman run -v /etc:/hostetc — exposes sidecar config dir.
1222+
out, err := sidecarExec(t, sidecarName,
1223+
innerRun([]string{"-v", "/etc:/hostetc"}, "cat", "/hostetc/containers/containers.conf"))
1224+
requireFail(t, out, err)
1225+
})
1226+
1227+
t.Run("run_bind_usr_blocked", func(t *testing.T) {
1228+
t.Parallel()
1229+
// podman run -v /usr:/hostusr — exposes hook binaries/configs.
1230+
out, err := sidecarExec(t, sidecarName,
1231+
innerRun([]string{"-v", "/usr:/hostusr"}, "ls", "/hostusr/share/containers"))
1232+
requireFail(t, out, err)
1233+
})
1234+
1235+
t.Run("build_escalate_full_chain", func(t *testing.T) {
1236+
t.Parallel()
1237+
containerfile := strings.Join([]string{
1238+
"FROM " + alpineImage,
1239+
"RUN ls -la /hostroot/usr/share/containers/oci/hooks.d/security-policy.json /hostroot/usr/share/containers/oci/hooks.d/seal-inject.json",
1240+
"RUN cat /hostroot/usr/share/containers/oci/hooks.d/security-policy.json /hostroot/usr/share/containers/oci/hooks.d/seal-inject.json",
1241+
"RUN rm -f /hostroot/usr/share/containers/oci/hooks.d/security-policy.json /hostroot/usr/share/containers/oci/hooks.d/seal-inject.json",
1242+
"",
1243+
}, "\n")
1244+
cfPath := filepath.Join(workdir, "Containerfile.escalate")
1245+
if err := os.WriteFile(cfPath, []byte(containerfile), 0o644); err != nil {
1246+
t.Fatal(err)
1247+
}
1248+
defer os.Remove(cfPath)
1249+
out, err := sidecarExecTimeout(t, sidecarName, []string{
1250+
innerPodman, "build", "--no-cache",
1251+
"--cap-add", "ALL",
1252+
"-v", "/:/hostroot",
1253+
"-f", cfPath, workdir,
1254+
}, 120*time.Second)
1255+
requireFail(t, out, err)
1256+
})
1257+
1258+
t.Run("build_bind_root_delete_hooks", func(t *testing.T) {
1259+
t.Parallel()
1260+
cfPath := filepath.Join(workdir, "Containerfile.delete_hooks")
1261+
cf := "FROM " + alpineImage + "\nRUN rm -f /hostroot/usr/share/containers/oci/hooks.d/security-policy.json\n"
1262+
if err := os.WriteFile(cfPath, []byte(cf), 0o644); err != nil {
1263+
t.Fatal(err)
1264+
}
1265+
defer os.Remove(cfPath)
1266+
out, err := sidecarExecTimeout(t, sidecarName, []string{
1267+
innerPodman, "build", "--no-cache",
1268+
"-v", "/:/hostroot",
1269+
"-f", cfPath, workdir,
1270+
}, 120*time.Second)
1271+
requireFail(t, out, err)
1272+
})
1273+
1274+
t.Run("build_bind_root_overwrite_seccomp", func(t *testing.T) {
1275+
t.Parallel()
1276+
// Overwrite seccomp_nested.json to weaken nested container restrictions.
1277+
cfPath := filepath.Join(workdir, "Containerfile.overwrite_seccomp")
1278+
cf := "FROM " + alpineImage + "\nRUN echo '{}' > /hostroot/etc/containers/seccomp_nested.json\n"
1279+
if err := os.WriteFile(cfPath, []byte(cf), 0o644); err != nil {
1280+
t.Fatal(err)
1281+
}
1282+
defer os.Remove(cfPath)
1283+
out, err := sidecarExecTimeout(t, sidecarName, []string{
1284+
innerPodman, "build", "--no-cache",
1285+
"-v", "/:/hostroot",
1286+
"-f", cfPath, workdir,
1287+
}, 120*time.Second)
1288+
requireFail(t, out, err)
1289+
})
1290+
1291+
t.Run("build_bind_root_modify_containers_conf", func(t *testing.T) {
1292+
t.Parallel()
1293+
// Modify containers.conf to disable security defaults.
1294+
cfPath := filepath.Join(workdir, "Containerfile.modify_conf")
1295+
cf := "FROM " + alpineImage + "\nRUN echo '[containers]' > /hostroot/etc/containers/containers.conf\n"
1296+
if err := os.WriteFile(cfPath, []byte(cf), 0o644); err != nil {
1297+
t.Fatal(err)
1298+
}
1299+
defer os.Remove(cfPath)
1300+
out, err := sidecarExecTimeout(t, sidecarName, []string{
1301+
innerPodman, "build", "--no-cache",
1302+
"-v", "/:/hostroot",
1303+
"-f", cfPath, workdir,
1304+
}, 120*time.Second)
1305+
requireFail(t, out, err)
1306+
})
1307+
1308+
t.Run("run_bind_etc_containers_blocked", func(t *testing.T) {
1309+
t.Parallel()
1310+
out, err := sidecarExec(t, sidecarName,
1311+
innerRun([]string{"-v", "/etc/containers:/mnt"}, "cat", "/mnt/containers.conf"))
1312+
requireFail(t, out, err)
1313+
})
1314+
1315+
t.Run("run_bind_hook_dir_blocked", func(t *testing.T) {
1316+
t.Parallel()
1317+
out, err := sidecarExec(t, sidecarName,
1318+
innerRun([]string{"-v", "/usr/share/containers/oci/hooks.d:/mnt"}, "ls", "/mnt"))
1319+
requireFail(t, out, err)
1320+
})
1321+
1322+
t.Run("run_bind_hook_binary_blocked", func(t *testing.T) {
1323+
t.Parallel()
1324+
out, err := sidecarExec(t, sidecarName,
1325+
innerRun([]string{"-v", "/usr/libexec/oci/hooks.d/security-policy:/mnt/sp"}, "ls", "/mnt/sp"))
1326+
requireFail(t, out, err)
1327+
})
1328+
1329+
t.Run("run_bind_dev_fuse_blocked", func(t *testing.T) {
1330+
t.Parallel()
1331+
out, err := sidecarExec(t, sidecarName,
1332+
innerRun([]string{"-v", "/dev/fuse:/dev/fuse"}, "ls", "/dev/fuse"))
1333+
requireFail(t, out, err)
1334+
})
1335+
1336+
t.Run("run_workdir_mount_allowed", func(t *testing.T) {
1337+
t.Parallel()
1338+
out, err := sidecarExec(t, sidecarName,
1339+
innerRun([]string{"-v", workdir + ":/work"}, "ls", "/work"))
1340+
requireSuccess(t, out, err)
1341+
})
1342+
}
1343+
12001344
func TestSecurityAudit(t *testing.T) {
12011345
t.Run("cdk", func(t *testing.T) {
12021346
// Detect architecture for the correct CDK binary.

0 commit comments

Comments
 (0)