Skip to content

Commit 800953d

Browse files
committed
initial commit
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
0 parents  commit 800953d

File tree

11 files changed

+1234
-0
lines changed

11 files changed

+1234
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
subuidless

COPYING

Lines changed: 456 additions & 0 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# OCI Seccomp Receiver for running Rootless Containers without `/etc/subuid` and `/etc/subgid`
2+
3+
`subuidless` is an implementation of OCI Seccomp Receiver for running Rootless Containers without `/etc/subuid` and `/etc/subgid`.
4+
5+
`subuidless` emulates ID-related system calls using Seccomp User Notification and XAttrs.
6+
Unlike ptrace implementations of similar projects such as [runROOTLESS (PRoot)](https://github.com/rootless-containers/runrootless) and [remainroot](https://github.com/cyphar/remainroot), `subuidless` can minimize the overhead of system call hooking.
7+
8+
## Status
9+
10+
Early POC. Do not use.
11+
12+
## Why do we need subuidless?
13+
* It is hard to configure `/etc/subuid` and `/etc/subgid` in LDAP environments
14+
* Some container images may require strange UIDs/GIDs that are out of the typical `/etc/subuid` and `/etc/subgid` configuration. The typical configuration only allows 65,536 IDs to be available in the container.
15+
16+
## Goals and non-goals
17+
Goals:
18+
* Simplicity
19+
* Minimal overhead
20+
21+
Non-goals:
22+
* Provide security boundary across emulated IDs
23+
24+
## Requirements
25+
* crun with https://github.com/containers/crun/pull/438
26+
* libseccomp >= v2.5.0
27+
* libprotobuf-c
28+
29+
**Note**: libseccomp >= v2.5.0 is not available as a dpkg/rpm package in most distros as of July 2020.
30+
31+
To install libseccomp from the source onto a custom prefix (`/opt/libseccomp`):
32+
```console
33+
$ git clone https://github.com/seccomp/libseccomp.git
34+
$ cd libseccomp
35+
$ git checkout v2.5.0
36+
$ ./autogen.sh
37+
$ ./configure --prefix=/opt/libseccomp && make && sudo make install
38+
```
39+
40+
To install crun:
41+
```console
42+
$ git clone https://github.com/containers/crun.git
43+
$ cd crun
44+
$ hub checkout https://github.com/containers/crun/pull/438
45+
$ ./autogen.sh
46+
$ CFLAGS="-I/opt/libseccomp/include/" LDFLAGS="-L/opt/libseccomp/lib" ./configure && make && sudo make install
47+
```
48+
49+
## Usage
50+
51+
Terminal 1:
52+
```console
53+
$ LIBSECCOMP_PREFIX=/opt/libseccomp ./make.sh
54+
$ mkdir -p ./test/rootfs && docker create --name foo alpine && docker export foo | tar Cx ./test/rootfs && docker rm -f foo
55+
$ ./subuidless ~/.subuidless.sock
56+
Listening on /home/user/.subuidless.sock
57+
...
58+
```
59+
60+
Terminal 2:
61+
```console
62+
$ RUN_OCI_SECCOMP_RECEIVER=~/.subuidless.sock unshare -r crun run -b ./test foo
63+
/ # cat /proc/self/uid_map
64+
0 1001 1
65+
/ # touch foo
66+
/ # chown 42:42 foo
67+
```
68+
69+
Make sure that the `chown` command succeeds without `Invalid argument` error, even though no subuid is configured in the `uid_map` file.
70+
The UID and GID are recorded to [the `user.rootlesscontainers` xattr](https://github.com/rootless-containers/proto) of the target file.
71+
72+
> *FIXME*:
73+
> The chowned values are not shown in `ls -l` currently. They will be shown after the implementation of stat syscalls.
74+
> Use `getfattr -d -e hex -m user.rootlesscontainers` to inspect the xattr value.
75+
76+
## Hooked system calls
77+
To be documented, see `SCMP_ACT_NOTIFY` entries in `./test/config.json`.
78+
79+
<!--
80+
TODO: Syscalls to be captured:
81+
https://github.com/rootless-containers/PRoot/blob/081bb63955eb4378e53cf4d0eb0ed0d3222bf66e/src/extension/fake_id0/fake_id0.c#L141-L205
82+
https://github.com/cyphar/remainroot/blob/master/src/ptrace/generic-shims.c
83+
-->

main.c

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
/* SPDX-License-Identifier: LGPL-2.1-or-later */
2+
#define _GNU_SOURCE
3+
#include "pb/rootlesscontainers.pb-c.h"
#include <errno.h>
#include <linux/limits.h>
#include <linux/seccomp.h>
#include <seccomp.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/xattr.h>
#include <unistd.h>
16+
17+
/*
18+
* recvfd() was copied from
19+
* https://github.com/rootless-containers/slirp4netns/blob/d5c44a94a271701ddc48c9b20aa6e9539a92ad0a/main.c#L110-L141
20+
* The author (Akihiro Suda) relicensed the code to LGPL v2.1.
21+
*/
22+
/*
 * Receive one file descriptor passed as SCM_RIGHTS ancillary data over the
 * UNIX socket `sock`.
 *
 * Returns the received descriptor (>= 0) on success. Returns -1 when
 * recvmsg() fails, when the peer has closed the connection, or when the
 * message does not carry a well-formed SOL_SOCKET/SCM_RIGHTS control message
 * holding at least one descriptor. A diagnostic is printed to stderr on every
 * failure path.
 */
static int recvfd(int sock) {
  int fd = -1;
  ssize_t rc;
  struct msghdr msg;
  struct cmsghdr *cmsg;
  /* The union forces the alignment required for struct cmsghdr; a bare char
   * array carries no such guarantee. */
  union {
    char buf[CMSG_SPACE(sizeof(int))];
    struct cmsghdr align;
  } control;
  struct iovec iov;
  char dummy = '\0';

  memset(&msg, 0, sizeof(msg));
  memset(&control, 0, sizeof(control));
  /* At least one byte of regular data must be received alongside the
   * ancillary data. */
  iov.iov_base = &dummy;
  iov.iov_len = 1;
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;
  msg.msg_control = control.buf;
  msg.msg_controllen = sizeof(control.buf);

  rc = recvmsg(sock, &msg, 0);
  if (rc < 0) {
    perror("recvmsg");
    return -1;
  }
  if (rc == 0) {
    fprintf(stderr, "the message is empty\n");
    return -1;
  }

  cmsg = CMSG_FIRSTHDR(&msg);
  /* Check level, type and length before trusting the payload: a short or
   * foreign control message must not be interpreted as a descriptor. */
  if (cmsg == NULL || cmsg->cmsg_level != SOL_SOCKET ||
      cmsg->cmsg_type != SCM_RIGHTS ||
      cmsg->cmsg_len < CMSG_LEN(sizeof(fd))) {
    fprintf(stderr, "the message does not contain fd\n");
    return -1;
  }
  /* CMSG_DATA() is not guaranteed to be int-aligned, hence memcpy. */
  memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
  return fd;
}
53+
54+
/*
 * Copy a string out of the memory of process `pid`.
 *
 * Reads up to `max_len` bytes starting at remote address `off` with
 * process_vm_readv(2) and NUL-terminates the result.
 *
 * On success returns 0 and stores a heap-allocated buffer in `*out`; the
 * caller owns it and must free() it. On failure returns a negative value and
 * sets `*out` to NULL.
 */
static int read_proc_mem_string(char **out, pid_t pid, off_t off,
                                size_t max_len) {
  struct iovec local;
  struct iovec remote;
  ssize_t nread;
  char *buf;

  *out = NULL;
  if (max_len == SIZE_MAX) {
    /* max_len + 1 below would wrap around to 0 */
    fprintf(stderr, "read_proc_mem_string: max_len is too large\n");
    return -1;
  }
  /* One extra byte for the terminator. calloc() keeps every byte that the
   * read does not fill well-defined. */
  buf = calloc(max_len + 1, 1);
  if (buf == NULL) {
    perror("calloc");
    return -1;
  }
  local.iov_base = buf;
  local.iov_len = max_len;
  remote.iov_base = (void *)(uintptr_t)off;
  remote.iov_len = max_len;
  nread = process_vm_readv(pid, &local, 1, &remote, 1, 0);
  if (nread < 0) {
    perror("process_vm_readv");
    free(buf);
    return -1;
  }
  /* The read may be short (e.g. the range runs into unmapped memory), so
   * terminate right after the bytes that were actually transferred. */
  buf[nread] = '\0';
  *out = buf;
  return 0;
}
75+
76+
#define USER_ROOTLESSCONTAINERS_XATTR "user.rootlesscontainers"
77+
78+
static int set_user_rootlesscontainers_xattr_chdired(const char *pathname,
79+
uid_t uid, gid_t gid) {
80+
uint8_t *buf = NULL;
81+
size_t sz = 0;
82+
int rc = -1;
83+
Rootlesscontainers__Resource msg;
84+
if (uid == 0 && gid == 0) {
85+
printf("DEBUG: removing %s xattr on \"%s\" in the PID cwd\n",
86+
USER_ROOTLESSCONTAINERS_XATTR, pathname);
87+
if ((rc = removexattr(pathname, USER_ROOTLESSCONTAINERS_XATTR)) < 0) {
88+
perror("removexattr");
89+
}
90+
return rc;
91+
}
92+
rootlesscontainers__resource__init(&msg);
93+
msg.uid = uid;
94+
msg.gid = gid;
95+
sz = rootlesscontainers__resource__get_packed_size(&msg);
96+
buf = malloc(sz);
97+
rootlesscontainers__resource__pack(&msg, buf);
98+
printf("DEBUG: setting %s xattr (%ld bytes) on \"%s\" in the PID cwd\n",
99+
USER_ROOTLESSCONTAINERS_XATTR, sz, pathname);
100+
if ((rc = setxattr(pathname, USER_ROOTLESSCONTAINERS_XATTR, buf, sz, 0)) <
101+
0) {
102+
perror("setxattr");
103+
}
104+
free(buf);
105+
return rc;
106+
}
107+
108+
static int set_user_rootlesscontainers_xattr(pid_t pid, const char *pathname,
109+
uid_t uid, gid_t gid) {
110+
char proc_pid_cwd[32];
111+
char *wd = NULL;
112+
int rc = -1;
113+
sprintf(proc_pid_cwd, "/proc/%d/cwd", pid);
114+
if ((wd = get_current_dir_name()) == NULL) {
115+
perror("get_current_dir_name");
116+
return -1;
117+
}
118+
if ((rc = chdir(proc_pid_cwd)) < 0) {
119+
perror("chdir");
120+
free(wd);
121+
return rc;
122+
}
123+
rc = set_user_rootlesscontainers_xattr_chdired(pathname, uid, gid);
124+
if (chdir(wd) < 0) {
125+
perror("chdir");
126+
fprintf(stderr, "can't chdir back to the previous wd \"%s\", aborting\n",
127+
wd);
128+
free(wd);
129+
abort();
130+
return -1;
131+
}
132+
free(wd);
133+
return 0;
134+
}
135+
136+
static void handle_sys_chown(struct seccomp_notif *req,
137+
struct seccomp_notif_resp *resp) {
138+
char *pathname = NULL;
139+
uid_t uid = req->data.args[1];
140+
gid_t gid = req->data.args[2];
141+
read_proc_mem_string(&pathname, req->pid, req->data.args[0], PATH_MAX);
142+
fprintf(stderr, "debug: <<< ID=%llud sys_chown(\"%s\", %d, %d)\n", req->id,
143+
pathname, uid, gid);
144+
if (set_user_rootlesscontainers_xattr(req->pid, pathname, uid, gid) < 0) {
145+
resp->error = -1;
146+
resp->error = -EIO;
147+
}
148+
fprintf(stderr, "debug: >>> ID=%llud error=%d\n", req->id, resp->error);
149+
free(pathname);
150+
}
151+
152+
static void handle_req(struct seccomp_notif *req,
153+
struct seccomp_notif_resp *resp) {
154+
resp->id = req->id;
155+
switch (req->data.nr) {
156+
/* FIXME: use SCMP_SYS macro */
157+
case __NR_chown:
158+
handle_sys_chown(req, resp);
159+
break;
160+
default:
161+
fprintf(stderr, "Unexpected syscall %d, returning -ENOTSUP\n",
162+
req->data.nr);
163+
resp->error = -ENOTSUP;
164+
break;
165+
}
166+
}
167+
168+
static int on_accept(int accept_fd) {
169+
int notify_fd = -1;
170+
if ((notify_fd = recvfd(accept_fd)) < 0) {
171+
perror("recvfd");
172+
return notify_fd;
173+
}
174+
printf("received notify_fd=%d\n", notify_fd);
175+
for (;;) {
176+
int rc = -1;
177+
struct seccomp_notif *req = NULL;
178+
struct seccomp_notif_resp *resp = NULL;
179+
if ((rc = seccomp_notify_alloc(&req, &resp)) < 0) {
180+
fprintf(stderr, "seccomp_notify_alloc() failed, rc=%d\n", rc);
181+
return rc;
182+
}
183+
if ((rc = seccomp_notify_receive(notify_fd, req)) < 0) {
184+
fprintf(stderr, "seccomp_notify_receive() failed, rc=%d\n", rc);
185+
seccomp_notify_free(req, resp);
186+
return rc;
187+
}
188+
if ((rc = seccomp_notify_id_valid(notify_fd, req->id)) < 0) {
189+
fprintf(stderr, "req->id=%lld is no longer valid, ignoring\n", req->id);
190+
seccomp_notify_free(req, resp);
191+
continue;
192+
}
193+
handle_req(req, resp);
194+
if ((rc = seccomp_notify_respond(notify_fd, resp)) < 0) {
195+
fprintf(stderr, "seccomp_notify_respond() failed, rc=%d\n", rc);
196+
seccomp_notify_free(req, resp);
197+
return rc;
198+
}
199+
seccomp_notify_free(req, resp);
200+
}
201+
}
202+
203+
/*
 * Entry point: listen on the UNIX socket given as the only argument and
 * serve the first connection with on_accept().
 *
 * Exits with EXIT_FAILURE on a usage error, on any socket setup failure, or
 * when on_accept() reports a failure. Connections after the first one are
 * closed immediately (see the FIXME message below).
 */
int main(int argc, char *const argv[]) {
  const char *sock_path = NULL;
  int sock_fd = -1;
  const int sock_backlog = 128;
  struct sockaddr_un sun;

  if (argc != 2) {
    fprintf(stderr, "Usage: %s SOCK\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  sock_path = argv[1];
  /* Refuse paths that do not fit into sun_path instead of silently binding
   * to a truncated path. */
  if (strlen(sock_path) >= sizeof(sun.sun_path)) {
    fprintf(stderr, "socket path \"%s\" is too long (max %zu bytes)\n",
            sock_path, sizeof(sun.sun_path) - 1);
    exit(EXIT_FAILURE);
  }
  unlink(sock_path); /* remove existing socket */
  if ((sock_fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
    perror("socket");
    exit(EXIT_FAILURE);
  }
  memset(&sun, 0, sizeof(struct sockaddr_un));
  sun.sun_family = AF_UNIX;
  /* Length was checked above, and the memset keeps the path terminated. */
  memcpy(sun.sun_path, sock_path, strlen(sock_path));
  if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
    perror("bind");
    exit(EXIT_FAILURE);
  }
  if (listen(sock_fd, sock_backlog) < 0) {
    perror("listen");
    exit(EXIT_FAILURE);
  }
  printf("Listening on %s\n", sock_path);
  for (int i = 0;; i++) {
    int accept_fd = -1;
    if ((accept_fd = accept(sock_fd, NULL, NULL)) < 0) {
      perror("accept");
      exit(EXIT_FAILURE);
    }
    /* Only the very first connection (i == 0) is served. */
    if (i > 0) {
      fprintf(stderr, "FIXME: only single accept() is supported currently\n");
      close(accept_fd);
      continue;
    }
    /* TODO: fork() here */
    /* on_accept() reports failure with a negative return value. */
    if (on_accept(accept_fd) < 0) {
      fprintf(stderr, "on_accept() failed\n");
      exit(EXIT_FAILURE);
    }
    close(accept_fd);
  }
}

make.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#!/bin/bash
# FIXME: use autoconf/automake
#
# Builds the `subuidless` binary from *.c and pb/*.c, linking the libseccomp
# static archive found under LIBSECCOMP_PREFIX and the system libprotobuf-c.

# requires libseccomp >= v2.5.0
: "${LIBSECCOMP_PREFIX:=/opt/libseccomp}"

set -eux -o pipefail
# Expansions are quoted so that a prefix containing spaces still works.
gcc -o subuidless -I"${LIBSECCOMP_PREFIX}/include" *.c pb/*.c "${LIBSECCOMP_PREFIX}/lib/libseccomp.a" -lprotobuf-c

pb/README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# rootlesscontainers.proto
2+
3+
This directory contains `rootlesscontainers.proto`, which is used for preserving emulated file owner information as `user.rootlesscontainers` xattr values.
4+
5+
## Source
6+
7+
https://raw.githubusercontent.com/rootless-containers/proto/316d7ae30bc5f448f217dc11623047b0f1589e53/rootlesscontainers.proto
8+
9+
## Compile
10+
11+
```console
12+
$ protoc-c --c_out=. rootlesscontainers.proto
13+
```

0 commit comments

Comments
 (0)