![Zbigniew Jędrzejewski-Szmek](/assets/img/avatar_default.png)
The naming of the functions was a complete mess: the most specific functions which don't know anything about cgroups had "cgroup_" prefix, while more general functions which took a node path and a cgroup for reporting had no prefix. Let's use "bpf_devices_" for the latter group, and "bpf_prog_*" for the rest. The main goal of this move is to split the implementation from the calling code and add unit tests in a later patch.
420 lines
15 KiB
C
420 lines
15 KiB
C
/* SPDX-License-Identifier: LGPL-2.1+ */
|
|
|
|
#include <fnmatch.h>
|
|
#include <linux/bpf_insn.h>
|
|
|
|
#include "bpf-devices.h"
|
|
#include "bpf-program.h"
|
|
#include "fd-util.h"
|
|
#include "fileio.h"
|
|
#include "parse-util.h"
|
|
#include "stat-util.h"
|
|
#include "stdio-util.h"
|
|
#include "string-util.h"
|
|
|
|
#define PASS_JUMP_OFF 4096
|
|
|
|
/* Translates an access string made up of the letters 'r', 'w' and 'm' into the corresponding
 * BPF_DEVCG_ACC_* bit mask. Repeated letters are harmless. Returns the mask (0 for the empty
 * string), or -EINVAL as soon as any other character is encountered. */
static int bpf_access_type(const char *acc) {
        int mask = 0;

        assert(acc);

        for (const char *c = acc; *c != '\0'; c++) {
                if (*c == 'r')
                        mask |= BPF_DEVCG_ACC_READ;
                else if (*c == 'w')
                        mask |= BPF_DEVCG_ACC_WRITE;
                else if (*c == 'm')
                        mask |= BPF_DEVCG_ACC_MKNOD;
                else
                        return -EINVAL;
        }

        return mask;
}
|
|
|
|
/* Appends a rule to 'prog' that jumps to the (not yet resolved) PASS label when the request is for
 * device type 'type', major 'major', minor 'minor', and asks for nothing beyond the access bits
 * named in 'acc'. On any mismatch the rule falls through to whatever instruction follows it.
 *
 * Returns -EINVAL if 'acc' is empty or contains an unknown letter, otherwise the result of
 * bpf_program_add_instructions(). */
static int bpf_prog_whitelist_device(BPFProgram *prog, int type, int major, int minor, const char *acc) {
        int allowed, r;

        assert(prog);
        assert(acc);

        allowed = bpf_access_type(acc);
        if (allowed <= 0)
                return -EINVAL;

        /* Every conditional jump below skips exactly the remaining instructions of this rule. */
        const struct bpf_insn rule[] = {
                BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 6),       /* wrong device type? skip rule */
                BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),            /* r1 = requested access ... */
                BPF_ALU32_IMM(BPF_AND, BPF_REG_1, allowed),     /* ... masked by the allowed access */
                BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3),  /* something not allowed requested? skip */
                BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2),      /* wrong major? skip */
                BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1),      /* wrong minor? skip */
                BPF_JMP_A(PASS_JUMP_OFF),                       /* everything matched: jump to PASS */
        };

        r = bpf_program_add_instructions(prog, rule, ELEMENTSOF(rule));
        if (r < 0)
                log_error_errno(r, "Extending device control BPF program failed: %m");

        return r;
}
|
|
|
|
/* Appends a rule to 'prog' that jumps to the (not yet resolved) PASS label when the request is for
 * device type 'type' and major 'major' (any minor), and asks for nothing beyond the access bits
 * named in 'acc'. On any mismatch the rule falls through to whatever instruction follows it.
 *
 * Returns -EINVAL if 'acc' is empty or contains an unknown letter, otherwise the result of
 * bpf_program_add_instructions(). */
static int bpf_prog_whitelist_major(BPFProgram *prog, int type, int major, const char *acc) {
        int allowed, r;

        assert(prog);
        assert(acc);

        allowed = bpf_access_type(acc);
        if (allowed <= 0)
                return -EINVAL;

        /* Every conditional jump below skips exactly the remaining instructions of this rule. */
        const struct bpf_insn rule[] = {
                BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 5),       /* wrong device type? skip rule */
                BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),            /* r1 = requested access ... */
                BPF_ALU32_IMM(BPF_AND, BPF_REG_1, allowed),     /* ... masked by the allowed access */
                BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2),  /* something not allowed requested? skip */
                BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1),      /* wrong major? skip */
                BPF_JMP_A(PASS_JUMP_OFF),                       /* everything matched: jump to PASS */
        };

        r = bpf_program_add_instructions(prog, rule, ELEMENTSOF(rule));
        if (r < 0)
                log_error_errno(r, "Extending device control BPF program failed: %m");

        return r;
}
|
|
|
|
/* Appends a rule to 'prog' that jumps to the (not yet resolved) PASS label when the request is for
 * device type 'type' (any major/minor) and asks for nothing beyond the access bits named in 'acc'.
 * On any mismatch the rule falls through to whatever instruction follows it.
 *
 * Returns -EINVAL if 'acc' is empty or contains an unknown letter, otherwise the result of
 * bpf_program_add_instructions(). */
static int bpf_prog_whitelist_class(BPFProgram *prog, int type, const char *acc) {
        int r, access;

        assert(prog);
        assert(acc);

        access = bpf_access_type(acc);
        if (access <= 0)
                return -EINVAL;

        /* BPF jump offsets are relative to the instruction following the jump. This rule is five
         * instructions long, hence the first jump has to skip the remaining four to land on the
         * first instruction after the rule. (An offset of 5, as used by the six-instruction "major"
         * rule, would skip one instruction too many.) */
        const struct bpf_insn insn[] = {
                BPF_JMP_IMM(BPF_JNE, BPF_REG_2, type, 4),  /* compare device type */
                BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), /* calculate access type */
                BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access),
                BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 1), /* compare access type */
                BPF_JMP_A(PASS_JUMP_OFF), /* jump to PASS */
        };

        r = bpf_program_add_instructions(prog, insn, ELEMENTSOF(insn));
        if (r < 0)
                log_error_errno(r, "Extending device control BPF program failed: %m");

        return r;
}
|
|
|
|
/* Allocates a new BPF_PROG_TYPE_CGROUP_DEVICE program and, if rules are going to be appended to it
 * (policy "closed" or a non-empty whitelist), seeds it with a prologue that unpacks the request
 * context into registers r2 (device type), r3 (access type), r4 (major) and r5 (minor), which is
 * what the whitelist rules compare against.
 *
 * If policy is "auto" and there is no whitelist no program is needed: 0 is returned and *ret is set
 * to NULL. Otherwise returns 0 and stores the new program in *ret, or returns the result of
 * log_error_errno() if allocating or extending the program failed. */
int bpf_devices_cgroup_init(BPFProgram **ret, CGroupDevicePolicy policy, bool whitelist) {
        const struct bpf_insn pre_insn[] = {
                /* load device type to r2: it is kept in the lower 16 bit of access_type. Load the
                 * whole word and mask it, instead of doing a half-word load at the field's offset,
                 * since the latter picks the wrong half on big-endian machines. */
                BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
                            offsetof(struct bpf_cgroup_dev_ctx, access_type)),
                BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF),

                /* load access type to r3: the upper 16 bit of the same word */
                BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
                            offsetof(struct bpf_cgroup_dev_ctx, access_type)),
                BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),

                /* load major number to r4 */
                BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
                            offsetof(struct bpf_cgroup_dev_ctx, major)),

                /* load minor number to r5 */
                BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
                            offsetof(struct bpf_cgroup_dev_ctx, minor)),
        };

        _cleanup_(bpf_program_unrefp) BPFProgram *prog = NULL;
        int r;

        assert(ret);

        if (policy == CGROUP_DEVICE_POLICY_AUTO && !whitelist) {
                /* Nothing to enforce. Still initialize the return parameter, so that callers never
                 * see an indeterminate pointer on success. */
                *ret = NULL;
                return 0;
        }

        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &prog);
        if (r < 0)
                return log_error_errno(r, "Loading device control BPF program failed: %m");

        if (policy == CGROUP_DEVICE_POLICY_CLOSED || whitelist) {
                r = bpf_program_add_instructions(prog, pre_insn, ELEMENTSOF(pre_insn));
                if (r < 0)
                        return log_error_errno(r, "Extending device control BPF program failed: %m");
        }

        *ret = TAKE_PTR(prog);

        return 0;
}
|
|
|
|
/* Finalizes the device control program 'prog' and attaches it to the cgroup of unit 'u'.
 *
 * With prog == NULL the program currently recorded in the unit is released and 0 is returned.
 * Otherwise the program is completed with its tail (a DENY fall-through plus the PASS/exit
 * sequence, or an unconditional DENY for policy "strict" without whitelist), all placeholder jumps
 * emitted by the whitelist rules are pointed at the PASS sequence, and the result is attached with
 * BPF_F_ALLOW_MULTI. On success the unit's reference is switched over to 'prog' and 0 is returned;
 * if a step fails the result of log_error_errno() for that step is returned. */
int bpf_devices_apply_policy(Unit *u, BPFProgram *prog, CGroupDevicePolicy policy, bool whitelist) {
        _cleanup_free_ char *cgroup_fs_path = NULL;
        int r;

        if (!prog) {
                /* Nothing to install, just drop what is there. */
                u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);
                return 0;
        }

        const bool deny_all = !whitelist && policy == CGROUP_DEVICE_POLICY_STRICT;

        /* Reached when no whitelist rule matched: set the verdict to DENY and hop over the
         * instruction of the exit sequence that would overwrite it. */
        const struct bpf_insn deny_tail[] = {
                BPF_MOV64_IMM(BPF_REG_0, 0),
                BPF_JMP_A(1),
        };

        /* Target of the PASS jumps: verdict ALLOW (or DENY if everything is denied), then exit. */
        const struct bpf_insn exit_tail[] = {
                BPF_MOV64_IMM(BPF_REG_0, deny_all ? 0 : 1),
                BPF_EXIT_INSN()
        };

        if (!deny_all) {
                r = bpf_program_add_instructions(prog, deny_tail, ELEMENTSOF(deny_tail));
                if (r < 0)
                        return log_error_errno(r, "Extending device control BPF program failed: %m");

                /* Resolve the PASS_JUMP_OFF placeholders: the exit sequence is appended right
                 * after the current end of the program, hence that is where they have to go. */
                for (size_t i = 0; i < prog->n_instructions; i++) {
                        struct bpf_insn *jump = prog->instructions + i;

                        if (jump->code != (BPF_JMP | BPF_JA))
                                continue;
                        if (jump->off != PASS_JUMP_OFF)
                                continue;

                        jump->off = prog->n_instructions - i - 1;
                }
        }

        r = bpf_program_add_instructions(prog, exit_tail, ELEMENTSOF(exit_tail));
        if (r < 0)
                return log_error_errno(r, "Extending device control BPF program failed: %m");

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_fs_path);
        if (r < 0)
                return log_error_errno(r, "Failed to determine cgroup path: %m");

        r = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE, cgroup_fs_path, BPF_F_ALLOW_MULTI);
        if (r < 0)
                return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m", cgroup_fs_path);

        /* The new program is attached now, hence release our reference to the previous one... */
        u->bpf_device_control_installed = bpf_program_unref(u->bpf_device_control_installed);

        /* ...and keep a reference to the one that is installed from now on. */
        u->bpf_device_control_installed = bpf_program_ref(prog);

        return 0;
}
|
|
|
|
int bpf_devices_supported(void) {
|
|
const struct bpf_insn trivial[] = {
|
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
|
BPF_EXIT_INSN()
|
|
};
|
|
|
|
_cleanup_(bpf_program_unrefp) BPFProgram *program = NULL;
|
|
static int supported = -1;
|
|
int r;
|
|
|
|
/* Checks whether BPF device controller is supported. For this, we check five things:
|
|
*
|
|
* a) whether we are privileged
|
|
* b) whether the unified hierarchy is being used
|
|
* c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
|
|
*/
|
|
|
|
if (supported >= 0)
|
|
return supported;
|
|
|
|
if (geteuid() != 0) {
|
|
log_debug("Not enough privileges, BPF device control is not supported.");
|
|
return supported = 0;
|
|
}
|
|
|
|
r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
|
|
if (r < 0)
|
|
return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
|
|
if (r == 0) {
|
|
log_debug("Not running with unified cgroups, BPF device control is not supported.");
|
|
return supported = 0;
|
|
}
|
|
|
|
r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, &program);
|
|
if (r < 0) {
|
|
log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
|
|
return supported = 0;
|
|
}
|
|
|
|
r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
|
|
if (r < 0) {
|
|
log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
|
|
return supported = 0;
|
|
}
|
|
|
|
r = bpf_program_load_kernel(program, NULL, 0);
|
|
if (r < 0) {
|
|
log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
|
|
return supported = 0;
|
|
}
|
|
|
|
return supported = 1;
|
|
}
|
|
|
|
/* Whitelists devices of the given type ('b' or 'c') for the access string 'acc'. 'maj' and 'min' are
 * optional: with both set a single device is matched, with only 'maj' a whole major, and with
 * 'maj' unset all devices of the type.
 *
 * If cg_all_unified() reports the unified hierarchy, the matching rule is appended to 'prog' (a
 * NULL 'prog' makes this a NOP returning 0). Otherwise the rule is written to the "devices.allow"
 * attribute of the cgroup 'path', and the result of that write is returned after logging a
 * failure. */
static int whitelist_device_pattern(BPFProgram *prog, const char *path, char type, const unsigned *maj, const unsigned *min, const char *acc) {
        assert(IN_SET(type, 'b', 'c'));

        if (cg_all_unified() <= 0) {
                char rule[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
                int r;

                if (!maj)
                        xsprintf(rule, "%c *:* %s", type, acc);
                else if (!min)
                        xsprintf(rule, "%c %u:* %s", type, *maj, acc);
                else
                        xsprintf(rule, "%c %u:%u %s", type, *maj, *min, acc);

                /* Changing the devices list of a populated cgroup might result in EINVAL, hence
                 * that one (like a couple of other expected errors) is only logged at debug
                 * level. */
                r = cg_set_attribute("devices", path, "devices.allow", rule);
                if (r < 0)
                        log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
                                       r, "Failed to set devices.allow on %s: %m", path);

                return r;
        }

        if (!prog)
                return 0;

        const int bpf_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;

        if (!maj)
                return bpf_prog_whitelist_class(prog, bpf_type, acc);
        if (!min)
                return bpf_prog_whitelist_major(prog, bpf_type, *maj, acc);

        return bpf_prog_whitelist_device(prog, bpf_type, *maj, *min, acc);
}
|
|
|
|
/* Whitelists the single device referred to by the device node path 'node' for the access string
 * 'acc' (at most three characters), either in 'prog' or in the cgroup 'path', as decided by
 * whitelist_device_pattern().
 *
 * Returns the result of whitelist_device_pattern(), or the result of log_warning_errno() if the
 * node could neither be parsed nor stat()ed, or does not refer to a device. */
int bpf_devices_whitelist_device(BPFProgram *prog, const char *path, const char *node, const char *acc) {
        mode_t mode;
        dev_t rdev;
        int r;

        assert(path);
        assert(acc);
        assert(strlen(acc) <= 3);

        /* Paths such as /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
         * /run/systemd/inaccessible/blk get special treatment: major/minor are parsed out of the
         * path itself instead of stat()ing it, so that clients may use these paths without the
         * device node actually being around. */
        r = device_path_parse_major_minor(node, &mode, &rdev);
        if (r < 0 && r != -ENODEV)
                return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);

        if (r < 0) {
                /* Not one of the special paths, look at the node itself. */
                struct stat st;

                if (stat(node, &st) < 0)
                        return log_warning_errno(errno, "Couldn't stat device %s: %m", node);

                if (!(S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)))
                        return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "%s is not a device.", node);

                mode = st.st_mode;
                rdev = (dev_t) st.st_rdev;
        }

        unsigned maj = major(rdev), min = minor(rdev);
        const char type = S_ISCHR(mode) ? 'c' : 'b';

        return whitelist_device_pattern(prog, path, type, &maj, &min, acc);
}
|
|
|
|
/* Whitelists whole device majors of the given type ('b' or 'c') for the access string 'acc'.
 * 'name' may be:
 *
 *   - "*", which whitelists all devices of the type,
 *   - a number that is valid as a major, which is whitelisted directly,
 *   - anything else, which is used as fnmatch() pattern against the driver names listed in the
 *     matching section of /proc/devices; every major that matches is whitelisted.
 *
 * In the first two cases the result of whitelist_device_pattern() is returned. In the last case
 * failures to whitelist individual majors are ignored and 0 is returned, unless /proc/devices
 * cannot be opened or read, in which case the result of log_warning_errno() is returned. */
int bpf_devices_whitelist_major(BPFProgram *prog, const char *path, const char *name, char type, const char *acc) {
        unsigned maj;
        int r;

        assert(path);
        assert(acc);
        assert(IN_SET(type, 'b', 'c'));

        /* A wildcard applies to all devices of this type. */
        if (streq(name, "*"))
                return whitelist_device_pattern(prog, path, type, NULL, NULL, acc);

        /* A number that is suitable as major is taken as is. */
        if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj))
                return whitelist_device_pattern(prog, path, type, &maj, NULL, acc);

        /* The section of /proc/devices we are interested in starts with this line. */
        const char *section_header = type == 'c' ? "Character devices:" : "Block devices:";
        _cleanup_fclose_ FILE *f = NULL;
        bool in_section = false;

        f = fopen("/proc/devices", "re");
        if (!f)
                return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s: %m", name);

        for (;;) {
                _cleanup_free_ char *line = NULL;
                char *number, *sep, *driver;

                r = read_line(f, LONG_LINE_MAX, &line);
                if (r < 0)
                        return log_warning_errno(r, "Failed to read /proc/devices: %m");
                if (r == 0)
                        break;

                if (streq(line, section_header)) {
                        in_section = true;
                        continue;
                }

                /* An empty line terminates the current section. */
                if (isempty(line)) {
                        in_section = false;
                        continue;
                }

                if (!in_section)
                        continue;

                /* Entries consist of a major number, whitespace and the driver name. */
                number = strstrip(line);

                sep = strpbrk(number, WHITESPACE);
                if (!sep)
                        continue;
                *sep = 0;

                if (safe_atou(number, &maj) < 0)
                        continue;
                if (maj == 0)
                        continue;

                driver = sep + 1;
                driver += strspn(driver, WHITESPACE);

                if (fnmatch(name, driver, 0) != 0)
                        continue;

                /* Best effort: a failure for one major does not stop us from trying the others. */
                (void) whitelist_device_pattern(prog, path, type, &maj, NULL, acc);
        }

        return 0;
}
|