core: refactor bpf firewall support into a pseudo-controller

The idea is to introduce a concept of bpf-based pseudo-controllers
to make adding new bpf-based features easier.
This commit is contained in:
Roman Gushchin 2018-09-30 11:33:16 +01:00
parent b03d6c5f48
commit 17f149556a
7 changed files with 61 additions and 49 deletions

View File

@ -2767,6 +2767,7 @@ static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
[CGROUP_CONTROLLER_MEMORY] = "memory",
[CGROUP_CONTROLLER_DEVICES] = "devices",
[CGROUP_CONTROLLER_PIDS] = "pids",
[CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
};
DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);

View File

@ -19,6 +19,7 @@
/* An enum of well known cgroup controllers */
typedef enum CGroupController {
/* Original cgroup controllers */
CGROUP_CONTROLLER_CPU,
CGROUP_CONTROLLER_CPUACCT, /* v1 only */
CGROUP_CONTROLLER_IO, /* v2 only */
@ -26,6 +27,10 @@ typedef enum CGroupController {
CGROUP_CONTROLLER_MEMORY,
CGROUP_CONTROLLER_DEVICES, /* v1 only */
CGROUP_CONTROLLER_PIDS,
/* BPF-based pseudo-controllers, v2 only */
CGROUP_CONTROLLER_BPF_FIREWALL,
_CGROUP_CONTROLLER_MAX,
_CGROUP_CONTROLLER_INVALID = -1,
} CGroupController;
@ -41,6 +46,7 @@ typedef enum CGroupMask {
CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY),
CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES),
CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS),
CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL),
_CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
} CGroupMask;

View File

@ -770,7 +770,6 @@ static void cgroup_apply_firewall(Unit *u) {
static void cgroup_context_apply(
Unit *u,
CGroupMask apply_mask,
bool apply_bpf,
ManagerState state) {
const char *path;
@ -781,7 +780,7 @@ static void cgroup_context_apply(
assert(u);
/* Nothing to do? Exit early! */
if (apply_mask == 0 && !apply_bpf)
if (apply_mask == 0)
return;
/* Some cgroup attributes are not supported on the root cgroup, hence silently ignore */
@ -1127,7 +1126,7 @@ static void cgroup_context_apply(
}
}
if (apply_bpf)
if (apply_mask & CGROUP_MASK_BPF_FIREWALL)
cgroup_apply_firewall(u);
}
@ -1161,6 +1160,15 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
return mask;
}
CGroupMask unit_get_bpf_mask(Unit *u) {
        /* Returns the mask of BPF-based pseudo-controllers this unit requires: CGROUP_MASK_BPF_FIREWALL
         * when unit_get_needs_bpf_firewall() says the firewall is needed, the empty mask otherwise. */

        return unit_get_needs_bpf_firewall(u) ? CGROUP_MASK_BPF_FIREWALL : 0;
}
CGroupMask unit_get_own_mask(Unit *u) {
CGroupContext *c;
@ -1170,7 +1178,7 @@ CGroupMask unit_get_own_mask(Unit *u) {
if (!c)
return 0;
return cgroup_context_get_mask(c) | unit_get_delegate_mask(u);
return cgroup_context_get_mask(c) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u);
}
CGroupMask unit_get_delegate_mask(Unit *u) {
@ -1278,7 +1286,7 @@ CGroupMask unit_get_enable_mask(Unit *u) {
return mask;
}
bool unit_get_needs_bpf(Unit *u) {
bool unit_get_needs_bpf_firewall(Unit *u) {
CGroupContext *c;
Unit *p;
assert(u);
@ -1508,8 +1516,7 @@ int unit_pick_cgroup_path(Unit *u) {
static int unit_create_cgroup(
Unit *u,
CGroupMask target_mask,
CGroupMask enable_mask,
bool needs_bpf) {
CGroupMask enable_mask) {
CGroupContext *c;
int r;
@ -1549,7 +1556,6 @@ static int unit_create_cgroup(
u->cgroup_realized = true;
u->cgroup_realized_mask = target_mask;
u->cgroup_enabled_mask = enable_mask;
u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {
@ -1725,16 +1731,14 @@ static void cgroup_xattr_apply(Unit *u) {
static bool unit_has_mask_realized(
Unit *u,
CGroupMask target_mask,
CGroupMask enable_mask,
bool needs_bpf) {
CGroupMask enable_mask) {
assert(u);
return u->cgroup_realized &&
u->cgroup_realized_mask == target_mask &&
u->cgroup_enabled_mask == enable_mask &&
((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) ||
(!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
u->cgroup_invalidated_mask == 0;
}
static void unit_add_to_cgroup_realize_queue(Unit *u) {
@ -1765,7 +1769,6 @@ static void unit_remove_from_cgroup_realize_queue(Unit *u) {
* Returns 0 on success and < 0 on failure. */
static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
CGroupMask target_mask, enable_mask;
bool needs_bpf, apply_bpf;
int r;
assert(u);
@ -1774,16 +1777,10 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
target_mask = unit_get_target_mask(u);
enable_mask = unit_get_enable_mask(u);
needs_bpf = unit_get_needs_bpf(u);
if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf))
if (unit_has_mask_realized(u, target_mask, enable_mask))
return 0;
/* Make sure we apply the BPF filters either when one is configured, or if none is configured but previously
* the state was anything but off. This way, if a unit with a BPF filter applied is reconfigured to lose it
* this will trickle down properly to cgroupfs. */
apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF;
/* First, realize parents */
if (UNIT_ISSET(u->slice)) {
r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
@ -1792,12 +1789,12 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
}
/* And then do the real work */
r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf);
r = unit_create_cgroup(u, target_mask, enable_mask);
if (r < 0)
return r;
/* Finally, apply the necessary attributes. */
cgroup_context_apply(u, target_mask, apply_bpf, state);
cgroup_context_apply(u, target_mask, state);
cgroup_xattr_apply(u);
return 0;
@ -1863,8 +1860,7 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) {
* any changes. */
if (unit_has_mask_realized(m,
unit_get_target_mask(m),
unit_get_enable_mask(m),
unit_get_needs_bpf(m)))
unit_get_enable_mask(m)))
continue;
unit_add_to_cgroup_realize_queue(m);
@ -2207,11 +2203,25 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
}
}
static int cg_bpf_mask_supported(CGroupMask *ret) {
        /* Stores in *ret the mask of BPF-based pseudo-controllers that are usable. Always returns 0;
         * an unavailable feature simply leaves its bit cleared in the result. */
        CGroupMask supported = 0;

        /* BPF-based firewall: only a positive probe result counts as available */
        if (bpf_firewall_supported() > 0)
                supported |= CGROUP_MASK_BPF_FIREWALL;

        *ret = supported;
        return 0;
}
int manager_setup_cgroup(Manager *m) {
_cleanup_free_ char *path = NULL;
const char *scope_path;
CGroupController c;
int r, all_unified;
CGroupMask mask;
char *e;
assert(m);
@ -2341,10 +2351,18 @@ int manager_setup_cgroup(Manager *m) {
if (!all_unified && m->test_run_flags == 0)
(void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
/* 8. Figure out which controllers are supported, and log about it */
/* 8. Figure out which controllers are supported */
r = cg_mask_supported(&m->cgroup_supported);
if (r < 0)
return log_error_errno(r, "Failed to determine supported controllers: %m");
/* 9. Figure out which bpf-based pseudo-controllers are supported */
r = cg_bpf_mask_supported(&mask);
if (r < 0)
return log_error_errno(r, "Failed to determine supported bpf-based pseudo-controllers: %m");
m->cgroup_supported |= mask;
/* 10. Log which controllers are supported */
for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c)));
@ -2718,10 +2736,10 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
if (!UNIT_HAS_CGROUP_CONTEXT(u))
return;
if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED) /* NOP? */
if (u->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */
return;
u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
unit_add_to_cgroup_realize_queue(u);
/* If we are a slice unit, we also need to compile a new BPF program for all our children, as the IP access

View File

@ -157,7 +157,8 @@ CGroupMask unit_get_subtree_mask(Unit *u);
CGroupMask unit_get_target_mask(Unit *u);
CGroupMask unit_get_enable_mask(Unit *u);
bool unit_get_needs_bpf(Unit *u);
bool unit_get_needs_bpf_firewall(Unit *u);
CGroupMask unit_get_bpf_mask(Unit *u);
void unit_update_cgroup_members_masks(Unit *u);

View File

@ -93,7 +93,7 @@ Unit *unit_new(Manager *m, size_t size) {
u->ref_uid = UID_INVALID;
u->ref_gid = GID_INVALID;
u->cpu_usage_last = NSEC_INFINITY;
u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
u->ip_accounting_ingress_map_fd = -1;
u->ip_accounting_egress_map_fd = -1;
@ -3253,7 +3253,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
(void) unit_serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
(void) unit_serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
unit_serialize_item_format(u, f, "cgroup-bpf-realized", "%i", u->cgroup_bpf_state);
(void) unit_serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask);
if (uid_is_valid(u->ref_uid))
unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
@ -3568,18 +3568,11 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
continue;
} else if (streq(l, "cgroup-bpf-realized")) {
int i;
} else if (streq(l, "cgroup-invalidated-mask")) {
r = safe_atoi(v, &i);
r = cg_mask_from_string(v, &u->cgroup_invalidated_mask);
if (r < 0)
log_unit_debug(u, "Failed to parse cgroup BPF state %s, ignoring.", v);
else
u->cgroup_bpf_state =
i < 0 ? UNIT_CGROUP_BPF_INVALIDATED :
i > 0 ? UNIT_CGROUP_BPF_ON :
UNIT_CGROUP_BPF_OFF;
log_unit_debug(u, "Failed to parse cgroup-invalidated-mask %s, ignoring.", v);
continue;
} else if (streq(l, "ref-uid")) {

View File

@ -105,12 +105,6 @@ struct UnitRef {
LIST_FIELDS(UnitRef, refs_by_target);
};
/* State of a unit's cgroup BPF setup, kept in Unit.cgroup_bpf_state. The numeric values matter: the field
 * is written out as an integer under "cgroup-bpf-realized" by unit_serialize(), and unit_deserialize() maps
 * negative values to INVALIDATED, positive values to ON and zero to OFF. */
typedef enum UnitCGroupBPFState {
/* Recorded by unit_create_cgroup() when the unit was realized without needing BPF */
UNIT_CGROUP_BPF_OFF = 0,
/* Recorded by unit_create_cgroup() when the unit was realized and needs BPF */
UNIT_CGROUP_BPF_ON = 1,
/* Initial value set by unit_new(), and set again by unit_invalidate_cgroup_bpf(); matches neither
 * branch of the check in unit_has_mask_realized(), so the unit is treated as not realized */
UNIT_CGROUP_BPF_INVALIDATED = -1,
} UnitCGroupBPFState;
typedef struct Unit {
Manager *manager;
@ -258,6 +252,7 @@ typedef struct Unit {
char *cgroup_path;
CGroupMask cgroup_realized_mask;
CGroupMask cgroup_enabled_mask;
CGroupMask cgroup_invalidated_mask;
CGroupMask cgroup_subtree_mask;
CGroupMask cgroup_members_mask;
int cgroup_inotify_wd;
@ -336,8 +331,6 @@ typedef struct Unit {
bool cgroup_members_mask_valid:1;
bool cgroup_subtree_mask_valid:1;
UnitCGroupBPFState cgroup_bpf_state:2;
/* Reset cgroup accounting next time we fork something off */
bool reset_accounting:1;

View File

@ -100,7 +100,7 @@ static void test_cg_mask_to_string_one(CGroupMask mask, const char *t) {
static void test_cg_mask_to_string(void) {
test_cg_mask_to_string_one(0, NULL);
test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct io blkio memory devices pids");
test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct io blkio memory devices pids bpf-firewall");
test_cg_mask_to_string_one(CGROUP_MASK_CPU, "cpu");
test_cg_mask_to_string_one(CGROUP_MASK_CPUACCT, "cpuacct");
test_cg_mask_to_string_one(CGROUP_MASK_IO, "io");