diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c
index daa15dbfcb..6c38e59031 100644
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -2767,6 +2767,7 @@ static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
         [CGROUP_CONTROLLER_MEMORY] = "memory",
         [CGROUP_CONTROLLER_DEVICES] = "devices",
         [CGROUP_CONTROLLER_PIDS] = "pids",
+        [CGROUP_CONTROLLER_BPF_FIREWALL] = "bpf-firewall",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h
index 1a28a8163a..23602b6da9 100644
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -19,6 +19,7 @@
 
 /* An enum of well known cgroup controllers */
 typedef enum CGroupController {
+        /* Original cgroup controllers */
         CGROUP_CONTROLLER_CPU,
         CGROUP_CONTROLLER_CPUACCT, /* v1 only */
         CGROUP_CONTROLLER_IO, /* v2 only */
@@ -26,6 +27,10 @@ typedef enum CGroupController {
         CGROUP_CONTROLLER_MEMORY,
         CGROUP_CONTROLLER_DEVICES, /* v1 only */
         CGROUP_CONTROLLER_PIDS,
+
+        /* BPF-based pseudo-controllers, v2 only */
+        CGROUP_CONTROLLER_BPF_FIREWALL,
+
         _CGROUP_CONTROLLER_MAX,
         _CGROUP_CONTROLLER_INVALID = -1,
 } CGroupController;
@@ -41,6 +46,7 @@ typedef enum CGroupMask {
         CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY),
         CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES),
         CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS),
+        CGROUP_MASK_BPF_FIREWALL = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_BPF_FIREWALL),
         _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1
 } CGroupMask;
 
diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 7e07b2f13c..c390e69681 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -770,7 +770,6 @@ static void cgroup_apply_firewall(Unit *u) {
 static void cgroup_context_apply(
                 Unit *u,
                 CGroupMask apply_mask,
-                bool apply_bpf,
                 ManagerState state) {
 
         const char *path;
@@ -781,7 +780,7 @@ static void cgroup_context_apply(
         assert(u);
 
         /* Nothing to do? Exit early! */
-        if (apply_mask == 0 && !apply_bpf)
+        if (apply_mask == 0)
                 return;
 
         /* Some cgroup attributes are not supported on the root cgroup, hence silently ignore */
@@ -1127,7 +1126,7 @@ static void cgroup_context_apply(
                 }
         }
 
-        if (apply_bpf)
+        if (apply_mask & CGROUP_MASK_BPF_FIREWALL)
                 cgroup_apply_firewall(u);
 }
 
@@ -1161,6 +1160,15 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) {
         return mask;
 }
 
+CGroupMask unit_get_bpf_mask(Unit *u) {
+        CGroupMask mask = 0;
+
+        if (unit_get_needs_bpf_firewall(u))
+                mask |= CGROUP_MASK_BPF_FIREWALL;
+
+        return mask;
+}
+
 CGroupMask unit_get_own_mask(Unit *u) {
         CGroupContext *c;
 
@@ -1170,7 +1178,7 @@ CGroupMask unit_get_own_mask(Unit *u) {
         if (!c)
                 return 0;
 
-        return cgroup_context_get_mask(c) | unit_get_delegate_mask(u);
+        return cgroup_context_get_mask(c) | unit_get_bpf_mask(u) | unit_get_delegate_mask(u);
 }
 
 CGroupMask unit_get_delegate_mask(Unit *u) {
@@ -1278,7 +1286,7 @@ CGroupMask unit_get_enable_mask(Unit *u) {
         return mask;
 }
 
-bool unit_get_needs_bpf(Unit *u) {
+bool unit_get_needs_bpf_firewall(Unit *u) {
         CGroupContext *c;
         Unit *p;
         assert(u);
@@ -1508,8 +1516,7 @@ int unit_pick_cgroup_path(Unit *u) {
 static int unit_create_cgroup(
                 Unit *u,
                 CGroupMask target_mask,
-                CGroupMask enable_mask,
-                bool needs_bpf) {
+                CGroupMask enable_mask) {
 
         CGroupContext *c;
         int r;
@@ -1549,7 +1556,6 @@ static int unit_create_cgroup(
         u->cgroup_realized = true;
         u->cgroup_realized_mask = target_mask;
         u->cgroup_enabled_mask = enable_mask;
-        u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
 
         if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {
 
@@ -1725,16 +1731,14 @@ static void cgroup_xattr_apply(Unit *u) {
 static bool unit_has_mask_realized(
                 Unit *u,
                 CGroupMask target_mask,
-                CGroupMask enable_mask,
-                bool needs_bpf) {
+                CGroupMask enable_mask) {
 
         assert(u);
 
         return u->cgroup_realized &&
                 u->cgroup_realized_mask == target_mask &&
                 u->cgroup_enabled_mask == enable_mask &&
-                ((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) ||
-                 (!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
+                u->cgroup_invalidated_mask == 0;
 }
 
 static void unit_add_to_cgroup_realize_queue(Unit *u) {
@@ -1765,7 +1769,6 @@ static void unit_remove_from_cgroup_realize_queue(Unit *u) {
  * Returns 0 on success and < 0 on failure. */
 static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
         CGroupMask target_mask, enable_mask;
-        bool needs_bpf, apply_bpf;
         int r;
 
         assert(u);
@@ -1774,16 +1777,10 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
 
         target_mask = unit_get_target_mask(u);
         enable_mask = unit_get_enable_mask(u);
-        needs_bpf = unit_get_needs_bpf(u);
 
-        if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf))
+        if (unit_has_mask_realized(u, target_mask, enable_mask))
                 return 0;
 
-        /* Make sure we apply the BPF filters either when one is configured, or if none is configured but previously
-         * the state was anything but off. This way, if a unit with a BPF filter applied is reconfigured to lose it
-         * this will trickle down properly to cgroupfs. */
-        apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF;
-
         /* First, realize parents */
         if (UNIT_ISSET(u->slice)) {
                 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
@@ -1792,12 +1789,16 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
         }
 
         /* And then do the real work */
-        r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf);
+        r = unit_create_cgroup(u, target_mask, enable_mask);
         if (r < 0)
                 return r;
 
         /* Finally, apply the necessary attributes. */
-        cgroup_context_apply(u, target_mask, apply_bpf, state);
+        cgroup_context_apply(u, target_mask, state);
         cgroup_xattr_apply(u);
 
+        /* The attributes have been applied, hence reset the invalidation mask: unit_has_mask_realized() only
+         * considers the unit realized while this mask is zero, and nothing else clears it. */
+        u->cgroup_invalidated_mask = 0;
+
         return 0;
@@ -1863,8 +1864,7 @@ static void unit_add_siblings_to_cgroup_realize_queue(Unit *u) {
                          * any changes. */
                         if (unit_has_mask_realized(m,
                                                    unit_get_target_mask(m),
-                                                   unit_get_enable_mask(m),
-                                                   unit_get_needs_bpf(m)))
+                                                   unit_get_enable_mask(m)))
                                 continue;
 
                         unit_add_to_cgroup_realize_queue(m);
@@ -2207,11 +2207,25 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
         }
 }
 
+static int cg_bpf_mask_supported(CGroupMask *ret) {
+        CGroupMask mask = 0;
+        int r;
+
+        /* BPF-based firewall */
+        r = bpf_firewall_supported();
+        if (r > 0)
+                mask |= CGROUP_MASK_BPF_FIREWALL;
+
+        *ret = mask;
+        return 0;
+}
+
 int manager_setup_cgroup(Manager *m) {
         _cleanup_free_ char *path = NULL;
         const char *scope_path;
         CGroupController c;
         int r, all_unified;
+        CGroupMask mask;
         char *e;
 
         assert(m);
@@ -2341,10 +2355,18 @@ int manager_setup_cgroup(Manager *m) {
         if (!all_unified && m->test_run_flags == 0)
                 (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
 
-        /* 8. Figure out which controllers are supported, and log about it */
+        /* 8. Figure out which controllers are supported */
         r = cg_mask_supported(&m->cgroup_supported);
         if (r < 0)
                 return log_error_errno(r, "Failed to determine supported controllers: %m");
+
+        /* 9. Figure out which bpf-based pseudo-controllers are supported */
+        r = cg_bpf_mask_supported(&mask);
+        if (r < 0)
+                return log_error_errno(r, "Failed to determine supported bpf-based pseudo-controllers: %m");
+        m->cgroup_supported |= mask;
+
+        /* 10. Log which controllers are supported */
         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++)
                 log_debug("Controller '%s' supported: %s", cgroup_controller_to_string(c), yes_no(m->cgroup_supported & CGROUP_CONTROLLER_TO_MASK(c)));
 
@@ -2718,10 +2740,10 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
         if (!UNIT_HAS_CGROUP_CONTEXT(u))
                 return;
 
-        if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED) /* NOP? */
+        if (u->cgroup_invalidated_mask & CGROUP_MASK_BPF_FIREWALL) /* NOP? */
                 return;
 
-        u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
+        u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
         unit_add_to_cgroup_realize_queue(u);
 
         /* If we are a slice unit, we also need to put compile a new BPF program for all our children, as the IP access
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 5517bd79fb..d7daed3fe0 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -157,7 +157,8 @@ CGroupMask unit_get_subtree_mask(Unit *u);
 CGroupMask unit_get_target_mask(Unit *u);
 CGroupMask unit_get_enable_mask(Unit *u);
 
-bool unit_get_needs_bpf(Unit *u);
+bool unit_get_needs_bpf_firewall(Unit *u);
+CGroupMask unit_get_bpf_mask(Unit *u);
 
 void unit_update_cgroup_members_masks(Unit *u);
 
diff --git a/src/core/unit.c b/src/core/unit.c
index 853db527f7..663df3e3c5 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -93,7 +93,7 @@ Unit *unit_new(Manager *m, size_t size) {
         u->ref_uid = UID_INVALID;
         u->ref_gid = GID_INVALID;
         u->cpu_usage_last = NSEC_INFINITY;
-        u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
+        u->cgroup_invalidated_mask |= CGROUP_MASK_BPF_FIREWALL;
 
         u->ip_accounting_ingress_map_fd = -1;
         u->ip_accounting_egress_map_fd = -1;
@@ -3253,7 +3253,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
         unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
         (void) unit_serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
         (void) unit_serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
-        unit_serialize_item_format(u, f, "cgroup-bpf-realized", "%i", u->cgroup_bpf_state);
+        (void) unit_serialize_cgroup_mask(f, "cgroup-invalidated-mask", u->cgroup_invalidated_mask);
 
         if (uid_is_valid(u->ref_uid))
                 unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
@@ -3568,18 +3568,11 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                                 log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
                         continue;
 
-                } else if (streq(l, "cgroup-bpf-realized")) {
-                        int i;
+                } else if (streq(l, "cgroup-invalidated-mask")) {
 
-                        r = safe_atoi(v, &i);
+                        r = cg_mask_from_string(v, &u->cgroup_invalidated_mask);
                         if (r < 0)
-                                log_unit_debug(u, "Failed to parse cgroup BPF state %s, ignoring.", v);
-                        else
-                                u->cgroup_bpf_state =
-                                        i < 0 ? UNIT_CGROUP_BPF_INVALIDATED :
-                                        i > 0 ? UNIT_CGROUP_BPF_ON :
-                                        UNIT_CGROUP_BPF_OFF;
-
+                                log_unit_debug(u, "Failed to parse cgroup-invalidated-mask %s, ignoring.", v);
                         continue;
 
                 } else if (streq(l, "ref-uid")) {
diff --git a/src/core/unit.h b/src/core/unit.h
index 2c2819f38e..5a97d9f27c 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -105,12 +105,6 @@ struct UnitRef {
         LIST_FIELDS(UnitRef, refs_by_target);
 };
 
-typedef enum UnitCGroupBPFState {
-        UNIT_CGROUP_BPF_OFF = 0,
-        UNIT_CGROUP_BPF_ON = 1,
-        UNIT_CGROUP_BPF_INVALIDATED = -1,
-} UnitCGroupBPFState;
-
 typedef struct Unit {
         Manager *manager;
 
@@ -258,6 +252,7 @@ typedef struct Unit {
         char *cgroup_path;
         CGroupMask cgroup_realized_mask;
         CGroupMask cgroup_enabled_mask;
+        CGroupMask cgroup_invalidated_mask;
         CGroupMask cgroup_subtree_mask;
         CGroupMask cgroup_members_mask;
         int cgroup_inotify_wd;
@@ -336,8 +331,6 @@ typedef struct Unit {
         bool cgroup_members_mask_valid:1;
         bool cgroup_subtree_mask_valid:1;
 
-        UnitCGroupBPFState cgroup_bpf_state:2;
-
         /* Reset cgroup accounting next time we fork something off */
         bool reset_accounting:1;
 
diff --git a/src/test/test-cgroup-mask.c b/src/test/test-cgroup-mask.c
index bab27edf54..c4df325365 100644
--- a/src/test/test-cgroup-mask.c
+++ b/src/test/test-cgroup-mask.c
@@ -100,7 +100,7 @@ static void test_cg_mask_to_string_one(CGroupMask mask, const char *t) {
 
 static void test_cg_mask_to_string(void) {
         test_cg_mask_to_string_one(0, NULL);
-        test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct io blkio memory devices pids");
+        test_cg_mask_to_string_one(_CGROUP_MASK_ALL, "cpu cpuacct io blkio memory devices pids bpf-firewall");
         test_cg_mask_to_string_one(CGROUP_MASK_CPU, "cpu");
         test_cg_mask_to_string_one(CGROUP_MASK_CPUACCT, "cpuacct");
         test_cg_mask_to_string_one(CGROUP_MASK_IO, "io");