From 32ee7d3309816994a02b3ff000e9734120d71214 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 15 Jan 2015 20:08:42 +0100 Subject: [PATCH] cgroup: add support for net_cls controllers Add a new config directive called NetClass= to CGroup enabled units. Allowed values are positive numbers for fix assignments and "auto" for picking a free value automatically, for which we need to keep track of dynamically assigned net class IDs of units. Introduce a hash table for this, and also record the last ID that was given out, so the allocator can start its search for the next 'hole' from there. This could eventually be optimized with something like an irb. The class IDs up to 65536 are considered reserved and won't be assigned automatically by systemd. This barrier can be made a config directive in the future. Values set in unit files are stored in the CGroupContext of the unit and considered read-only. The actually assigned number (which may have been chosen dynamically) is stored in the unit itself and is guaranteed to remain stable as long as the unit is active. In the CGroup controller, set the configured CGroup net class to net_cls.classid. Multiple unit may share the same net class ID, and those which do are linked together. --- man/systemd.unit.xml | 14 +++ src/basic/cgroup-util.c | 1 + src/basic/cgroup-util.h | 2 + src/core/cgroup.c | 117 +++++++++++++++++++++++++- src/core/cgroup.h | 24 +++++- src/core/dbus-unit.c | 1 + src/core/load-fragment-gperf.gperf.m4 | 3 +- src/core/load-fragment.c | 42 +++++++++ src/core/load-fragment.h | 1 + src/core/manager.c | 4 + src/core/manager.h | 4 + src/core/unit.c | 27 ++++++ src/core/unit.h | 5 ++ 13 files changed, 241 insertions(+), 4 deletions(-) diff --git a/man/systemd.unit.xml b/man/systemd.unit.xml index ea58580bba..015deab4bb 100644 --- a/man/systemd.unit.xml +++ b/man/systemd.unit.xml @@ -1044,6 +1044,20 @@ files. This functionality should not be used in normal units. + + + NetClass= + Configures a network class number to assign to the + unit. This value will be set to the + net_cls.class_id property of the + net_cls cgroup of the unit. The directive + accepts a numerical value (for fixed number assignment) and the keyword + auto (for dynamic allocation). Network traffic of + all processes inside the unit will have the network class ID assigned + by the kernel. Also see + systemd.resource-control5 + . + diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 4bf08cfe03..95fc2b9e5d 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -2254,6 +2254,7 @@ static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = { [CGROUP_CONTROLLER_MEMORY] = "memory", [CGROUP_CONTROLLER_DEVICES] = "devices", [CGROUP_CONTROLLER_PIDS] = "pids", + [CGROUP_CONTROLLER_NET_CLS] = "net_cls", }; DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController); diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 33ca28cab7..01359fa7cb 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -36,6 +36,7 @@ typedef enum CGroupController { CGROUP_CONTROLLER_MEMORY, CGROUP_CONTROLLER_DEVICES, CGROUP_CONTROLLER_PIDS, + CGROUP_CONTROLLER_NET_CLS, _CGROUP_CONTROLLER_MAX, _CGROUP_CONTROLLER_INVALID = -1, } CGroupController; @@ -50,6 +51,7 @@ typedef enum CGroupMask { CGROUP_MASK_MEMORY = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_MEMORY), CGROUP_MASK_DEVICES = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_DEVICES), CGROUP_MASK_PIDS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_PIDS), + CGROUP_MASK_NET_CLS = CGROUP_CONTROLLER_TO_MASK(CGROUP_CONTROLLER_NET_CLS), _CGROUP_MASK_ALL = CGROUP_CONTROLLER_TO_MASK(_CGROUP_CONTROLLER_MAX) - 1 } CGroupMask; diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 7d2a2ba67a..0c790c33da 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -47,6 +47,8 @@ void cgroup_context_init(CGroupContext *c) { c->startup_blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID; c->tasks_max = (uint64_t) -1; + + c->netclass_type = CGROUP_NETCLASS_TYPE_NONE; } void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) { @@ -291,7 +293,7 @@ fail: return -errno; } -void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state) { +void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, uint32_t netclass, ManagerState state) { bool is_root; int r; @@ -489,6 +491,17 @@ void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, M log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, "Failed to set pids.max on %s: %m", path); } + + if (mask & CGROUP_MASK_NET_CLS) { + char buf[DECIMAL_STR_MAX(uint32_t)]; + + sprintf(buf, "%" PRIu32, netclass); + + r = cg_set_attribute("net_cls", path, "net_cls.classid", buf); + if (r < 0) + log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set net_cls.classid on %s: %m", path); + } } CGroupMask cgroup_context_get_mask(CGroupContext *c) { @@ -521,6 +534,9 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c) { c->tasks_max != (uint64_t) -1) mask |= CGROUP_MASK_PIDS; + if (c->netclass_type != CGROUP_NETCLASS_TYPE_NONE) + mask |= CGROUP_MASK_NET_CLS; + return mask; } @@ -888,6 +904,103 @@ static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask) { return u->cgroup_realized && u->cgroup_realized_mask == target_mask; } +static int unit_find_free_netclass_cgroup(Unit *u, uint32_t *ret) { + + uint32_t start, i; + Manager *m; + + assert(u); + + m = u->manager; + + i = start = m->cgroup_netclass_registry_last; + + do { + i++; + + if (!hashmap_get(m->cgroup_netclass_registry, UINT_TO_PTR(i))) { + m->cgroup_netclass_registry_last = i; + *ret = i; + return 0; + } + + if (i == UINT32_MAX) + i = CGROUP_NETCLASS_FIXED_MAX; + + } while (i != start); + + return -ENOBUFS; +} + +int unit_add_to_netclass_cgroup(Unit *u) { + + CGroupContext *cc; + Unit *first; + void *key; + int r; + + assert(u); + + cc = unit_get_cgroup_context(u); + if (!cc) + return 0; + + switch (cc->netclass_type) { + case CGROUP_NETCLASS_TYPE_NONE: + return 0; + + case CGROUP_NETCLASS_TYPE_FIXED: + u->cgroup_netclass_id = cc->netclass_id; + break; + + case CGROUP_NETCLASS_TYPE_AUTO: + /* Allocate a new ID in case it was requested and not done yet */ + if (u->cgroup_netclass_id == 0) { + r = unit_find_free_netclass_cgroup(u, &u->cgroup_netclass_id); + if (r < 0) + return r; + + log_debug("Dynamically assigned netclass cgroup id %" PRIu32 " to %s", u->cgroup_netclass_id, u->id); + } + + break; + } + + r = hashmap_ensure_allocated(&u->manager->cgroup_netclass_registry, &trivial_hash_ops); + if (r < 0) + return r; + + key = UINT32_TO_PTR(u->cgroup_netclass_id); + first = hashmap_get(u->manager->cgroup_netclass_registry, key); + + if (first) { + LIST_PREPEND(cgroup_netclass, first, u); + return hashmap_replace(u->manager->cgroup_netclass_registry, key, u); + } + + return hashmap_put(u->manager->cgroup_netclass_registry, key, u); +} + +int unit_remove_from_netclass_cgroup(Unit *u) { + + Unit *head; + void *key; + + assert(u); + + key = UINT32_TO_PTR(u->cgroup_netclass_id); + + LIST_FIND_HEAD(cgroup_netclass, u, head); + LIST_REMOVE(cgroup_netclass, head, u); + + if (head) + return hashmap_replace(u->manager->cgroup_netclass_registry, key, head); + + hashmap_remove(u->manager->cgroup_netclass_registry, key); + + return 0; +} + /* Check if necessary controllers and attributes for a unit are in place. * * If so, do nothing. @@ -923,7 +1036,7 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) { return r; /* Finally, apply the necessary attributes. */ - cgroup_context_apply(unit_get_cgroup_context(u), target_mask, u->cgroup_path, state); + cgroup_context_apply(unit_get_cgroup_context(u), target_mask, u->cgroup_path, u->cgroup_netclass_id, state); return 0; } diff --git a/src/core/cgroup.h b/src/core/cgroup.h index e897b31451..457544b49f 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -26,6 +26,11 @@ #include "list.h" #include "time-util.h" +/* Maximum value for fixed (manual) net class ID assignment, + * and also the value at which the range of automatic assignments starts + */ +#define CGROUP_NETCLASS_FIXED_MAX UINT32_C(65535) + typedef struct CGroupContext CGroupContext; typedef struct CGroupDeviceAllow CGroupDeviceAllow; typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight; @@ -47,6 +52,17 @@ typedef enum CGroupDevicePolicy { _CGROUP_DEVICE_POLICY_INVALID = -1 } CGroupDevicePolicy; +typedef enum CGroupNetClassType { + /* Default - do not assign a net class */ + CGROUP_NETCLASS_TYPE_NONE, + + /* Automatically assign a net class */ + CGROUP_NETCLASS_TYPE_AUTO, + + /* Assign the net class that was provided by the user */ + CGROUP_NETCLASS_TYPE_FIXED, +} CGroupNetClassType; + struct CGroupDeviceAllow { LIST_FIELDS(CGroupDeviceAllow, device_allow); char *path; @@ -88,6 +104,9 @@ struct CGroupContext { CGroupDevicePolicy device_policy; LIST_HEAD(CGroupDeviceAllow, device_allow); + CGroupNetClassType netclass_type; + uint32_t netclass_id; + uint64_t tasks_max; bool delegate; @@ -99,7 +118,7 @@ struct CGroupContext { void cgroup_context_init(CGroupContext *c); void cgroup_context_done(CGroupContext *c); void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix); -void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, ManagerState state); +void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, uint32_t netclass_id, ManagerState state); CGroupMask cgroup_context_get_mask(CGroupContext *c); @@ -127,6 +146,9 @@ int unit_watch_cgroup(Unit *u); int unit_attach_pids_to_cgroup(Unit *u); +int unit_add_to_netclass_cgroup(Unit *u); +int unit_remove_from_netclass_cgroup(Unit *u); + int manager_setup_cgroup(Manager *m); void manager_shutdown_cgroup(Manager *m, bool delete); diff --git a/src/core/dbus-unit.c b/src/core/dbus-unit.c index f7e9795928..cd88a87340 100644 --- a/src/core/dbus-unit.c +++ b/src/core/dbus-unit.c @@ -679,6 +679,7 @@ const sd_bus_vtable bus_unit_vtable[] = { SD_BUS_PROPERTY("Asserts", "a(sbbsi)", property_get_conditions, offsetof(Unit, asserts), 0), SD_BUS_PROPERTY("LoadError", "(ss)", property_get_load_error, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Transient", "b", bus_property_get_bool, offsetof(Unit, transient), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("NetClass", "u", bus_property_get_unsigned, offsetof(Unit, cgroup_netclass_id), 0), SD_BUS_METHOD("Start", "s", "o", method_start, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("Stop", "s", "o", method_stop, SD_BUS_VTABLE_UNPRIVILEGED), diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index e056fd863c..ec744214c1 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -126,7 +126,8 @@ $1.BlockIOReadBandwidth, config_parse_blockio_bandwidth, 0, $1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0, offsetof($1, cgroup_context) $1.TasksAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.tasks_accounting) $1.TasksMax, config_parse_tasks_max, 0, offsetof($1, cgroup_context) -$1.Delegate, config_parse_bool, 0, offsetof($1, cgroup_context.delegate)' +$1.Delegate, config_parse_bool, 0, offsetof($1, cgroup_context.delegate) +$1.NetClass, config_parse_netclass, 0, offsetof($1, cgroup_context)' )m4_dnl Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description) Unit.Documentation, config_parse_documentation, 0, offsetof(Unit, documentation) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index b476d472b3..1217b4651e 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2962,6 +2962,48 @@ int config_parse_blockio_bandwidth( return 0; } +int config_parse_netclass( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + CGroupContext *c = data; + unsigned v; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (streq(rvalue, "auto")) { + c->netclass_type = CGROUP_NETCLASS_TYPE_AUTO; + return 0; + } + + r = safe_atou32(rvalue, &v); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "Netclass '%s' invalid. Ignoring.", rvalue); + return 0; + } + + if (v > CGROUP_NETCLASS_FIXED_MAX) + log_syntax(unit, LOG_ERR, filename, line, EINVAL, + "Fixed netclass %" PRIu32 " out of allowed range (0-%d). Applying anyway.", v, (uint32_t) CGROUP_NETCLASS_FIXED_MAX); + + c->netclass_id = v; + c->netclass_type = CGROUP_NETCLASS_TYPE_FIXED; + + return 0; +} + DEFINE_CONFIG_PARSE_ENUM(config_parse_job_mode, job_mode, JobMode, "Failed to parse job mode"); int config_parse_job_mode_isolate( diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 638b343a6e..5d0a09249f 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -90,6 +90,7 @@ int config_parse_device_allow(const char *unit, const char *filename, unsigned l int config_parse_blockio_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_blockio_device_weight(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_blockio_bandwidth(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_netclass(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_job_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_job_mode_isolate(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_exec_selinux_context(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); diff --git a/src/core/manager.c b/src/core/manager.c index 8e518fbaaf..4e672a8c48 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -579,6 +579,8 @@ int manager_new(ManagerRunningAs running_as, bool test_run, Manager **_m) { m->have_ask_password = -EINVAL; /* we don't know */ m->first_boot = -1; + m->cgroup_netclass_registry_last = CGROUP_NETCLASS_FIXED_MAX; + m->test_run = test_run; /* Reboot immediately if the user hits C-A-D more often than 7x per 2s */ @@ -960,6 +962,8 @@ Manager* manager_free(Manager *m) { hashmap_free(m->cgroup_unit); set_free_free(m->unit_path_cache); + hashmap_free(m->cgroup_netclass_registry); + free(m->switch_root); free(m->switch_root_init); diff --git a/src/core/manager.h b/src/core/manager.h index 5cf0dbd508..b955982100 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -303,6 +303,10 @@ struct Manager { const char *unit_log_format_string; int first_boot; + + /* Used for NetClass=auto units */ + Hashmap *cgroup_netclass_registry; + uint32_t cgroup_netclass_registry_last; }; int manager_new(ManagerRunningAs running_as, bool test_run, Manager **m); diff --git a/src/core/unit.c b/src/core/unit.c index 3bfc2460bc..3356b97522 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -452,6 +452,7 @@ static void unit_free_requires_mounts_for(Unit *u) { static void unit_done(Unit *u) { ExecContext *ec; CGroupContext *cc; + int r; assert(u); @@ -468,6 +469,10 @@ static void unit_done(Unit *u) { cc = unit_get_cgroup_context(u); if (cc) cgroup_context_done(cc); + + r = unit_remove_from_netclass_cgroup(u); + if (r < 0) + log_warning_errno(r, "Unable to remove unit from netclass group: %m"); } void unit_free(Unit *u) { @@ -1241,6 +1246,14 @@ int unit_load(Unit *u) { } unit_update_cgroup_members_masks(u); + + /* If we are reloading, we need to wait for the deserializer + * to restore the net_cls ids that have been set previously */ + if (u->manager->n_reloading <= 0) { + r = unit_add_to_netclass_cgroup(u); + if (r < 0) + return r; + } } assert((u->load_state != UNIT_MERGED) == !u->merged_into); @@ -2591,6 +2604,9 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { unit_serialize_item(u, f, "cgroup", u->cgroup_path); unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized)); + if (u->cgroup_netclass_id) + unit_serialize_item_format(u, f, "netclass-id", "%" PRIu32, u->cgroup_netclass_id); + if (serialize_jobs) { if (u->job) { fprintf(f, "job\n"); @@ -2777,6 +2793,17 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { else u->cgroup_realized = b; + continue; + } else if (streq(l, "netclass-id")) { + r = safe_atou32(v, &u->cgroup_netclass_id); + if (r < 0) + log_unit_debug(u, "Failed to parse netclass ID %s, ignoring.", v); + else { + r = unit_add_to_netclass_cgroup(u); + if (r < 0) + log_unit_debug_errno(u, r, "Failed to add unit to netclass cgroup, ignoring: %m"); + } + continue; } diff --git a/src/core/unit.h b/src/core/unit.h index 3c7684411b..c868d75c79 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -161,6 +161,9 @@ struct Unit { /* CGroup realize members queue */ LIST_FIELDS(Unit, cgroup_queue); + /* Units with the same CGroup netclass */ + LIST_FIELDS(Unit, cgroup_netclass); + /* PIDs we keep an eye on. Note that a unit might have many * more, but these are the ones we care enough about to * process SIGCHLD for */ @@ -189,6 +192,8 @@ struct Unit { CGroupMask cgroup_members_mask; int cgroup_inotify_wd; + uint32_t cgroup_netclass_id; + /* How to start OnFailure units */ JobMode on_failure_job_mode;