diff --git a/TODO b/TODO index ae9f066ac6..30409196f5 100644 --- a/TODO +++ b/TODO @@ -62,8 +62,6 @@ Features: * bootctl,sd-boot: actually honour the "architecture" key -* set memory.oom.group in cgroup v2 for all leaf cgroups (kernel v4.19+) - * add a new syscall group "@esoteric" for more esoteric stuff such as bpf() and usefaultd() and make systemd-analyze check for it. @@ -71,8 +69,6 @@ Features: first. i.e. look for all places we use string_erase()/string_free_erase() and augment them with mlock(). Also use MADV_DONTDUMP -* whenever oom_kill memory.event event is triggered print a nice log message - * Move RestrictAddressFamily= to the new cgroup create socket * support the bind/connect/sendmsg cgroup stuff for sandboxing, and possibly diff --git a/catalog/systemd.catalog.in b/catalog/systemd.catalog.in index 5c6799ee5d..d39a2251b0 100644 --- a/catalog/systemd.catalog.in +++ b/catalog/systemd.catalog.in @@ -391,3 +391,16 @@ The following "tags" are possible: - "overflowgid-not-65534" — the kernel group ID used for "unknown" users (with NFS or user namespaces) is not 65534 Current system is tagged as @TAINT@. + +-- fe6faa94e7774663a0da52717891d8ef +Subject: A process of @UNIT@ unit has been killed by the OOM killer. +Defined-By: systemd +Support: %SUPPORT_URL% + +A process of unit @UNIT has been killed by the Linux kernel out-of-memory (OOM) +killer logic. This usually indicates that the system is low on memory and that +memory needed to be freed. A process associated with @UNIT@ has been determined +as the best process to terminate and has been forcibly terminated by the +kernel. + +Note that the memory pressure might or might not have been caused by @UNIT@. diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml index d23b3fb45d..5d1e4d1b97 100644 --- a/man/systemd-system.conf.xml +++ b/man/systemd-system.conf.xml @@ -364,6 +364,17 @@ limits are only defaults for units, they are not applied to PID 1 itself. + + + DefaultOOMPolicy= + + Configure the default policy for reacting to processes being killed by the Linux + Out-Of-Memory (OOM) killer. This may be used to pick a global default for the per-unit + OOMPolicy= setting. See + systemd.service5 + for details. Note that this default is not used for services that have Delegate= + turned on. + diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 688147ea32..df1e1e8681 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -651,11 +651,17 @@ CapabilityBoundingSet=~CAP_B CAP_C OOMScoreAdjust= - Sets the adjustment level for the Out-Of-Memory killer for executed processes. Takes an integer - between -1000 (to disable OOM killing for this process) and 1000 (to make killing of this process under memory - pressure very likely). See proc.txt for - details. + Sets the adjustment value for the Linux kernel's Out-Of-Memory (OOM) killer score for + executed processes. Takes an integer between -1000 (to disable OOM killing of processes of this unit) + and 1000 (to make killing of processes of this unit under memory pressure very likely). See proc.txt for details. If + not specified defaults to the OOM score adjustment level of the service manager itself, which is + normally at 0. + + Use the OOMPolicy= setting of service units to configure how the service + manager shall react to the kernel OOM killer terminating a process of the service. See + systemd.service5 + for details. diff --git a/man/systemd.service.xml b/man/systemd.service.xml index 5b88417530..1f40c2ff37 100644 --- a/man/systemd.service.xml +++ b/man/systemd.service.xml @@ -963,6 +963,29 @@ above. + + OOMPolicy= + + Configure the Out-Of-Memory (OOM) killer policy. On Linux, when memory becomes scarce + the kernel might decide to kill a running process in order to free up memory and reduce memory + pressure. This setting takes one of continue, stop or + kill. If set to continue and a process of the service is + killed by the kernel's OOM killer this is logged but the service continues running. If set to + stop the event is logged but the service is terminated cleanly by the service + manager. If set to kill and one of the service's processes is killed by the OOM + killer the kernel is instructed to kill all remaining processes of the service, too. Defaults to the + setting DefaultOOMPolicy= in + system.conf5 is + set to, except for services where Delegate= is turned on, where it defaults to + continue. + + Use the OOMScoreAdjust= setting to configure whether processes of the unit + shall be considered preferred or less preferred candidates for process termination by the Linux OOM + killer logic. See + systemd.exec5 for + details. + + Check diff --git a/src/core/cgroup.c b/src/core/cgroup.c index ad67ba0438..6a477adb71 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -3,6 +3,8 @@ #include #include +#include "sd-messages.h" + #include "alloc-util.h" #include "blockdev-util.h" #include "bpf-devices.h" @@ -1141,6 +1143,8 @@ static void cgroup_context_apply( cgroup_apply_unified_memory_limit(u, "memory.max", max); cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max); + (void) set_attribute_and_warn(u, "memory", "memory.oom.group", one_zero(c->memory_oom_group)); + } else { char buf[DECIMAL_STR_MAX(uint64_t) + 1]; uint64_t val; @@ -1568,15 +1572,14 @@ int unit_set_cgroup_path(Unit *u, const char *path) { assert(u); + if (streq_ptr(u->cgroup_path, path)) + return 0; + if (path) { p = strdup(path); if (!p) return -ENOMEM; - } else - p = NULL; - - if (streq_ptr(u->cgroup_path, p)) - return 0; + } if (p) { r = hashmap_put(u->manager->cgroup_unit, p, u); @@ -1585,7 +1588,6 @@ int unit_set_cgroup_path(Unit *u, const char *path) { } unit_release_cgroup(u); - u->cgroup_path = TAKE_PTR(p); return 1; @@ -1597,10 +1599,13 @@ int unit_watch_cgroup(Unit *u) { assert(u); + /* Watches the "cgroups.events" attribute of this unit's cgroup for "empty" events, but only if + * cgroupv2 is available. */ + if (!u->cgroup_path) return 0; - if (u->cgroup_inotify_wd >= 0) + if (u->cgroup_control_inotify_wd >= 0) return 0; /* Only applies to the unified hierarchy */ @@ -1610,11 +1615,11 @@ int unit_watch_cgroup(Unit *u) { if (r == 0) return 0; - /* Don't watch the root slice, it's pointless. */ + /* No point in watch the top-level slice, it's never going to run empty. */ if (unit_has_name(u, SPECIAL_ROOT_SLICE)) return 0; - r = hashmap_ensure_allocated(&u->manager->cgroup_inotify_wd_unit, &trivial_hash_ops); + r = hashmap_ensure_allocated(&u->manager->cgroup_control_inotify_wd_unit, &trivial_hash_ops); if (r < 0) return log_oom(); @@ -1622,20 +1627,82 @@ int unit_watch_cgroup(Unit *u) { if (r < 0) return log_oom(); - u->cgroup_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY); - if (u->cgroup_inotify_wd < 0) { + u->cgroup_control_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY); + if (u->cgroup_control_inotify_wd < 0) { - if (errno == ENOENT) /* If the directory is already - * gone we don't need to track - * it, so this is not an error */ + if (errno == ENOENT) /* If the directory is already gone we don't need to track it, so this + * is not an error */ return 0; - return log_unit_error_errno(u, errno, "Failed to add inotify watch descriptor for control group %s: %m", u->cgroup_path); + return log_unit_error_errno(u, errno, "Failed to add control inotify watch descriptor for control group %s: %m", u->cgroup_path); } - r = hashmap_put(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd), u); + r = hashmap_put(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(u->cgroup_control_inotify_wd), u); if (r < 0) - return log_unit_error_errno(u, r, "Failed to add inotify watch descriptor to hash map: %m"); + return log_unit_error_errno(u, r, "Failed to add control inotify watch descriptor to hash map: %m"); + + return 0; +} + +int unit_watch_cgroup_memory(Unit *u) { + _cleanup_free_ char *events = NULL; + CGroupContext *c; + int r; + + assert(u); + + /* Watches the "memory.events" attribute of this unit's cgroup for "oom_kill" events, but only if + * cgroupv2 is available. */ + + if (!u->cgroup_path) + return 0; + + c = unit_get_cgroup_context(u); + if (!c) + return 0; + + /* The "memory.events" attribute is only available if the memory controller is on. Let's hence tie + * this to memory accounting, in a way watching for OOM kills is a form of memory accounting after + * all. */ + if (!c->memory_accounting) + return 0; + + /* Don't watch inner nodes, as the kernel doesn't report oom_kill events recursively currently, and + * we also don't want to generate a log message for each parent cgroup of a process. */ + if (u->type == UNIT_SLICE) + return 0; + + if (u->cgroup_memory_inotify_wd >= 0) + return 0; + + /* Only applies to the unified hierarchy */ + r = cg_all_unified(); + if (r < 0) + return log_error_errno(r, "Failed to determine whether the memory controller is unified: %m"); + if (r == 0) + return 0; + + r = hashmap_ensure_allocated(&u->manager->cgroup_memory_inotify_wd_unit, &trivial_hash_ops); + if (r < 0) + return log_oom(); + + r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "memory.events", &events); + if (r < 0) + return log_oom(); + + u->cgroup_memory_inotify_wd = inotify_add_watch(u->manager->cgroup_inotify_fd, events, IN_MODIFY); + if (u->cgroup_memory_inotify_wd < 0) { + + if (errno == ENOENT) /* If the directory is already gone we don't need to track it, so this + * is not an error */ + return 0; + + return log_unit_error_errno(u, errno, "Failed to add memory inotify watch descriptor for control group %s: %m", u->cgroup_path); + } + + r = hashmap_put(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(u->cgroup_memory_inotify_wd), u); + if (r < 0) + return log_unit_error_errno(u, r, "Failed to add memory inotify watch descriptor to hash map: %m"); return 0; } @@ -1692,6 +1759,7 @@ static int unit_create_cgroup( /* Start watching it */ (void) unit_watch_cgroup(u); + (void) unit_watch_cgroup_memory(u); /* Preserve enabled controllers in delegated units, adjust others. */ if (created || !u->cgroup_realized || !unit_cgroup_delegate(u)) { @@ -2225,12 +2293,20 @@ void unit_release_cgroup(Unit *u) { u->cgroup_path = mfree(u->cgroup_path); } - if (u->cgroup_inotify_wd >= 0) { - if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_inotify_wd) < 0) - log_unit_debug_errno(u, errno, "Failed to remove cgroup inotify watch %i for %s, ignoring: %m", u->cgroup_inotify_wd, u->id); + if (u->cgroup_control_inotify_wd >= 0) { + if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_control_inotify_wd) < 0) + log_unit_debug_errno(u, errno, "Failed to remove cgroup control inotify watch %i for %s, ignoring: %m", u->cgroup_control_inotify_wd, u->id); - (void) hashmap_remove(u->manager->cgroup_inotify_wd_unit, INT_TO_PTR(u->cgroup_inotify_wd)); - u->cgroup_inotify_wd = -1; + (void) hashmap_remove(u->manager->cgroup_control_inotify_wd_unit, INT_TO_PTR(u->cgroup_control_inotify_wd)); + u->cgroup_control_inotify_wd = -1; + } + + if (u->cgroup_memory_inotify_wd >= 0) { + if (inotify_rm_watch(u->manager->cgroup_inotify_fd, u->cgroup_memory_inotify_wd) < 0) + log_unit_debug_errno(u, errno, "Failed to remove cgroup memory inotify watch %i for %s, ignoring: %m", u->cgroup_memory_inotify_wd, u->id); + + (void) hashmap_remove(u->manager->cgroup_memory_inotify_wd_unit, INT_TO_PTR(u->cgroup_memory_inotify_wd)); + u->cgroup_memory_inotify_wd = -1; } } @@ -2479,6 +2555,106 @@ void unit_add_to_cgroup_empty_queue(Unit *u) { log_debug_errno(r, "Failed to enable cgroup empty event source: %m"); } +static int unit_check_oom(Unit *u) { + _cleanup_free_ char *oom_kill = NULL; + bool increased; + uint64_t c; + int r; + + if (!u->cgroup_path) + return 0; + + r = cg_get_keyed_attribute("memory", u->cgroup_path, "memory.events", STRV_MAKE("oom_kill"), &oom_kill); + if (r < 0) + return log_unit_debug_errno(u, r, "Failed to read oom_kill field of memory.events cgroup attribute: %m"); + + r = safe_atou64(oom_kill, &c); + if (r < 0) + return log_unit_debug_errno(u, r, "Failed to parse oom_kill field: %m"); + + increased = c > u->oom_kill_last; + u->oom_kill_last = c; + + if (!increased) + return 0; + + log_struct(LOG_NOTICE, + "MESSAGE_ID=" SD_MESSAGE_UNIT_OUT_OF_MEMORY_STR, + LOG_UNIT_ID(u), + LOG_UNIT_INVOCATION_ID(u), + LOG_UNIT_MESSAGE(u, "A process of this unit has been killed by the OOM killer.")); + + if (UNIT_VTABLE(u)->notify_cgroup_oom) + UNIT_VTABLE(u)->notify_cgroup_oom(u); + + return 1; +} + +static int on_cgroup_oom_event(sd_event_source *s, void *userdata) { + Manager *m = userdata; + Unit *u; + int r; + + assert(s); + assert(m); + + u = m->cgroup_oom_queue; + if (!u) + return 0; + + assert(u->in_cgroup_oom_queue); + u->in_cgroup_oom_queue = false; + LIST_REMOVE(cgroup_oom_queue, m->cgroup_oom_queue, u); + + if (m->cgroup_oom_queue) { + /* More stuff queued, let's make sure we remain enabled */ + r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT); + if (r < 0) + log_debug_errno(r, "Failed to reenable cgroup oom event source, ignoring: %m"); + } + + (void) unit_check_oom(u); + return 0; +} + +static void unit_add_to_cgroup_oom_queue(Unit *u) { + int r; + + assert(u); + + if (u->in_cgroup_oom_queue) + return; + if (!u->cgroup_path) + return; + + LIST_PREPEND(cgroup_oom_queue, u->manager->cgroup_oom_queue, u); + u->in_cgroup_oom_queue = true; + + /* Trigger the defer event */ + if (!u->manager->cgroup_oom_event_source) { + _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL; + + r = sd_event_add_defer(u->manager->event, &s, on_cgroup_oom_event, u->manager); + if (r < 0) { + log_error_errno(r, "Failed to create cgroup oom event source: %m"); + return; + } + + r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_NORMAL-8); + if (r < 0) { + log_error_errno(r, "Failed to set priority of cgroup oom event source: %m"); + return; + } + + (void) sd_event_source_set_description(s, "cgroup-oom"); + u->manager->cgroup_oom_event_source = TAKE_PTR(s); + } + + r = sd_event_source_set_enabled(u->manager->cgroup_oom_event_source, SD_EVENT_ONESHOT); + if (r < 0) + log_error_errno(r, "Failed to enable cgroup oom event source: %m"); +} + static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, void *userdata) { Manager *m = userdata; @@ -2510,15 +2686,16 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, /* The watch was just removed */ continue; - u = hashmap_get(m->cgroup_inotify_wd_unit, INT_TO_PTR(e->wd)); - if (!u) /* Not that inotify might deliver - * events for a watch even after it - * was removed, because it was queued - * before the removal. Let's ignore - * this here safely. */ - continue; + /* Note that inotify might deliver events for a watch even after it was removed, + * because it was queued before the removal. Let's ignore this here safely. */ - unit_add_to_cgroup_empty_queue(u); + u = hashmap_get(m->cgroup_control_inotify_wd_unit, INT_TO_PTR(e->wd)); + if (u) + unit_add_to_cgroup_empty_queue(u); + + u = hashmap_get(m->cgroup_memory_inotify_wd_unit, INT_TO_PTR(e->wd)); + if (u) + unit_add_to_cgroup_oom_queue(u); } } } @@ -2606,6 +2783,9 @@ int manager_setup_cgroup(Manager *m) { if (r < 0) return log_error_errno(r, "Failed to create cgroup empty event source: %m"); + /* Schedule cgroup empty checks early, but after having processed service notification messages or + * SIGCHLD signals, so that a cgroup running empty is always just the last safety net of + * notification, and we collected the metadata the notification and SIGCHLD stuff offers first. */ r = sd_event_source_set_priority(m->cgroup_empty_event_source, SD_EVENT_PRIORITY_NORMAL-5); if (r < 0) return log_error_errno(r, "Failed to set priority of cgroup empty event source: %m"); @@ -2632,9 +2812,10 @@ int manager_setup_cgroup(Manager *m) { if (r < 0) return log_error_errno(r, "Failed to watch control group inotify object: %m"); - /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also - * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */ - r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-4); + /* Process cgroup empty notifications early. Note that when this event is dispatched it'll + * just add the unit to a cgroup empty queue, hence let's run earlier than that. Also see + * handling of cgroup agent notifications, for the classic cgroup hierarchy support. */ + r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-9); if (r < 0) return log_error_errno(r, "Failed to set priority of inotify event source: %m"); @@ -2704,7 +2885,8 @@ void manager_shutdown_cgroup(Manager *m, bool delete) { m->cgroup_empty_event_source = sd_event_source_unref(m->cgroup_empty_event_source); - m->cgroup_inotify_wd_unit = hashmap_free(m->cgroup_inotify_wd_unit); + m->cgroup_control_inotify_wd_unit = hashmap_free(m->cgroup_control_inotify_wd_unit); + m->cgroup_memory_inotify_wd_unit = hashmap_free(m->cgroup_memory_inotify_wd_unit); m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source); m->cgroup_inotify_fd = safe_close(m->cgroup_inotify_fd); diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 51e7c96d60..050b963579 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -79,6 +79,9 @@ struct CGroupContext { bool tasks_accounting; bool ip_accounting; + /* Configures the memory.oom.group attribute (on unified) */ + bool memory_oom_group; + bool delegate; CGroupMask delegate_controllers; CGroupMask disable_controllers; @@ -174,6 +177,7 @@ int unit_realize_cgroup(Unit *u); void unit_release_cgroup(Unit *u); void unit_prune_cgroup(Unit *u); int unit_watch_cgroup(Unit *u); +int unit_watch_cgroup_memory(Unit *u); void unit_add_to_cgroup_empty_queue(Unit *u); diff --git a/src/core/dbus-manager.c b/src/core/dbus-manager.c index de997c3305..9c0b994cc3 100644 --- a/src/core/dbus-manager.c +++ b/src/core/dbus-manager.c @@ -43,6 +43,8 @@ static UnitFileFlags unit_file_bools_to_flags(bool runtime, bool force) { (force ? UNIT_FILE_FORCE : 0); } +BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_oom_policy, oom_policy, OOMPolicy); + static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_version, "s", GIT_VERSION); static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_features, "s", SYSTEMD_FEATURES); static BUS_DEFINE_PROPERTY_GET_GLOBAL(property_get_architecture, "s", architecture_to_string(uname_architecture())); @@ -2452,6 +2454,7 @@ const sd_bus_vtable bus_manager_vtable[] = { SD_BUS_PROPERTY("DefaultLimitRTTIMESoft", "t", bus_property_get_rlimit, offsetof(Manager, rlimit[RLIMIT_RTTIME]), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("DefaultTasksMax", "t", NULL, offsetof(Manager, default_tasks_max), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("TimerSlackNSec", "t", property_get_timer_slack_nsec, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("DefaultOOMPolicy", "s", bus_property_get_oom_policy, offsetof(Manager, default_oom_policy), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_METHOD("GetUnit", "s", "o", method_get_unit, SD_BUS_VTABLE_UNPRIVILEGED), SD_BUS_METHOD("GetUnitByPID", "u", "o", method_get_unit_by_pid, SD_BUS_VTABLE_UNPRIVILEGED), diff --git a/src/core/dbus-manager.h b/src/core/dbus-manager.h index d0306adc70..10aa2eccee 100644 --- a/src/core/dbus-manager.h +++ b/src/core/dbus-manager.h @@ -12,3 +12,5 @@ void bus_manager_send_reloading(Manager *m, bool active); void bus_manager_send_change_signal(Manager *m); int verify_run_space_and_log(const char *message); + +int bus_property_get_oom_policy(sd_bus *bus, const char *path, const char *interface, const char *property, sd_bus_message *reply, void *userdata, sd_bus_error *ret_error); diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index 48c0fb74a7..0f563a6625 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -10,6 +10,7 @@ #include "dbus-cgroup.h" #include "dbus-execute.h" #include "dbus-kill.h" +#include "dbus-manager.h" #include "dbus-service.h" #include "dbus-util.h" #include "exit-status.h" @@ -127,6 +128,7 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("UID", "u", bus_property_get_uid, offsetof(Unit, ref_uid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("GID", "u", bus_property_get_gid, offsetof(Unit, ref_gid), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("NRestarts", "u", bus_property_get_unsigned, offsetof(Service, n_restarts), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), + SD_BUS_PROPERTY("OOMPolicy", "s", bus_property_get_oom_policy, offsetof(Service, oom_policy), SD_BUS_VTABLE_PROPERTY_CONST), BUS_EXEC_STATUS_VTABLE("ExecMain", offsetof(Service, main_exec_status), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), BUS_EXEC_COMMAND_LIST_VTABLE("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), SD_BUS_VTABLE_PROPERTY_EMITS_INVALIDATION), @@ -257,6 +259,7 @@ static int bus_set_transient_std_fd( static BUS_DEFINE_SET_TRANSIENT_PARSE(notify_access, NotifyAccess, notify_access_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(service_type, ServiceType, service_type_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(service_restart, ServiceRestart, service_restart_from_string); +static BUS_DEFINE_SET_TRANSIENT_PARSE(oom_policy, OOMPolicy, oom_policy_from_string); static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(bus_name, service_name_is_valid); static int bus_service_set_transient_property( @@ -291,6 +294,9 @@ static int bus_service_set_transient_property( if (streq(name, "Type")) return bus_set_transient_service_type(u, name, &s->type, message, flags, error); + if (streq(name, "OOMPolicy")) + return bus_set_transient_oom_policy(u, name, &s->oom_policy, message, flags, error); + if (streq(name, "RestartUSec")) return bus_set_transient_usec(u, name, &s->restart_usec, message, flags, error); diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 7fb1e6cc8b..9cbce4f11f 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -333,6 +333,7 @@ Service.Sockets, config_parse_service_sockets, 0, Service.BusPolicy, config_parse_warn_compat, DISABLED_LEGACY, 0 Service.USBFunctionDescriptors, config_parse_unit_path_printf, 0, offsetof(Service, usb_function_descriptors) Service.USBFunctionStrings, config_parse_unit_path_printf, 0, offsetof(Service, usb_function_strings) +Service.OOMPolicy, config_parse_oom_policy, 0, offsetof(Service, oom_policy) EXEC_CONTEXT_CONFIG_ITEMS(Service)m4_dnl CGROUP_CONTEXT_CONFIG_ITEMS(Service)m4_dnl KILL_CONTEXT_CONFIG_ITEMS(Service)m4_dnl diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 9b7c8cba67..66e5c6d231 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -86,6 +86,7 @@ DEFINE_CONFIG_PARSE_ENUM(config_parse_runtime_preserve_mode, exec_preserve_mode, DEFINE_CONFIG_PARSE_ENUM(config_parse_service_type, service_type, ServiceType, "Failed to parse service type"); DEFINE_CONFIG_PARSE_ENUM(config_parse_service_restart, service_restart, ServiceRestart, "Failed to parse service restart specifier"); DEFINE_CONFIG_PARSE_ENUM(config_parse_socket_bind, socket_address_bind_ipv6_only_or_bool, SocketAddressBindIPv6Only, "Failed to parse bind IPv6 only value"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_oom_policy, oom_policy, OOMPolicy, "Failed to parse OOM policy"); DEFINE_CONFIG_PARSE_ENUM_WITH_DEFAULT(config_parse_ip_tos, ip_tos, int, -1, "Failed to parse IP TOS value"); DEFINE_CONFIG_PARSE_PTR(config_parse_blockio_weight, cg_blkio_weight_parse, uint64_t, "Invalid block IO weight"); DEFINE_CONFIG_PARSE_PTR(config_parse_cg_weight, cg_weight_parse, uint64_t, "Invalid weight"); diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index e0d3b4ec3b..95d06320c0 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -106,6 +106,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_collect_mode); CONFIG_PARSER_PROTOTYPE(config_parse_pid_file); CONFIG_PARSER_PROTOTYPE(config_parse_exit_status); CONFIG_PARSER_PROTOTYPE(config_parse_disable_controllers); +CONFIG_PARSER_PROTOTYPE(config_parse_oom_policy); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, GPERF_LEN_TYPE length); diff --git a/src/core/main.c b/src/core/main.c index 46db47126c..3c4b5202df 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -135,6 +135,7 @@ static bool arg_default_tasks_accounting = true; static uint64_t arg_default_tasks_max = UINT64_MAX; static sd_id128_t arg_machine_id = {}; static EmergencyAction arg_cad_burst_action = EMERGENCY_ACTION_REBOOT_FORCE; +static OOMPolicy arg_default_oom_policy = OOM_STOP; _noreturn_ static void freeze_or_exit_or_reboot(void) { @@ -725,6 +726,7 @@ static int parse_config_file(void) { { "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting }, { "Manager", "DefaultTasksMax", config_parse_tasks_max, 0, &arg_default_tasks_max }, { "Manager", "CtrlAltDelBurstAction", config_parse_emergency_action, 0, &arg_cad_burst_action }, + { "Manager", "DefaultOOMPolicy", config_parse_oom_policy, 0, &arg_default_oom_policy }, {} }; @@ -780,6 +782,7 @@ static void set_manager_defaults(Manager *m) { m->default_memory_accounting = arg_default_memory_accounting; m->default_tasks_accounting = arg_default_tasks_accounting; m->default_tasks_max = arg_default_tasks_max; + m->default_oom_policy = arg_default_oom_policy; (void) manager_set_default_rlimits(m, arg_default_rlimit); diff --git a/src/core/manager.c b/src/core/manager.c index 12ae911a38..4154aec12f 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -764,6 +764,8 @@ int manager_new(UnitFileScope scope, ManagerTestRunFlags test_run_flags, Manager .have_ask_password = -EINVAL, /* we don't know */ .first_boot = -1, .test_run_flags = test_run_flags, + + .default_oom_policy = OOM_STOP, }; #if ENABLE_EFI @@ -992,11 +994,10 @@ static int manager_setup_cgroups_agent(Manager *m) { if (r < 0) return log_error_errno(r, "Failed to allocate cgroups agent event source: %m"); - /* Process cgroups notifications early, but after having processed service notification messages or - * SIGCHLD signals, so that a cgroup running empty is always just the last safety net of notification, - * and we collected the metadata the notification and SIGCHLD stuff offers first. Also see handling of - * cgroup inotify for the unified cgroup stuff. */ - r = sd_event_source_set_priority(m->cgroups_agent_event_source, SD_EVENT_PRIORITY_NORMAL-4); + /* Process cgroups notifications early. Note that when the agent notification is received + * we'll just enqueue the unit in the cgroup empty queue, hence pick a high priority than + * that. Also see handling of cgroup inotify for the unified cgroup stuff. */ + r = sd_event_source_set_priority(m->cgroups_agent_event_source, SD_EVENT_PRIORITY_NORMAL-9); if (r < 0) return log_error_errno(r, "Failed to set priority of cgroups agent event source: %m"); @@ -4715,3 +4716,11 @@ static const char *const manager_timestamp_table[_MANAGER_TIMESTAMP_MAX] = { }; DEFINE_STRING_TABLE_LOOKUP(manager_timestamp, ManagerTimestamp); + +static const char* const oom_policy_table[_OOM_POLICY_MAX] = { + [OOM_CONTINUE] = "continue", + [OOM_STOP] = "stop", + [OOM_KILL] = "kill", +}; + +DEFINE_STRING_TABLE_LOOKUP(oom_policy, OOMPolicy); diff --git a/src/core/manager.h b/src/core/manager.h index cdd4882a6a..b5def59ed0 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -56,6 +56,14 @@ typedef enum StatusType { STATUS_TYPE_EMERGENCY, } StatusType; +typedef enum OOMPolicy { + OOM_CONTINUE, /* The kernel kills the process it wants to kill, and that's it */ + OOM_STOP, /* The kernel kills the process it wants to kill, and we stop the unit */ + OOM_KILL, /* The kernel kills the process it wants to kill, and all others in the unit, and we stop the unit */ + _OOM_POLICY_MAX, + _OOM_POLICY_INVALID = -1 +} OOMPolicy; + /* Notes: * 1. TIMESTAMP_FIRMWARE, TIMESTAMP_LOADER, TIMESTAMP_KERNEL, TIMESTAMP_INITRD, * TIMESTAMP_SECURITY_START, and TIMESTAMP_SECURITY_FINISH are set only when @@ -159,6 +167,9 @@ struct Manager { /* Units whose cgroup ran empty */ LIST_HEAD(Unit, cgroup_empty_queue); + /* Units whose memory.event fired */ + LIST_HEAD(Unit, cgroup_oom_queue); + /* Target units whose default target dependencies haven't been set yet */ LIST_HEAD(Unit, target_deps_queue); @@ -268,10 +279,15 @@ struct Manager { /* Notifications from cgroups, when the unified hierarchy is used is done via inotify. */ int cgroup_inotify_fd; sd_event_source *cgroup_inotify_event_source; - Hashmap *cgroup_inotify_wd_unit; + + /* Maps for finding the unit for each inotify watch descriptor for the cgroup.events and + * memory.events cgroupv2 attributes. */ + Hashmap *cgroup_control_inotify_wd_unit; + Hashmap *cgroup_memory_inotify_wd_unit; /* A defer event for handling cgroup empty events and processing them after SIGCHLD in all cases. */ sd_event_source *cgroup_empty_event_source; + sd_event_source *cgroup_oom_event_source; /* Make sure the user cannot accidentally unmount our cgroup * file system */ @@ -328,6 +344,8 @@ struct Manager { uint64_t default_tasks_max; usec_t default_timer_accuracy_usec; + OOMPolicy default_oom_policy; + int original_log_level; LogTarget original_log_target; bool log_level_overridden:1; @@ -519,3 +537,6 @@ void manager_disable_confirm_spawn(void); const char *manager_timestamp_to_string(ManagerTimestamp m) _const_; ManagerTimestamp manager_timestamp_from_string(const char *s) _pure_; ManagerTimestamp manager_timestamp_initrd_mangle(ManagerTimestamp s); + +const char* oom_policy_to_string(OOMPolicy i) _const_; +OOMPolicy oom_policy_from_string(const char *s) _pure_; diff --git a/src/core/service.c b/src/core/service.c index 02899901f7..53cead772a 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -112,6 +112,8 @@ static void service_init(Unit *u) { EXEC_KEYRING_PRIVATE : EXEC_KEYRING_INHERIT; s->watchdog_original_usec = USEC_INFINITY; + + s->oom_policy = _OOM_POLICY_INVALID; } static void service_unwatch_control_pid(Service *s) { @@ -731,6 +733,15 @@ static int service_add_extras(Service *s) { (s->type == SERVICE_NOTIFY || s->watchdog_usec > 0 || s->n_fd_store_max > 0)) s->notify_access = NOTIFY_MAIN; + /* If no OOM policy was explicitly set, then default to the configure default OOM policy. Except when + * delegation is on, in that case it we assume the payload knows better what to do and can process + * things in a more focussed way. */ + if (s->oom_policy < 0) + s->oom_policy = s->cgroup_context.delegate ? OOM_CONTINUE : UNIT(s)->manager->default_oom_policy; + + /* Let the kernel do the killing if that's requested. */ + s->cgroup_context.memory_oom_group = s->oom_policy == OOM_KILL; + r = service_add_default_dependencies(s); if (r < 0) return r; @@ -799,7 +810,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { "%sType: %s\n" "%sRestart: %s\n" "%sNotifyAccess: %s\n" - "%sNotifyState: %s\n", + "%sNotifyState: %s\n" + "%sOOMPolicy: %s\n", prefix, service_state_to_string(s->state), prefix, service_result_to_string(s->result), prefix, service_result_to_string(s->reload_result), @@ -810,7 +822,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) { prefix, service_type_to_string(s->type), prefix, service_restart_to_string(s->restart), prefix, notify_access_to_string(s->notify_access), - prefix, notify_state_to_string(s->notify_state)); + prefix, notify_state_to_string(s->notify_state), + prefix, oom_policy_to_string(s->oom_policy)); if (s->control_pid > 0) fprintf(f, @@ -3148,7 +3161,7 @@ static void service_notify_cgroup_empty_event(Unit *u) { assert(u); - log_unit_debug(u, "cgroup is empty"); + log_unit_debug(u, "Control group is empty."); switch (s->state) { @@ -3211,6 +3224,57 @@ static void service_notify_cgroup_empty_event(Unit *u) { } } +static void service_notify_cgroup_oom_event(Unit *u) { + Service *s = SERVICE(u); + + log_unit_debug(u, "Process of control group was killed by the OOM killer."); + + if (s->oom_policy == OOM_CONTINUE) + return; + + switch (s->state) { + + case SERVICE_START_PRE: + case SERVICE_START: + case SERVICE_START_POST: + case SERVICE_STOP: + if (s->oom_policy == OOM_STOP) + service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_OOM_KILL); + else if (s->oom_policy == OOM_KILL) + service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL); + + break; + + case SERVICE_EXITED: + case SERVICE_RUNNING: + if (s->oom_policy == OOM_STOP) + service_enter_stop(s, SERVICE_FAILURE_OOM_KILL); + else if (s->oom_policy == OOM_KILL) + service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL); + + break; + + case SERVICE_STOP_WATCHDOG: + case SERVICE_STOP_SIGTERM: + service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_OOM_KILL); + break; + + case SERVICE_STOP_SIGKILL: + case SERVICE_FINAL_SIGKILL: + if (s->result == SERVICE_SUCCESS) + s->result = SERVICE_FAILURE_OOM_KILL; + break; + + case SERVICE_STOP_POST: + case SERVICE_FINAL_SIGTERM: + service_enter_signal(s, SERVICE_FINAL_SIGKILL, SERVICE_FAILURE_OOM_KILL); + break; + + default: + ; + } +} + static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) { bool notify_dbus = true; Service *s = SERVICE(u); @@ -4116,6 +4180,7 @@ static const char* const service_result_table[_SERVICE_RESULT_MAX] = { [SERVICE_FAILURE_CORE_DUMP] = "core-dump", [SERVICE_FAILURE_WATCHDOG] = "watchdog", [SERVICE_FAILURE_START_LIMIT_HIT] = "start-limit-hit", + [SERVICE_FAILURE_OOM_KILL] = "oom-kill", }; DEFINE_STRING_TABLE_LOOKUP(service_result, ServiceResult); @@ -4169,6 +4234,7 @@ const UnitVTable service_vtable = { .reset_failed = service_reset_failed, .notify_cgroup_empty = service_notify_cgroup_empty_event, + .notify_cgroup_oom = service_notify_cgroup_oom_event, .notify_message = service_notify_message, .main_pid = service_main_pid, diff --git a/src/core/service.h b/src/core/service.h index 57eefbb34d..5cc946acaf 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -67,6 +67,7 @@ typedef enum ServiceResult { SERVICE_FAILURE_CORE_DUMP, SERVICE_FAILURE_WATCHDOG, SERVICE_FAILURE_START_LIMIT_HIT, + SERVICE_FAILURE_OOM_KILL, _SERVICE_RESULT_MAX, _SERVICE_RESULT_INVALID = -1 } ServiceResult; @@ -184,6 +185,8 @@ struct Service { unsigned n_restarts; bool flush_n_restarts; + + OOMPolicy oom_policy; }; extern const UnitVTable service_vtable; diff --git a/src/core/unit.c b/src/core/unit.c index 2cde494a7e..608a33530c 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -91,7 +91,8 @@ Unit *unit_new(Manager *m, size_t size) { u->unit_file_state = _UNIT_FILE_STATE_INVALID; u->unit_file_preset = -1; u->on_failure_job_mode = JOB_REPLACE; - u->cgroup_inotify_wd = -1; + u->cgroup_control_inotify_wd = -1; + u->cgroup_memory_inotify_wd = -1; u->job_timeout = USEC_INFINITY; u->job_running_timeout = USEC_INFINITY; u->ref_uid = UID_INVALID; @@ -3245,6 +3246,9 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) { if (u->cpu_usage_last != NSEC_INFINITY) (void) serialize_item_format(f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last); + if (u->oom_kill_last > 0) + (void) serialize_item_format(f, "oom-kill-last", "%" PRIu64, u->oom_kill_last); + if (u->cgroup_path) (void) serialize_item(f, "cgroup", u->cgroup_path); @@ -3478,6 +3482,14 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { continue; + } else if (streq(l, "oom-kill-last")) { + + r = safe_atou64(v, &u->oom_kill_last); + if (r < 0) + log_unit_debug(u, "Failed to read OOM kill last %s, ignoring.", v); + + continue; + } else if (streq(l, "cgroup")) { r = unit_set_cgroup_path(u, v); @@ -3485,6 +3497,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { log_unit_debug_errno(u, r, "Failed to set cgroup path %s, ignoring: %m", v); (void) unit_watch_cgroup(u); + (void) unit_watch_cgroup_memory(u); continue; } else if (streq(l, "cgroup-realized")) { diff --git a/src/core/unit.h b/src/core/unit.h index 28878b5f4f..f730f6eee4 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -200,6 +200,9 @@ typedef struct Unit { /* cgroup empty queue */ LIST_FIELDS(Unit, cgroup_empty_queue); + /* cgroup OOM queue */ + LIST_FIELDS(Unit, cgroup_oom_queue); + /* Target dependencies queue */ LIST_FIELDS(Unit, target_deps_queue); @@ -246,13 +249,19 @@ typedef struct Unit { nsec_t cpu_usage_base; nsec_t cpu_usage_last; /* the most recently read value */ + /* The current counter of the oom_kill field in the memory.events cgroup attribute */ + uint64_t oom_kill_last; + /* Counterparts in the cgroup filesystem */ char *cgroup_path; CGroupMask cgroup_realized_mask; /* In which hierarchies does this unit's cgroup exist? (only relevant on cgroup v1) */ CGroupMask cgroup_enabled_mask; /* Which controllers are enabled (or more correctly: enabled for the children) for this unit's cgroup? (only relevant on cgroup v2) */ CGroupMask cgroup_invalidated_mask; /* A mask specifiying controllers which shall be considered invalidated, and require re-realization */ CGroupMask cgroup_members_mask; /* A cache for the controllers required by all children of this cgroup (only relevant for slice units) */ - int cgroup_inotify_wd; + + /* Inotify watch descriptors for watching cgroup.events and memory.events on cgroupv2 */ + int cgroup_control_inotify_wd; + int cgroup_memory_inotify_wd; /* Device Controller BPF program */ BPFProgram *bpf_device_control_installed; @@ -320,6 +329,7 @@ typedef struct Unit { bool in_gc_queue:1; bool in_cgroup_realize_queue:1; bool in_cgroup_empty_queue:1; + bool in_cgroup_oom_queue:1; bool in_target_deps_queue:1; bool in_stop_when_unneeded_queue:1; @@ -494,10 +504,12 @@ typedef struct UnitVTable { /* Reset failed state if we are in failed state */ void (*reset_failed)(Unit *u); - /* Called whenever any of the cgroups this unit watches for - * ran empty */ + /* Called whenever any of the cgroups this unit watches for ran empty */ void (*notify_cgroup_empty)(Unit *u); + /* Called whenever an OOM kill event on this unit was seen */ + void (*notify_cgroup_oom)(Unit *u); + /* Called whenever a process of this unit sends us a message */ void (*notify_message)(Unit *u, const struct ucred *ucred, char **tags, FDSet *fds); diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index b42a9e5c5b..8d99955ca7 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -1314,7 +1314,7 @@ static int bus_append_service_property(sd_bus_message *m, const char *field, con if (STR_IN_SET(field, "PIDFile", "Type", "Restart", "BusName", "NotifyAccess", - "USBFunctionDescriptors", "USBFunctionStrings")) + "USBFunctionDescriptors", "USBFunctionStrings", "OOMPolicy")) return bus_append_string(m, field, eq); diff --git a/src/systemd/sd-messages.h b/src/systemd/sd-messages.h index 293db8e8e1..c243642889 100644 --- a/src/systemd/sd-messages.h +++ b/src/systemd/sd-messages.h @@ -125,6 +125,9 @@ _SD_BEGIN_DECLARATIONS; #define SD_MESSAGE_OVERMOUNTING SD_ID128_MAKE(1d,ee,03,69,c7,fc,47,36,b7,09,9b,38,ec,b4,6e,e7) #define SD_MESSAGE_OVERMOUNTING_STR SD_ID128_MAKE_STR(1d,ee,03,69,c7,fc,47,36,b7,09,9b,38,ec,b4,6e,e7) +#define SD_MESSAGE_UNIT_OUT_OF_MEMORY SD_ID128_MAKE(fe,6f,aa,94,e7,77,46,63,a0,da,52,71,78,91,d8,ef) +#define SD_MESSAGE_UNIT_OUT_OF_MEMORY_STR SD_ID128_MAKE_STR(fe,6f,aa,94,e7,77,46,63,a0,da,52,71,78,91,d8,ef) + #define SD_MESSAGE_LID_OPENED SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,6f) #define SD_MESSAGE_LID_OPENED_STR SD_ID128_MAKE_STR(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,6f) #define SD_MESSAGE_LID_CLOSED SD_ID128_MAKE(b7,2e,a4,a2,88,15,45,a0,b5,0e,20,0e,55,b9,b0,70) diff --git a/test/TEST-31-OOMPOLICY/Makefile b/test/TEST-31-OOMPOLICY/Makefile new file mode 120000 index 0000000000..e9f93b1104 --- /dev/null +++ b/test/TEST-31-OOMPOLICY/Makefile @@ -0,0 +1 @@ +../TEST-01-BASIC/Makefile \ No newline at end of file diff --git a/test/TEST-31-OOMPOLICY/test.sh b/test/TEST-31-OOMPOLICY/test.sh new file mode 100755 index 0000000000..55752e6a70 --- /dev/null +++ b/test/TEST-31-OOMPOLICY/test.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# -*- mode: shell-script; indent-tabs-mode: nil; sh-basic-offset: 4; -*- +# ex: ts=8 sw=4 sts=4 et filetype=sh +set -e +TEST_DESCRIPTION="test OOM killer logic" +TEST_NO_NSPAWN=1 + +. $TEST_BASE_DIR/test-functions + +UNIFIED_CGROUP_HIERARCHY=yes + +test_setup() { + create_empty_image + mkdir -p $TESTDIR/root + mount ${LOOPDEV}p1 $TESTDIR/root + + ( + LOG_LEVEL=5 + eval $(udevadm info --export --query=env --name=${LOOPDEV}p2) + + setup_basic_environment + + # mask some services that we do not want to run in these tests + ln -fs /dev/null $initdir/etc/systemd/system/systemd-hwdb-update.service + ln -fs /dev/null $initdir/etc/systemd/system/systemd-journal-catalog-update.service + ln -fs /dev/null $initdir/etc/systemd/system/systemd-networkd.service + ln -fs /dev/null $initdir/etc/systemd/system/systemd-networkd.socket + ln -fs /dev/null $initdir/etc/systemd/system/systemd-resolved.service + ln -fs /dev/null $initdir/etc/systemd/system/systemd-machined.service + + # setup the testsuite service + cat >$initdir/etc/systemd/system/testsuite.service < /proc/sys/kernel/sysrq + echo f > /proc/sysrq-trigger + + while : ; do + STATE=`systemctl show -p ActiveState --value oomtest.service` + [ "$STATE" = "failed" ] && break + sleep .5 + done + + RESULT=`systemctl show -p Result --value oomtest.service` + test "$RESULT" = "oom-kill" + + systemd-analyze log-level info +fi + +echo OK > /testok + +exit 0