diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 95b5f2de59..1958c1be2b 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -2681,6 +2681,55 @@ static void unit_remove_from_cgroup_empty_queue(Unit *u) {
         u->in_cgroup_empty_queue = false;
 }
 
+/* Checks whether systemd-oomd killed processes in this unit's cgroup since the counter was last
+ * recorded, by comparing the "user.systemd_oomd_kill" cgroup xattr with the value cached in the Unit
+ * object. Logs a notice if the counter went up.
+ *
+ * Returns 1 if the counter increased, 0 if it did not (or if the unit has no cgroup, or we are not in
+ * all-unified mode), and a negative errno-style value on failure. */
+int unit_check_oomd_kill(Unit *u) {
+        _cleanup_free_ char *value = NULL;
+        bool increased;
+        uint64_t n = 0;
+        int r;
+
+        if (!u->cgroup_path)
+                return 0;
+
+        r = cg_all_unified();
+        if (r < 0)
+                return log_unit_debug_errno(u, r, "Couldn't determine whether we are in all unified mode: %m");
+        if (r == 0)
+                return 0;
+
+        /* A missing attribute (ENODATA) and a file system that reports xattrs as unsupported
+         * (EOPNOTSUPP) both mean no kill count was recorded for this cgroup: treat that as zero. */
+        r = cg_get_xattr_malloc(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, "user.systemd_oomd_kill", &value);
+        if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
+                return r;
+
+        if (!isempty(value)) {
+                r = safe_atou64(value, &n);
+                if (r < 0)
+                        return r;
+        }
+
+        increased = n > u->managed_oom_kill_last;
+        u->managed_oom_kill_last = n;
+
+        if (!increased)
+                return 0;
+
+        /* The counter is unsigned and just went up, hence n is necessarily non-zero here. */
+        log_struct(LOG_NOTICE,
+                   "MESSAGE_ID=" SD_MESSAGE_UNIT_OOMD_KILL_STR,
+                   LOG_UNIT_ID(u),
+                   LOG_UNIT_INVOCATION_ID(u),
+                   LOG_UNIT_MESSAGE(u, "systemd-oomd killed %"PRIu64" process(es) in this unit.", n));
+
+        return 1;
+}
+
 int unit_check_oom(Unit *u) {
         _cleanup_free_ char *oom_kill = NULL;
         bool increased;
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 1f592ef559..881b3f3dfe 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -229,6 +229,7 @@ int unit_watch_cgroup(Unit *u);
 int unit_watch_cgroup_memory(Unit *u);
 
 void unit_add_to_cgroup_empty_queue(Unit *u);
+int unit_check_oomd_kill(Unit *u);
 int unit_check_oom(Unit *u);
 
 int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path);
diff --git a/src/core/manager.c b/src/core/manager.c
index d85d938e7b..611ecf23b2 100644
--- a/src/core/manager.c
+++ b/src/core/manager.c
@@ -2578,6 +2578,11 @@ static int manager_dispatch_sigchld(sd_event_source *source, void *userdata) {
                          * We only do this for the cgroup the PID belonged to. */
                         (void) unit_check_oom(u1);
 
+                        /* This only logs for now. In the future when the interface for kills/notifications
+                         * is more stable we can extend service results table similar to how kernel oom kills
+                         * are managed. */
+                        (void) unit_check_oomd_kill(u1);
+
                         manager_invoke_sigchld_event(m, u1, &si);
                 }
                 if (u2)
diff --git a/src/core/unit.c b/src/core/unit.c
index 44b9f66e42..0792fd8ede 100644
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -3571,6 +3571,9 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
         if (u->cpu_usage_last != NSEC_INFINITY)
                 (void) serialize_item_format(f, "cpu-usage-last", "%" PRIu64, u->cpu_usage_last);
 
+        if (u->managed_oom_kill_last > 0)
+                (void) serialize_item_format(f, "managed-oom-kill-last", "%" PRIu64, u->managed_oom_kill_last);
+
         if (u->oom_kill_last > 0)
                 (void) serialize_item_format(f, "oom-kill-last", "%" PRIu64, u->oom_kill_last);
 
@@ -3816,6 +3819,14 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
 
                         continue;
 
+                } else if (streq(l, "managed-oom-kill-last")) {
+
+                        r = safe_atou64(v, &u->managed_oom_kill_last);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to read managed OOM kill last %s, ignoring.", v);
+
+                        continue;
+
                 } else if (streq(l, "oom-kill-last")) {
 
                         r = safe_atou64(v, &u->oom_kill_last);
diff --git a/src/core/unit.h b/src/core/unit.h
index 9b2ea6c79f..1e6d7ccf6b 100644
--- a/src/core/unit.h
+++ b/src/core/unit.h
@@ -260,7 +260,10 @@ typedef struct Unit {
         nsec_t cpu_usage_base;
         nsec_t cpu_usage_last; /* the most recently read value */
 
-        /* The current counter of the oom_kill field in the memory.events cgroup attribute */
+        /* The current counter of processes sent SIGKILL by systemd-oomd */
+        uint64_t managed_oom_kill_last;
+
+        /* The current counter of the oom_kill field in the memory.events cgroup attribute */
         uint64_t oom_kill_last;
 
         /* Where the io.stat data was at the time the unit was started */
diff --git a/src/systemd/sd-messages.h b/src/systemd/sd-messages.h
index 05f00ed577..eea8c2c900 100644
--- a/src/systemd/sd-messages.h
+++ b/src/systemd/sd-messages.h
@@ -127,6 +127,9 @@ _SD_BEGIN_DECLARATIONS;
 #define SD_MESSAGE_OVERMOUNTING SD_ID128_MAKE(1d,ee,03,69,c7,fc,47,36,b7,09,9b,38,ec,b4,6e,e7)
 #define SD_MESSAGE_OVERMOUNTING_STR SD_ID128_MAKE_STR(1d,ee,03,69,c7,fc,47,36,b7,09,9b,38,ec,b4,6e,e7)
 
+#define SD_MESSAGE_UNIT_OOMD_KILL SD_ID128_MAKE(d9,89,61,1b,15,e4,4c,9d,bf,31,e3,c8,12,56,e4,ed)
+#define SD_MESSAGE_UNIT_OOMD_KILL_STR SD_ID128_MAKE_STR(d9,89,61,1b,15,e4,4c,9d,bf,31,e3,c8,12,56,e4,ed)
+
 #define SD_MESSAGE_UNIT_OUT_OF_MEMORY SD_ID128_MAKE(fe,6f,aa,94,e7,77,46,63,a0,da,52,71,78,91,d8,ef)
 #define SD_MESSAGE_UNIT_OUT_OF_MEMORY_STR SD_ID128_MAKE_STR(fe,6f,aa,94,e7,77,46,63,a0,da,52,71,78,91,d8,ef)
 