2017-11-18 17:09:20 +01:00
|
|
|
/* SPDX-License-Identifier: LGPL-2.1+ */
|
2012-07-18 19:07:51 +02:00
|
|
|
#pragma once
|
2010-03-31 16:29:55 +02:00
|
|
|
|
|
|
|
/***
|
|
|
|
This file is part of systemd.
|
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
Copyright 2013 Lennart Poettering
|
2010-03-31 16:29:55 +02:00
|
|
|
|
|
|
|
systemd is free software; you can redistribute it and/or modify it
|
2012-04-12 00:20:58 +02:00
|
|
|
under the terms of the GNU Lesser General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2.1 of the License, or
|
2010-03-31 16:29:55 +02:00
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
systemd is distributed in the hope that it will be useful, but
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
2012-04-12 00:20:58 +02:00
|
|
|
Lesser General Public License for more details.
|
2010-03-31 16:29:55 +02:00
|
|
|
|
2012-04-12 00:20:58 +02:00
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
2010-03-31 16:29:55 +02:00
|
|
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
***/
|
|
|
|
|
2015-02-10 12:56:53 +01:00
|
|
|
#include <stdbool.h>
|
|
|
|
|
2016-11-11 19:59:19 +01:00
|
|
|
#include "cgroup-util.h"
|
|
|
|
#include "ip-address-access.h"
|
2013-06-27 04:14:27 +02:00
|
|
|
#include "list.h"
|
2015-02-10 12:56:53 +01:00
|
|
|
#include "time-util.h"
|
2010-03-31 16:29:55 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
/* Forward typedefs for the struct tags defined further down in this header,
 * so each type can be referred to by its bare name (without the `struct`
 * keyword) before its full definition appears. */
typedef struct CGroupContext CGroupContext;
|
|
|
|
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
|
2016-05-05 22:42:55 +02:00
|
|
|
typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
|
|
|
|
typedef struct CGroupIODeviceLimit CGroupIODeviceLimit;
|
2013-06-27 04:14:27 +02:00
|
|
|
typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
|
|
|
|
typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
|
2010-03-31 16:29:55 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
/* Device access policy; stored in CGroupContext.device_policy. The three
 * real policies are numbered from 0 in declaration order. */
typedef enum CGroupDevicePolicy {
|
2010-03-31 16:29:55 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
/* When devices are listed, will allow those, plus built-in ones,
|
|
|
|
if none are listed will allow everything. */
|
|
|
|
CGROUP_AUTO,
|
2010-03-31 16:29:55 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
/* Everything forbidden, except built-in ones and listed ones. */
|
|
|
|
CGROUP_CLOSED,
|
2010-04-21 04:01:24 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
/* Everything is forbidden, except for the listed devices. */
|
|
|
|
CGROUP_STRICT,
|
2010-04-21 04:01:24 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
/* Not a policy: follows the last real value, so it equals the number of
 * policies above (3). */
_CGROUP_DEVICE_POLICY_MAX,
|
|
|
|
/* Not a policy: explicit negative value outside the valid 0.._MAX-1 range. */
_CGROUP_DEVICE_POLICY_INVALID = -1
|
|
|
|
} CGroupDevicePolicy;
|
2010-03-31 16:29:55 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
/* One entry of the device list headed by CGroupContext.device_allow:
 * a path plus three single-bit flags. */
struct CGroupDeviceAllow {
|
|
|
|
/* List linkage (macro from list.h) under the name "device_allow". */
LIST_FIELDS(CGroupDeviceAllow, device_allow);
|
|
|
|
/* NOTE(review): ownership of this string is not visible in this header. */
char *path;
|
|
|
|
/* NOTE(review): r/w/m presumably mean read / write / mknod access -- confirm
 * against the code that consumes these flags. */
bool r:1;
|
|
|
|
bool w:1;
|
|
|
|
bool m:1;
|
|
|
|
};
|
2010-06-21 23:27:18 +02:00
|
|
|
|
2016-05-05 22:42:55 +02:00
|
|
|
/* One entry of the list headed by CGroupContext.io_device_weights:
 * associates a path with a 64-bit weight value. */
struct CGroupIODeviceWeight {
|
|
|
|
/* List linkage (macro from list.h) under the name "device_weights". */
LIST_FIELDS(CGroupIODeviceWeight, device_weights);
|
|
|
|
char *path;
|
|
|
|
/* NOTE(review): valid range and "unset" value are not defined in this header. */
uint64_t weight;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* One entry of the list headed by CGroupContext.io_device_limits:
 * associates a path with an array of 64-bit limit values. */
struct CGroupIODeviceLimit {
|
|
|
|
/* List linkage (macro from list.h) under the name "device_limits". */
LIST_FIELDS(CGroupIODeviceLimit, device_limits);
|
|
|
|
char *path;
|
2016-05-18 22:50:56 +02:00
|
|
|
/* _CGROUP_IO_LIMIT_TYPE_MAX slots. NOTE(review): presumably indexed by an IO
 * limit type enum declared in cgroup-util.h -- confirm. */
uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
|
2016-05-05 22:42:55 +02:00
|
|
|
};
|
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
/* One entry of the list headed by CGroupContext.blockio_device_weights
 * (declared there under the "For legacy hierarchies" section): associates a
 * path with a 64-bit weight value. */
struct CGroupBlockIODeviceWeight {
|
|
|
|
/* List linkage (macro from list.h) under the name "device_weights". */
LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights);
|
|
|
|
char *path;
|
2015-09-11 16:48:24 +02:00
|
|
|
uint64_t weight;
|
2010-03-31 16:29:55 +02:00
|
|
|
};
|
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
/* One entry of the list headed by CGroupContext.blockio_device_bandwidths
 * (declared there under the "For legacy hierarchies" section): associates a
 * path with two 64-bit bandwidth values. */
struct CGroupBlockIODeviceBandwidth {
|
|
|
|
/* List linkage (macro from list.h) under the name "device_bandwidths". */
LIST_FIELDS(CGroupBlockIODeviceBandwidth, device_bandwidths);
|
|
|
|
char *path;
|
2016-05-18 22:51:46 +02:00
|
|
|
/* NOTE(review): rbps/wbps presumably are read and write limits in bytes per
 * second -- confirm units against the code that applies them. */
uint64_t rbps;
|
|
|
|
uint64_t wbps;
|
2013-06-27 04:14:27 +02:00
|
|
|
};
|
2010-03-31 16:29:55 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
/* Aggregate of cgroup settings. UNIT_CGROUP_BOOL() in this header reads
 * boolean fields of this struct through unit_get_cgroup_context(). The
 * fields are laid out as: accounting switches first, then the groups
 * introduced by the section comments below. */
struct CGroupContext {
|
|
|
|
/* Accounting switches, one boolean per accounting kind. */
bool cpu_accounting;
|
2016-05-05 22:42:55 +02:00
|
|
|
bool io_accounting;
|
2013-06-27 04:14:27 +02:00
|
|
|
bool blockio_accounting;
|
|
|
|
bool memory_accounting;
|
2015-09-10 12:32:16 +02:00
|
|
|
bool tasks_accounting;
|
2016-11-11 19:59:19 +01:00
|
|
|
bool ip_accounting;
|
2010-03-31 16:29:55 +02:00
|
|
|
|
|
2016-05-05 22:42:55 +02:00
|
|
|
/* For unified hierarchy */
|
2016-08-07 15:45:39 +02:00
|
|
|
uint64_t cpu_weight;
|
|
|
|
uint64_t startup_cpu_weight;
|
|
|
|
usec_t cpu_quota_per_sec_usec;
|
|
|
|
|
2016-05-05 22:42:55 +02:00
|
|
|
uint64_t io_weight;
|
|
|
|
uint64_t startup_io_weight;
|
|
|
|
/* Heads of the lists of CGroupIODeviceWeight / CGroupIODeviceLimit entries. */
LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
|
|
|
|
LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
|
|
|
|
|
2016-05-27 18:10:18 +02:00
|
|
|
uint64_t memory_low;
|
|
|
|
uint64_t memory_high;
|
|
|
|
uint64_t memory_max;
|
2016-07-04 09:03:54 +02:00
|
|
|
uint64_t memory_swap_max;
|
2016-05-27 18:10:18 +02:00
|
|
|
|
2016-11-11 19:59:19 +01:00
|
|
|
/* Two lists of IPAddressAccessItem (type from ip-address-access.h). */
LIST_HEAD(IPAddressAccessItem, ip_address_allow);
|
|
|
|
LIST_HEAD(IPAddressAccessItem, ip_address_deny);
|
|
|
|
|
2016-05-05 22:42:55 +02:00
|
|
|
/* For legacy hierarchies */
|
2015-09-11 16:48:24 +02:00
|
|
|
uint64_t cpu_shares;
|
|
|
|
uint64_t startup_cpu_shares;
|
2010-03-31 16:29:55 +02:00
|
|
|
|
|
2015-09-11 16:48:24 +02:00
|
|
|
uint64_t blockio_weight;
|
|
|
|
uint64_t startup_blockio_weight;
|
2013-06-27 04:14:27 +02:00
|
|
|
/* Heads of the lists of CGroupBlockIODeviceWeight / CGroupBlockIODeviceBandwidth entries. */
LIST_HEAD(CGroupBlockIODeviceWeight, blockio_device_weights);
|
|
|
|
LIST_HEAD(CGroupBlockIODeviceBandwidth, blockio_device_bandwidths);
|
2013-01-12 04:24:12 +01:00
|
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
uint64_t memory_limit;
|
2011-06-30 00:11:25 +02:00
|
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
/* Device access policy plus the head of the list of CGroupDeviceAllow entries. */
CGroupDevicePolicy device_policy;
|
|
|
|
LIST_HEAD(CGroupDeviceAllow, device_allow);
|
2014-11-05 17:57:23 +01:00
|
|
|
|
|
2016-05-05 22:42:55 +02:00
|
|
|
/* Common */
|
2015-09-10 12:32:16 +02:00
|
|
|
uint64_t tasks_max;
|
2015-09-11 16:48:24 +02:00
|
|
|
|
|
|
|
|
|
|
bool delegate;
|
2017-11-09 15:29:34 +01:00
|
|
|
/* NOTE(review): presumably the controller mask that applies when `delegate`
 * is set -- confirm against unit_get_delegate_mask(). */
CGroupMask delegate_controllers;
|
2013-06-27 04:14:27 +02:00
|
|
|
};
|
2011-06-30 00:11:25 +02:00
|
|
|
|
2017-09-05 19:27:53 +02:00
|
|
|
/* Used when querying IP accounting data: this is the type of the `metric`
 * parameter of unit_get_ip_accounting() declared later in this header. The
 * four real metrics are numbered from 0 in declaration order. */
|
|
|
|
typedef enum CGroupIPAccountingMetric {
|
|
|
|
CGROUP_IP_INGRESS_BYTES,
|
|
|
|
CGROUP_IP_INGRESS_PACKETS,
|
|
|
|
CGROUP_IP_EGRESS_BYTES,
|
|
|
|
CGROUP_IP_EGRESS_PACKETS,
|
|
|
|
/* Not a metric: follows the last real value, so it equals the number of
 * metrics above (4). */
_CGROUP_IP_ACCOUNTING_METRIC_MAX,
|
|
|
|
/* Not a metric: explicit negative value outside the valid 0.._MAX-1 range. */
_CGROUP_IP_ACCOUNTING_METRIC_INVALID = -1,
|
|
|
|
} CGroupIPAccountingMetric;
|
|
|
|
|
2015-11-18 22:46:33 +01:00
|
|
|
#include "unit.h"
|
2010-03-31 16:29:55 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
void cgroup_context_init(CGroupContext *c);
|
|
|
|
void cgroup_context_done(CGroupContext *c);
|
|
|
|
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix);
|
2014-02-14 19:11:07 +01:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support for the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possible to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and add each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified hierarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicely, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefully makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
CGroupMask cgroup_context_get_mask(CGroupContext *c);
|
2010-07-10 17:34:42 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a);
|
2016-05-05 22:42:55 +02:00
|
|
|
void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w);
|
|
|
|
void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l);
|
2013-06-27 04:14:27 +02:00
|
|
|
void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
|
|
|
|
void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);
|
2010-03-31 16:29:55 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support for the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possible to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and add each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified hierarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicely, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefully makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
CGroupMask unit_get_own_mask(Unit *u);
|
2017-11-09 15:29:34 +01:00
|
|
|
CGroupMask unit_get_delegate_mask(Unit *u);
|
core: unified cgroup hierarchy support
This patch set adds full support for the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possible to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and add each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified hierarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicely, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefully makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
CGroupMask unit_get_members_mask(Unit *u);
|
2017-11-09 15:29:34 +01:00
|
|
|
CGroupMask unit_get_siblings_mask(Unit *u);
|
core: unified cgroup hierarchy support
This patch set adds full support for the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possible to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and add each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified hierarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicely, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefully makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
CGroupMask unit_get_subtree_mask(Unit *u);
|
|
|
|
|
|
|
|
CGroupMask unit_get_target_mask(Unit *u);
|
|
|
|
CGroupMask unit_get_enable_mask(Unit *u);
|
2014-02-14 19:11:07 +01:00
|
|
|
|
2017-09-05 19:27:53 +02:00
|
|
|
bool unit_get_needs_bpf(Unit *u);
|
|
|
|
|
2014-02-14 19:11:07 +01:00
|
|
|
void unit_update_cgroup_members_masks(Unit *u);
|
core: unified cgroup hierarchy support
This patch set adds full support for the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possible to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and add each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified hierarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicely, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefully makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
|
|
|
char *unit_default_cgroup_path(Unit *u);
|
|
|
|
int unit_set_cgroup_path(Unit *u, const char *path);
|
2017-11-24 22:02:22 +01:00
|
|
|
int unit_pick_cgroup_path(Unit *u);
|
core: unified cgroup hierarchy support
This patch set adds full support for the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possible to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and add each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified hierarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicely, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefully makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
|
2013-06-30 23:55:36 +02:00
|
|
|
int unit_realize_cgroup(Unit *u);
|
core: unified cgroup hierarchy support
This patch set adds full support for the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possible to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and add each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified hierarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicely, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefully makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
void unit_release_cgroup(Unit *u);
|
|
|
|
void unit_prune_cgroup(Unit *u);
|
|
|
|
int unit_watch_cgroup(Unit *u);
|
|
|
|
|
2017-09-26 22:43:08 +02:00
|
|
|
void unit_add_to_cgroup_empty_queue(Unit *u);
|
|
|
|
|
2014-12-10 22:06:44 +01:00
|
|
|
int unit_attach_pids_to_cgroup(Unit *u);
|
2010-03-31 16:29:55 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
int manager_setup_cgroup(Manager *m);
|
|
|
|
void manager_shutdown_cgroup(Manager *m, bool delete);
|
2010-04-18 03:04:54 +02:00
|
|
|
|
2017-09-26 22:15:02 +02:00
|
|
|
unsigned manager_dispatch_cgroup_realize_queue(Manager *m);
|
2010-10-27 03:16:49 +02:00
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup);
|
2015-09-03 14:57:44 +02:00
|
|
|
Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid);
|
2013-06-27 04:14:27 +02:00
|
|
|
Unit* manager_get_unit_by_pid(Manager *m, pid_t pid);
|
2010-03-31 16:29:55 +02:00
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support for the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possible to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and add each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified hierarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicely, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefully makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
int unit_search_main_pid(Unit *u, pid_t *ret);
|
|
|
|
int unit_watch_all_pids(Unit *u);
|
2010-03-31 16:29:55 +02:00
|
|
|
|
2018-01-12 13:06:48 +01:00
|
|
|
int unit_synthesize_cgroup_empty_event(Unit *u);
|
|
|
|
|
2015-03-01 16:24:19 +01:00
|
|
|
int unit_get_memory_current(Unit *u, uint64_t *ret);
|
2015-09-10 12:32:16 +02:00
|
|
|
int unit_get_tasks_current(Unit *u, uint64_t *ret);
|
2015-03-01 16:24:19 +01:00
|
|
|
int unit_get_cpu_usage(Unit *u, nsec_t *ret);
|
2017-09-05 19:27:53 +02:00
|
|
|
int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
|
|
|
|
|
|
|
|
int unit_reset_cpu_accounting(Unit *u);
|
|
|
|
int unit_reset_ip_accounting(Unit *u);
|
2015-03-01 16:24:19 +01:00
|
|
|
|
2017-09-27 17:54:06 +02:00
|
|
|
/* Evaluates to the `name` member of the CGroupContext returned by
 * unit_get_cgroup_context(u), or to false when that call returns NULL.
 * Written as a GNU statement expression, so `u` is evaluated exactly once.
 * The temporary is called `cc`; an argument expression that itself refers
 * to a variable named `cc` would be captured by it. */
#define UNIT_CGROUP_BOOL(u, name) \
|
|
|
|
({ \
|
|
|
|
CGroupContext *cc = unit_get_cgroup_context(u); \
|
|
|
|
cc ? cc->name : false; \
|
|
|
|
})
|
2015-09-01 17:25:59 +02:00
|
|
|
|
2018-01-17 18:41:42 +01:00
|
|
|
bool unit_has_root_cgroup(Unit *u);
|
|
|
|
|
core: unified cgroup hierarchy support
This patch set adds full support for the new unified cgroup hierarchy logic
of modern kernels.
A new kernel command line option "systemd.unified_cgroup_hierarchy=1" is
added. If specified the unified hierarchy is mounted to /sys/fs/cgroup
instead of a tmpfs. No further hierarchies are mounted. The kernel
command line option defaults to off. We can turn it on by default as
soon as the kernel's APIs regarding this are stabilized (but even then
downstream distros might want to turn this off, as this will break any
tools that access cgroupfs directly).
It is possible to choose for each boot individually whether the unified
or the legacy hierarchy is used. nspawn will by default provide the
legacy hierarchy to containers if the host is using it, and the unified
otherwise. However it is possible to run containers with the unified
hierarchy on a legacy host and vice versa, by setting the
$UNIFIED_CGROUP_HIERARCHY environment variable for nspawn to 1 or 0,
respectively.
The unified hierarchy provides reliable cgroup empty notifications for
the first time, via inotify. To make use of this we maintain one
manager-wide inotify fd, and add each cgroup to it.
This patch also removes cg_delete() which is unused now.
On kernel 4.2 only the "memory" controller is compatible with the
unified hierarchy, hence that's the only controller systemd exposes when
booted in unified hierarchy mode.
This introduces a new enum for enumerating supported controllers, plus a
related enum for the mask bits mapping to it. The core is changed to
make use of this everywhere.
This moves PID 1 into a new "init.scope" implicit scope unit in the root
slice. This is necessary since on the unified hierarchy cgroups may
either contain subgroups or processes but not both. PID 1 hence has to
move out of the root cgroup (strictly speaking the root cgroup is the
only one where processes and subgroups are still allowed, but in order
to support containers nicely, we move PID 1 into the new scope in all
cases.) This new unit is also used on legacy hierarchy setups. It's
actually pretty useful on all systems, as it can then be used to filter
journal messages coming from PID 1, and so on.
The root slice ("-.slice") is now implicitly created and started (and
does not require a unit file on disk anymore), since
that's where "init.scope" is located and the slice needs to be started
before the scope can.
To check whether we are in unified or legacy hierarchy mode we use
statfs() on /sys/fs/cgroup. If the .f_type field reports tmpfs we are in
legacy mode, if it reports cgroupfs we are in unified mode.
This patch set carefully makes sure that cgls and cgtop continue to work
as desired.
When invoking nspawn as a service it will implicitly create two
subcgroups in the cgroup it is using, one to move the nspawn process
into, the other to move the actual container processes into. This is
done because of the requirement that cgroups may either contain
processes or other subgroups.
2015-09-01 19:22:36 +02:00
|
|
|
int manager_notify_cgroup_empty(Manager *m, const char *group);
|
|
|
|
|
2015-09-11 18:21:53 +02:00
|
|
|
void unit_invalidate_cgroup(Unit *u, CGroupMask m);
|
2017-09-05 19:27:53 +02:00
|
|
|
void unit_invalidate_cgroup_bpf(Unit *u);
|
2015-09-11 18:21:53 +02:00
|
|
|
|
|
|
|
void manager_invalidate_startup_units(Manager *m);
|
|
|
|
|
2013-06-27 04:14:27 +02:00
|
|
|
const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;
|
|
|
|
CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;
|
2018-02-06 11:57:35 +01:00
|
|
|
|
|
|
|
bool unit_cgroup_delegate(Unit *u);
|