core: add IODeviceLatencyTargetSec

This adds support for the following proposed latency based IO control
mechanism.

  https://lkml.org/lkml/2018/6/5/428
This commit is contained in:
Tejun Heo 2018-06-13 14:16:35 -07:00 committed by Zbigniew Jędrzejewski-Szmek
parent 71d1700afa
commit 6ae4283cb1
9 changed files with 320 additions and 12 deletions

View File

@ -393,11 +393,11 @@
<listitem>
<para>Set the per-device overall block I/O weight for the executed processes, if the unified control group
hierarchy is used on the system. Takes a space-separated pair of a file path and a weight value to specify
the device specific weight value, between 1 and 10000. (Example: "/dev/sda 1000"). The file path may be
specified as path to a block device node or as any other file, in which case the backing block device of the
file system of the file is determined. This controls the <literal>io.weight</literal> control group
attribute, which defaults to 100. Use this option multiple times to set weights for multiple devices. For
details about this control group attribute, see <ulink
the device specific weight value, between 1 and 10000. (Example: <literal>/dev/sda 1000</literal>). The file
path may be specified as path to a block device node or as any other file, in which case the backing block
device of the file system of the file is determined. This controls the <literal>io.weight</literal> control
group attribute, which defaults to 100. Use this option multiple times to set weights for multiple devices.
For details about this control group attribute, see <ulink
url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para>
<para>This setting replaces <varname>BlockIODeviceWeight=</varname> and disables settings prefixed with
@ -452,6 +452,25 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>IODeviceLatencyTargetSec=<replaceable>device</replaceable> <replaceable>target</replaceable></varname></term>
<listitem>
<para>Set the per-device average target I/O latency for the executed processes, if the unified control group
hierarchy is used on the system. Takes a file path and a timespan separated by a space to specify
the device specific latency target. (Example: "/dev/sda 25ms"). The file path may be specified
as path to a block device node or as any other file, in which case the backing block device of the file
system of the file is determined. This controls the <literal>io.latency</literal> control group
attribute. Use this option multiple times to set latency target for multiple devices. For details about this
control group attribute, see <ulink
url="https://www.kernel.org/doc/Documentation/cgroup-v2.txt">cgroup-v2.txt</ulink>.</para>
<para>Implies <literal>IOAccounting=true</literal>.</para>
<para>These settings are supported only if the unified control group hierarchy is used.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>IPAccounting=</varname></term>

View File

@ -114,6 +114,15 @@ void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight
free(w);
}
void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l) {
assert(c);
assert(l);
LIST_REMOVE(device_latencies, c->io_device_latencies, l);
free(l->path);
free(l);
}
void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l) {
assert(c);
assert(l);
@ -147,6 +156,9 @@ void cgroup_context_done(CGroupContext *c) {
while (c->io_device_weights)
cgroup_context_free_io_device_weight(c, c->io_device_weights);
while (c->io_device_latencies)
cgroup_context_free_io_device_latency(c, c->io_device_latencies);
while (c->io_device_limits)
cgroup_context_free_io_device_limit(c, c->io_device_limits);
@ -166,6 +178,7 @@ void cgroup_context_done(CGroupContext *c) {
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
CGroupIODeviceLimit *il;
CGroupIODeviceWeight *iw;
CGroupIODeviceLatency *l;
CGroupBlockIODeviceBandwidth *b;
CGroupBlockIODeviceWeight *w;
CGroupDeviceAllow *a;
@ -246,11 +259,18 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
LIST_FOREACH(device_weights, iw, c->io_device_weights)
fprintf(f,
"%sIODeviceWeight=%s %" PRIu64,
"%sIODeviceWeight=%s %" PRIu64 "\n",
prefix,
iw->path,
iw->weight);
LIST_FOREACH(device_latencies, l, c->io_device_latencies)
fprintf(f,
"%sIODeviceLatencyTargetSec=%s %s\n",
prefix,
l->path,
format_timespan(u, sizeof(u), l->target_usec, 1));
LIST_FOREACH(device_limits, il, c->io_device_limits) {
char buf[FORMAT_BYTES_MAX];
CGroupIOLimitType type;
@ -577,6 +597,7 @@ static bool cgroup_context_has_io_config(CGroupContext *c) {
c->io_weight != CGROUP_WEIGHT_INVALID ||
c->startup_io_weight != CGROUP_WEIGHT_INVALID ||
c->io_device_weights ||
c->io_device_latencies ||
c->io_device_limits;
}
@ -650,6 +671,26 @@ static void cgroup_apply_blkio_device_weight(Unit *u, const char *dev_path, uint
"Failed to set blkio.weight_device: %m");
}
static void cgroup_apply_io_device_latency(Unit *u, const char *dev_path, usec_t target) {
char buf[DECIMAL_STR_MAX(dev_t)*2+2+7+DECIMAL_STR_MAX(uint64_t)+1];
dev_t dev;
int r;
r = lookup_block_device(dev_path, &dev);
if (r < 0)
return;
if (target != USEC_INFINITY)
xsprintf(buf, "%u:%u target=%" PRIu64 "\n", major(dev), minor(dev), target);
else
xsprintf(buf, "%u:%u target=max\n", major(dev), minor(dev));
r = cg_set_attribute("io", u->cgroup_path, "io.latency", buf);
if (r < 0)
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set io.latency on cgroup %s: %m", u->cgroup_path);
}
static void cgroup_apply_io_device_limit(Unit *u, const char *dev_path, uint64_t *limits) {
char limit_bufs[_CGROUP_IO_LIMIT_TYPE_MAX][DECIMAL_STR_MAX(uint64_t)];
char buf[DECIMAL_STR_MAX(dev_t)*2+2+(6+DECIMAL_STR_MAX(uint64_t)+1)*4];
@ -826,13 +867,11 @@ static void cgroup_context_apply(
if (has_io) {
CGroupIODeviceWeight *w;
/* FIXME: no way to reset this list */
LIST_FOREACH(device_weights, w, c->io_device_weights)
cgroup_apply_io_device_weight(u, w->path, w->weight);
} else if (has_blockio) {
CGroupBlockIODeviceWeight *w;
/* FIXME: no way to reset this list */
LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
weight = cgroup_weight_blkio_to_io(w->weight);
@ -842,9 +881,15 @@ static void cgroup_context_apply(
cgroup_apply_io_device_weight(u, w->path, weight);
}
}
if (has_io) {
CGroupIODeviceLatency *l;
LIST_FOREACH(device_latencies, l, c->io_device_latencies)
cgroup_apply_io_device_latency(u, l->path, l->target_usec);
}
}
/* Apply limits and free ones without config. */
if (has_io) {
CGroupIODeviceLimit *l;
@ -901,7 +946,6 @@ static void cgroup_context_apply(
if (has_io) {
CGroupIODeviceWeight *w;
/* FIXME: no way to reset this list */
LIST_FOREACH(device_weights, w, c->io_device_weights) {
weight = cgroup_weight_io_to_blkio(w->weight);
@ -913,13 +957,11 @@ static void cgroup_context_apply(
} else if (has_blockio) {
CGroupBlockIODeviceWeight *w;
/* FIXME: no way to reset this list */
LIST_FOREACH(device_weights, w, c->blockio_device_weights)
cgroup_apply_blkio_device_weight(u, w->path, w->weight);
}
}
/* Apply limits and free ones without config. */
if (has_io) {
CGroupIODeviceLimit *l;

View File

@ -12,6 +12,7 @@ typedef struct CGroupContext CGroupContext;
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
typedef struct CGroupIODeviceWeight CGroupIODeviceWeight;
typedef struct CGroupIODeviceLimit CGroupIODeviceLimit;
typedef struct CGroupIODeviceLatency CGroupIODeviceLatency;
typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
@ -51,6 +52,12 @@ struct CGroupIODeviceLimit {
uint64_t limits[_CGROUP_IO_LIMIT_TYPE_MAX];
};
struct CGroupIODeviceLatency {
LIST_FIELDS(CGroupIODeviceLatency, device_latencies);
char *path;
usec_t target_usec;
};
struct CGroupBlockIODeviceWeight {
LIST_FIELDS(CGroupBlockIODeviceWeight, device_weights);
char *path;
@ -81,6 +88,7 @@ struct CGroupContext {
uint64_t startup_io_weight;
LIST_HEAD(CGroupIODeviceWeight, io_device_weights);
LIST_HEAD(CGroupIODeviceLimit, io_device_limits);
LIST_HEAD(CGroupIODeviceLatency, io_device_latencies);
uint64_t memory_min;
uint64_t memory_low;
@ -134,6 +142,7 @@ CGroupMask cgroup_context_get_mask(CGroupContext *c);
void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a);
void cgroup_context_free_io_device_weight(CGroupContext *c, CGroupIODeviceWeight *w);
void cgroup_context_free_io_device_limit(CGroupContext *c, CGroupIODeviceLimit *l);
void cgroup_context_free_io_device_latency(CGroupContext *c, CGroupIODeviceLatency *l);
void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w);
void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b);

View File

@ -119,6 +119,36 @@ static int property_get_io_device_limits(
return sd_bus_message_close_container(reply);
}
static int property_get_io_device_latency(
sd_bus *bus,
const char *path,
const char *interface,
const char *property,
sd_bus_message *reply,
void *userdata,
sd_bus_error *error) {
CGroupContext *c = userdata;
CGroupIODeviceLatency *l;
int r;
assert(bus);
assert(reply);
assert(c);
r = sd_bus_message_open_container(reply, 'a', "(st)");
if (r < 0)
return r;
LIST_FOREACH(device_latencies, l, c->io_device_latencies) {
r = sd_bus_message_append(reply, "(st)", l->path, l->target_usec);
if (r < 0)
return r;
}
return sd_bus_message_close_container(reply);
}
static int property_get_blockio_device_weight(
sd_bus *bus,
const char *path,
@ -291,6 +321,7 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("IOWriteBandwidthMax", "a(st)", property_get_io_device_limits, 0, 0),
SD_BUS_PROPERTY("IOReadIOPSMax", "a(st)", property_get_io_device_limits, 0, 0),
SD_BUS_PROPERTY("IOWriteIOPSMax", "a(st)", property_get_io_device_limits, 0, 0),
SD_BUS_PROPERTY("IODeviceLatencyTargetUSec", "a(st)", property_get_io_device_latency, 0, 0),
SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0),
SD_BUS_PROPERTY("BlockIOWeight", "t", NULL, offsetof(CGroupContext, blockio_weight), 0),
SD_BUS_PROPERTY("StartupBlockIOWeight", "t", NULL, offsetof(CGroupContext, startup_blockio_weight), 0),
@ -846,6 +877,86 @@ int bus_cgroup_set_property(
return 1;
} else if (streq(name, "IODeviceLatencyTargetUSec")) {
const char *path;
uint64_t target;
unsigned n = 0;
r = sd_bus_message_enter_container(message, 'a', "(st)");
if (r < 0)
return r;
while ((r = sd_bus_message_read(message, "(st)", &path, &target)) > 0) {
if (!path_is_normalized(path))
return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "Path '%s' specified in %s= is not normalized.", name, path);
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
CGroupIODeviceLatency *a = NULL, *b;
LIST_FOREACH(device_latencies, b, c->io_device_latencies) {
if (path_equal(b->path, path)) {
a = b;
break;
}
}
if (!a) {
a = new0(CGroupIODeviceLatency, 1);
if (!a)
return -ENOMEM;
a->path = strdup(path);
if (!a->path) {
free(a);
return -ENOMEM;
}
LIST_PREPEND(device_latencies, c->io_device_latencies, a);
}
a->target_usec = target;
}
n++;
}
r = sd_bus_message_exit_container(message);
if (r < 0)
return r;
if (!UNIT_WRITE_FLAGS_NOOP(flags)) {
_cleanup_free_ char *buf = NULL;
_cleanup_fclose_ FILE *f = NULL;
char ts[FORMAT_TIMESPAN_MAX];
CGroupIODeviceLatency *a;
size_t size = 0;
if (n == 0) {
while (c->io_device_latencies)
cgroup_context_free_io_device_latency(c, c->io_device_latencies);
}
unit_invalidate_cgroup(u, CGROUP_MASK_IO);
f = open_memstream(&buf, &size);
if (!f)
return -ENOMEM;
(void) __fsetlocking(f, FSETLOCKING_BYCALLER);
fputs("IODeviceLatencyTargetSec=\n", f);
LIST_FOREACH(device_latencies, a, c->io_device_latencies)
fprintf(f, "IODeviceLatencyTargetSec=%s %s\n",
a->path, format_timespan(ts, sizeof(ts), a->target_usec, 1));
r = fflush_and_check(f);
if (r < 0)
return r;
unit_write_setting(u, flags, name, buf);
}
return 1;
} else if (STR_IN_SET(name, "BlockIOReadBandwidth", "BlockIOWriteBandwidth")) {
const char *path;
bool read = true;

View File

@ -179,6 +179,7 @@ $1.IOReadBandwidthMax, config_parse_io_limit, 0,
$1.IOWriteBandwidthMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
$1.IOReadIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
$1.IOWriteIOPSMax, config_parse_io_limit, 0, offsetof($1, cgroup_context)
$1.IODeviceLatencyTargetSec, config_parse_io_device_latency, 0, offsetof($1, cgroup_context)
$1.BlockIOAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.blockio_accounting)
$1.BlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.blockio_weight)
$1.StartupBlockIOWeight, config_parse_blockio_weight, 0, offsetof($1, cgroup_context.startup_blockio_weight)

View File

@ -3337,6 +3337,77 @@ int config_parse_io_device_weight(
return 0;
}
int config_parse_io_device_latency(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
_cleanup_free_ char *path = NULL, *resolved = NULL;
CGroupIODeviceLatency *l;
CGroupContext *c = data;
const char *p = rvalue;
usec_t usec;
int r;
assert(filename);
assert(lvalue);
assert(rvalue);
if (isempty(rvalue)) {
while (c->io_device_latencies)
cgroup_context_free_io_device_latency(c, c->io_device_latencies);
return 0;
}
r = extract_first_word(&p, &path, NULL, EXTRACT_QUOTES);
if (r == -ENOMEM)
return log_oom();
if (r < 0) {
log_syntax(unit, LOG_WARNING, filename, line, r,
"Invalid syntax, ignoring: %s", rvalue);
return 0;
}
if (r == 0 || isempty(p)) {
log_syntax(unit, LOG_WARNING, filename, line, 0,
"Failed to extract device path and latency from '%s', ignoring.", rvalue);
return 0;
}
r = unit_full_printf(userdata, path, &resolved);
if (r < 0) {
log_syntax(unit, LOG_WARNING, filename, line, r,
"Failed to resolve unit specifiers in '%s', ignoring: %m", path);
return 0;
}
r = path_simplify_and_warn(resolved, 0, unit, filename, line, lvalue);
if (r < 0)
return 0;
if (parse_sec(p, &usec) < 0) {
log_syntax(unit, LOG_ERR, filename, line, 0, "Failed to parse timer value, ignoring: %s", p);
return 0;
}
l = new0(CGroupIODeviceLatency, 1);
if (!l)
return log_oom();
l->path = TAKE_PTR(resolved);
l->target_usec = usec;
LIST_PREPEND(device_latencies, c->io_device_latencies, l);
return 0;
}
int config_parse_io_limit(
const char *unit,
const char *filename,
@ -4522,6 +4593,7 @@ void unit_dump_config_items(FILE *f) {
{ config_parse_device_policy, "POLICY" },
{ config_parse_io_limit, "LIMIT" },
{ config_parse_io_device_weight, "DEVICEWEIGHT" },
{ config_parse_io_device_latency, "DEVICELATENCY" },
{ config_parse_blockio_bandwidth, "BANDWIDTH" },
{ config_parse_blockio_weight, "WEIGHT" },
{ config_parse_blockio_device_weight, "DEVICEWEIGHT" },

View File

@ -69,6 +69,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_tasks_max);
CONFIG_PARSER_PROTOTYPE(config_parse_delegate);
CONFIG_PARSER_PROTOTYPE(config_parse_device_policy);
CONFIG_PARSER_PROTOTYPE(config_parse_device_allow);
CONFIG_PARSER_PROTOTYPE(config_parse_io_device_latency);
CONFIG_PARSER_PROTOTYPE(config_parse_io_device_weight);
CONFIG_PARSER_PROTOTYPE(config_parse_io_limit);
CONFIG_PARSER_PROTOTYPE(config_parse_blockio_weight);

View File

@ -552,6 +552,37 @@ static int bus_append_cgroup_property(sd_bus_message *m, const char *field, cons
return 1;
}
if (streq(field, "IODeviceLatencyTargetSec")) {
const char *field_usec = "IODeviceLatencyTargetUSec";
if (isempty(eq))
r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", USEC_INFINITY);
else {
const char *path, *target, *e;
usec_t usec;
e = strchr(eq, ' ');
if (!e) {
log_error("Failed to parse %s value %s.", field, eq);
return -EINVAL;
}
path = strndupa(eq, e - eq);
target = e+1;
r = parse_sec(target, &usec);
if (r < 0)
return log_error_errno(r, "Failed to parse %s value %s: %m", field, target);
r = sd_bus_message_append(m, "(sv)", field_usec, "a(st)", 1, path, usec);
}
if (r < 0)
return bus_log_create_error(r);
return 1;
}
if (STR_IN_SET(field, "IPAddressAllow", "IPAddressDeny")) {
unsigned char prefixlen;
union in_addr_union prefix = {};

View File

@ -4899,6 +4899,28 @@ static int print_property(const char *name, sd_bus_message *m, bool value, bool
return 1;
} else if (contents[0] == SD_BUS_TYPE_STRUCT_BEGIN &&
streq(name, "IODeviceLatencyTargetUSec")) {
char ts[FORMAT_TIMESPAN_MAX];
const char *path;
uint64_t target;
r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "(st)");
if (r < 0)
return bus_log_parse_error(r);
while ((r = sd_bus_message_read(m, "(st)", &path, &target)) > 0)
print_prop(name, "%s %s", strna(path),
format_timespan(ts, sizeof(ts), target, 1));
if (r < 0)
return bus_log_parse_error(r);
r = sd_bus_message_exit_container(m);
if (r < 0)
return bus_log_parse_error(r);
return 1;
} else if (contents[0] == SD_BUS_TYPE_BYTE && streq(name, "StandardInputData")) {
_cleanup_free_ char *h = NULL;
const void *p;