test-cpu-set-util.c: fix typo in comment (#6916)

This commit is contained in:
Jan Synacek 2017-09-26 16:07:34 +02:00 committed by Zbigniew Jędrzejewski-Szmek
commit 0cde65e263
76 changed files with 4399 additions and 581 deletions

View File

@ -64,3 +64,17 @@ installed systemd tests:
* `$SYSTEMD_TEST_DATA` — override the location of test data. This is useful if
a test executable is moved to an arbitrary location.
nss-systemd:
* `$SYSTEMD_NSS_BYPASS_SYNTHETIC=1` — if set, `nss-systemd` won't synthesize
user/group records for the `root` and `nobody` users if they are missing from
`/etc/passwd`.
* `$SYSTEMD_NSS_DYNAMIC_BYPASS=1` — if set, `nss-systemd` won't return
user/group records for dynamically registered service users (i.e. users
registered through `DynamicUser=1`).
* `$SYSTEMD_NSS_BYPASS_BUS=1` — if set, `nss-systemd` won't use D-Bus to do
dynamic user lookups. This is primarily useful to make `nss-systemd` work
safely from within `dbus-daemon`.

9
TODO
View File

@ -26,6 +26,15 @@ Features:
* replace all uses of fgets() + LINE_MAX by read_line()
* fix logging in execute.c: extend log.c to have an optional mode where
log_open() is implicitly done before each log line and log_close() right
after. This way we don't have open fds around but logs will still
work. Because it is slow this mode should be used exclusively in the execute.c
case.
* set IPAddressDeny=any on all services that shouldn't do networking (possibly
combined with IPAddressAllow=localhost).
* dissect: when we discover squashfs, don't claim we had a "writable" partition
in systemd-dissect

View File

@ -319,17 +319,14 @@
<term><varname>DefaultBlockIOAccounting=</varname></term>
<term><varname>DefaultMemoryAccounting=</varname></term>
<term><varname>DefaultTasksAccounting=</varname></term>
<term><varname>DefaultIPAccounting=</varname></term>
<listitem><para>Configure the default resource accounting
settings, as configured per-unit by
<varname>CPUAccounting=</varname>,
<varname>BlockIOAccounting=</varname>,
<varname>MemoryAccounting=</varname> and
<varname>TasksAccounting=</varname>. See
<listitem><para>Configure the default resource accounting settings, as configured per-unit by
<varname>CPUAccounting=</varname>, <varname>BlockIOAccounting=</varname>, <varname>MemoryAccounting=</varname>,
<varname>TasksAccounting=</varname> and <varname>IPAccounting=</varname>. See
<citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details on the per-unit
settings. <varname>DefaultTasksAccounting=</varname> defaults
to on, the other three settings to off.</para></listitem>
for details on the per-unit settings. <varname>DefaultTasksAccounting=</varname> defaults to on, the other
four settings to off.</para></listitem>
</varlistentry>
<varlistentry>

View File

@ -480,6 +480,123 @@
</listitem>
</varlistentry>
<varlistentry>
<term><varname>IPAccounting=</varname></term>
<listitem>
<para>Takes a boolean argument. If true, turns on IPv4 and IPv6 network traffic accounting for packets sent
or received by the unit. When this option is turned on, all IPv4 and IPv6 sockets created by any process of
the unit are accounted for. When this option is used in socket units, it applies to all IPv4 and IPv6 sockets
associated with it (including both listening and connection sockets where this applies). Note that for
socket-activated services, this configuration setting and the accounting data of the service unit and the
socket unit are kept separate, and displayed separately. No propagation of the setting and the collected
statistics is done, in either direction. Moreover, any traffic sent or received on any of the socket unit's
sockets is accounted to the socket unit — and never to the service unit it might have activated, even if the
socket is used by it. Note that IP accounting is currently not supported for slice units, and enabling this
option for them has no effect. The system default for this setting may be controlled with
<varname>DefaultIPAccounting=</varname> in
<citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>IPAddressAllow=<replaceable>ADDRESS[/PREFIXLENGTH]…</replaceable></varname></term>
<term><varname>IPAddressDeny=<replaceable>ADDRESS[/PREFIXLENGTH]…</replaceable></varname></term>
<listitem>
<para>Turn on address range network traffic filtering for packets sent and received over AF_INET and AF_INET6
sockets. Both directives take a space separated list of IPv4 or IPv6 addresses, each optionally suffixed
with an address prefix length (separated by a <literal>/</literal> character). If the latter is omitted, the
address is considered a host address, i.e. the prefix covers the whole address (32 for IPv4, 128 for IPv6).
</para>
<para>The access lists configured with this option are applied to all sockets created by processes of this
unit (or in the case of socket units, associated with it). The lists are implicitly combined with any lists
configured for any of the parent slice units this unit might be a member of. By default all access lists are
empty. When configured the lists are enforced as follows:</para>
<itemizedlist>
<listitem><para>Access will be granted in case its destination/source address matches any entry in the
<varname>IPAddressAllow=</varname> setting.</para></listitem>
<listitem><para>Otherwise, access will be denied in case its destination/source address matches any entry
in the <varname>IPAddressDeny=</varname> setting.</para></listitem>
<listitem><para>Otherwise, access will be granted.</para></listitem>
</itemizedlist>
<para>In order to implement a whitelisting IP firewall, it is recommended to use a
<varname>IPAddressDeny=</varname><constant>any</constant> setting on an upper-level slice unit (such as the
root slice <filename>-.slice</filename> or the slice containing all system services
<filename>system.slice</filename>; see
<citerefentry><refentrytitle>systemd.special</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
details on these slice units), plus individual per-service <varname>IPAddressAllow=</varname> lines
permitting network access to relevant services, and only them.</para>
<para>Note that for socket-activated services, the IP access list configured on the socket unit applies to
all sockets associated with it directly, but not to any sockets created by the ultimately activated services
for it. Conversely, the IP access list configured for the service is not applied to any sockets passed into
the service via socket activation. Thus, it is usually a good idea to replicate the IP access lists on both
the socket and the service unit; however, it often makes sense to maintain one list more open and the other
one more restricted, depending on the use case.</para>
<para>If these settings are used multiple times in the same unit the specified lists are combined. If an
empty string is assigned to these settings the specific access list is reset and all previous settings undone.</para>
<para>In place of explicit IPv4 or IPv6 address and prefix length specifications a small set of symbolic
names may be used. The following names are defined:</para>
<table>
<title>Special address/network names</title>
<tgroup cols='3'>
<colspec colname='name'/>
<colspec colname='definition'/>
<colspec colname='meaning'/>
<thead>
<row>
<entry>Symbolic Name</entry>
<entry>Definition</entry>
<entry>Meaning</entry>
</row>
</thead>
<tbody>
<row>
<entry><constant>any</constant></entry>
<entry>0.0.0.0/0 ::/0</entry>
<entry>Any host</entry>
</row>
<row>
<entry><constant>localhost</constant></entry>
<entry>127.0.0.0/8 ::1/128</entry>
<entry>All addresses on the local loopback</entry>
</row>
<row>
<entry><constant>link-local</constant></entry>
<entry>169.254.0.0/16 fe80::/64</entry>
<entry>All link-local IP addresses</entry>
</row>
<row>
<entry><constant>multicast</constant></entry>
<entry>224.0.0.0/4 ff00::/8</entry>
<entry>All IP multicasting addresses</entry>
</row>
</tbody>
</tgroup>
</table>
<para>Note that these settings might not be supported on some systems (for example if eBPF control group
support is not enabled in the underlying kernel or container manager). These settings will have no effect in
that case. If compatibility with such systems is desired it is hence recommended to not exclusively rely on
them for IP security.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>DeviceAllow=</varname></term>

View File

@ -53,22 +53,15 @@
<refsect1>
<title>Description</title>
<para>A unit configuration file whose name ends in
<literal>.slice</literal> encodes information about a slice which
is a concept for hierarchically managing resources of a group of
processes. This management is performed by creating a node in the
Linux Control Group (cgroup) tree. Units that manage processes
(primarily scope and service units) may be assigned to a specific
slice. For each slice, certain resource limits may be set that
apply to all processes of all units contained in that
slice. Slices are organized hierarchically in a tree. The name of
the slice encodes the location in the tree. The name consists of a
dash-separated series of names, which describes the path to the
slice from the root slice. The root slice is named,
<filename>-.slice</filename>. Example:
<filename>foo-bar.slice</filename> is a slice that is located
within <filename>foo.slice</filename>, which in turn is located in
the root slice <filename>-.slice</filename>.
<para>A unit configuration file whose name ends in <literal>.slice</literal> encodes information about a slice
unit. A slice unit is a concept for hierarchically managing resources of a group of processes. This management is
performed by creating a node in the Linux Control Group (cgroup) tree. Units that manage processes (primarily scope
and service units) may be assigned to a specific slice. For each slice, certain resource limits may be set that
apply to all processes of all units contained in that slice. Slices are organized hierarchically in a tree. The
name of the slice encodes the location in the tree. The name consists of a dash-separated series of names, which
describes the path to the slice from the root slice. The root slice is named <filename>-.slice</filename>. Example:
<filename>foo-bar.slice</filename> is a slice that is located within <filename>foo.slice</filename>, which in turn
is located in the root slice <filename>-.slice</filename>.
</para>
<para>Note that slice units cannot be templated, nor is it possible to add multiple names to a slice unit by creating

View File

@ -48,8 +48,7 @@
</refnamediv>
<refsynopsisdiv><para>
<!-- sort alphabetically, targets first -->
<filename>basic.target</filename>,
<!-- sort alphabetically, targets first --><filename>basic.target</filename>,
<filename>bluetooth.target</filename>,
<filename>cryptsetup-pre.target</filename>,
<filename>cryptsetup.target</filename>,
@ -107,15 +106,15 @@
<filename>time-sync.target</filename>,
<filename>timers.target</filename>,
<filename>umount.target</filename>,
<!-- slices -->
<filename>-.slice</filename>,
<!-- slices --><filename>-.slice</filename>,
<filename>system.slice</filename>,
<filename>user.slice</filename>,
<filename>machine.slice</filename>,
<!-- the rest -->
<!-- the rest --><filename>-.mount</filename>,
<filename>dbus.service</filename>,
<filename>dbus.socket</filename>,
<filename>display-manager.service</filename>,
<filename>init.scope</filename>,
<filename>system-update-cleanup.service</filename>
</para></refsynopsisdiv>
@ -131,6 +130,15 @@
<title>Special System Units</title>
<variablelist>
<varlistentry>
<term><filename>-.mount</filename></term>
<listitem>
<para>The root mount point, i.e. the mount unit for the <filename>/</filename> path. This unit is
unconditionally active, during the entire time the system is up, as this mount point is where the basic
userspace is running from.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><filename>basic.target</filename></term>
<listitem>
@ -326,6 +334,13 @@
directly.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><filename>init.scope</filename></term>
<listitem>
<para>This scope unit is where the system and service manager (PID 1) itself resides. It is active as long as
the system is running.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><filename>initrd-fs.target</filename></term>
<listitem>
@ -1009,17 +1024,17 @@ PartOf=graphical-session.target
<refsect1>
<title>Special Slice Units</title>
<para>There are four <literal>.slice</literal> units which form
the basis of the hierarchy for assignment of resources for
services, users, and virtual machines or containers.</para>
<para>There are four <literal>.slice</literal> units which form the basis of the hierarchy for assignment of
resources for services, users, and virtual machines or containers. See
<citerefentry><refentrytitle>systemd.slice</refentrytitle><manvolnum>5</manvolnum></citerefentry> for details about slice
units.</para>
<variablelist>
<varlistentry>
<term><filename>-.slice</filename></term>
<listitem>
<para>The root slice is the root of the hierarchy. It
usually does not contain units directly, but may be used to
set defaults for the whole tree.</para>
<para>The root slice is the root of the slice hierarchy. It usually does not contain units directly, but may
be used to set defaults for the whole tree.</para>
</listitem>
</varlistentry>

View File

@ -443,6 +443,8 @@ foreach ident : [
#include <keyutils.h>'''],
['copy_file_range', '''#include <sys/syscall.h>
#include <unistd.h>'''],
['bpf', '''#include <sys/syscall.h>
#include <unistd.h>'''],
['explicit_bzero' , '''#include <string.h>'''],
]

View File

@ -28,7 +28,7 @@ export LC_CTYPE=C.UTF-8
[ -f "$BUILDDIR"/build.ninja ] || meson "$BUILDDIR"
ninja -C "$BUILDDIR" all
[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test
[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test || ( RET="$?" ; cat "$BUILDDIR"/meson-logs/testlog.txt ; exit "$RET" )
ninja -C "$BUILDDIR" install
mkdir -p "$DESTDIR"/etc

183
src/basic/bpf-program.c Normal file
View File

@ -0,0 +1,183 @@
/***
This file is part of systemd.
Copyright 2016 Daniel Mack
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "alloc-util.h"
#include "bpf-program.h"
#include "fd-util.h"
#include "log.h"
#include "missing.h"
/* Allocates a new BPF program object with new0(), records the requested program type in it and marks it as not
 * yet loaded into the kernel (kernel_fd is set to -1). On success the object is stored in *ret and 0 is
 * returned; if the allocation fails the result of log_oom() is returned and *ret is left untouched. */
int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
        BPFProgram *prog;

        prog = new0(BPFProgram, 1);
        if (!prog)
                return log_oom();

        prog->kernel_fd = -1;
        prog->prog_type = prog_type;

        /* Nothing can fail past this point, hence hand the object over to the caller right away. */
        *ret = prog;
        return 0;
}
/* Releases a BPF program object: closes its kernel file descriptor via safe_close(), frees the instruction
 * array and then the object itself. Accepts NULL, in which case nothing is done. Always returns NULL. */
BPFProgram *bpf_program_unref(BPFProgram *p) {
        if (p) {
                safe_close(p->kernel_fd);
                free(p->instructions);
                free(p);
        }

        return NULL;
}
/* Appends 'count' instructions taken from 'instructions' to the end of the program's instruction array, growing
 * the array as needed.
 *
 * Returns 0 on success and -ENOMEM if the array could not be enlarged. Appending zero instructions is a no-op
 * that always succeeds; 'instructions' may be NULL in that case. */
int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {

        assert(p);

        /* Nothing to append. Bail out early so that we neither hand a possibly-NULL source pointer to memcpy()
         * (which is undefined behaviour even for a length of zero) nor ask the allocator to make room for zero
         * elements in a still-empty array and then misread its answer as an allocation failure. */
        if (count == 0)
                return 0;

        assert(instructions);

        if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
                return -ENOMEM;

        memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
        p->n_instructions += count;

        return 0;
}
/* Uploads the instructions collected in 'p' to the kernel with BPF_PROG_LOAD and stores the resulting file
 * descriptor in p->kernel_fd.
 *
 * If 'log_buf' is non-NULL it is passed to the kernel together with 'log_size' and the log level is set to 1;
 * otherwise the log level is 0.
 *
 * Returns 0 on success, -EBUSY if the program already has a kernel fd, or -errno if the bpf() call fails. */
int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
        union bpf_attr attr;

        assert(p);

        if (p->kernel_fd >= 0)
                return -EBUSY;

        /* Zero the whole union explicitly before filling in the fields we need: the kernel insists that all
         * bytes of bpf_attr that a command does not use are zero, and a designated initializer on a union is
         * only guaranteed to set the members it names, not the remaining bytes. */
        memset(&attr, 0, sizeof(attr));
        attr.prog_type = p->prog_type;
        attr.insns = PTR_TO_UINT64(p->instructions);
        attr.insn_cnt = p->n_instructions;
        attr.license = PTR_TO_UINT64("GPL");
        attr.log_buf = PTR_TO_UINT64(log_buf);
        attr.log_level = !!log_buf;
        attr.log_size = log_size;

        p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
        if (p->kernel_fd < 0)
                return -errno;

        return 0;
}
/* Attaches the program's kernel fd to the directory at 'path' with BPF_PROG_ATTACH, using attach type 'type'
 * and attach flags 'flags'. The directory is opened only for the duration of the call.
 *
 * Returns 0 on success, or -errno if opening the directory or the bpf() call fails. */
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
        _cleanup_close_ int fd = -1;
        union bpf_attr attr;

        assert(p);
        assert(type >= 0);
        assert(path);

        fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        /* Zero the whole union explicitly before filling in the fields we need: the kernel insists that all
         * bytes of bpf_attr that a command does not use are zero, and a designated initializer on a union is
         * only guaranteed to set the members it names, not the remaining bytes. */
        memset(&attr, 0, sizeof(attr));
        attr.attach_type = type;
        attr.target_fd = fd;
        attr.attach_bpf_fd = p->kernel_fd;
        attr.attach_flags = flags;

        if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
                return -errno;

        return 0;
}
/* Issues BPF_PROG_DETACH for attach type 'type' on the directory at 'path'. The directory is opened only for
 * the duration of the call.
 *
 * Returns 0 on success, or -errno if opening the directory or the bpf() call fails. */
int bpf_program_cgroup_detach(int type, const char *path) {
        _cleanup_close_ int fd = -1;
        union bpf_attr attr;

        assert(path);

        fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        /* Zero the whole union explicitly before filling in the fields we need: the kernel insists that all
         * bytes of bpf_attr that a command does not use are zero, and a designated initializer on a union is
         * only guaranteed to set the members it names, not the remaining bytes. */
        memset(&attr, 0, sizeof(attr));
        attr.attach_type = type;
        attr.target_fd = fd;

        if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
                return -errno;

        return 0;
}
/* Creates a new BPF map with BPF_MAP_CREATE, using the given map type, key size, value size, maximum number of
 * entries and map flags.
 *
 * Returns the new map's file descriptor on success, or -errno if the bpf() call fails. */
int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
        union bpf_attr attr;
        int fd;

        /* Zero the whole union explicitly before filling in the fields we need: the kernel insists that all
         * bytes of bpf_attr that a command does not use are zero, and a designated initializer on a union is
         * only guaranteed to set the members it names, not the remaining bytes. */
        memset(&attr, 0, sizeof(attr));
        attr.map_type = type;
        attr.key_size = key_size;
        attr.value_size = value_size;
        attr.max_entries = max_entries;
        attr.map_flags = flags;

        fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
        if (fd < 0)
                return -errno;

        return fd;
}
/* Issues BPF_MAP_UPDATE_ELEM on the map referenced by 'fd', passing the 'key' and 'value' buffers to the
 * kernel. No update flags are set.
 *
 * Returns 0 on success, or -errno if the bpf() call fails. */
int bpf_map_update_element(int fd, const void *key, void *value) {
        union bpf_attr attr;

        /* Zero the whole union explicitly before filling in the fields we need: the kernel insists that all
         * bytes of bpf_attr that a command does not use are zero, and a designated initializer on a union is
         * only guaranteed to set the members it names, not the remaining bytes. */
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = PTR_TO_UINT64(key);
        attr.value = PTR_TO_UINT64(value);

        if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
                return -errno;

        return 0;
}
/* Issues BPF_MAP_LOOKUP_ELEM on the map referenced by 'fd', passing the 'key' buffer and the 'value' buffer
 * (which receives the result) to the kernel.
 *
 * Returns 0 on success, or -errno if the bpf() call fails. */
int bpf_map_lookup_element(int fd, const void *key, void *value) {
        union bpf_attr attr;

        /* Zero the whole union explicitly before filling in the fields we need: the kernel insists that all
         * bytes of bpf_attr that a command does not use are zero, and a designated initializer on a union is
         * only guaranteed to set the members it names, not the remaining bytes. */
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = PTR_TO_UINT64(key);
        attr.value = PTR_TO_UINT64(value);

        if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
                return -errno;

        return 0;
}

55
src/basic/bpf-program.h Normal file
View File

@ -0,0 +1,55 @@
#pragma once
/***
This file is part of systemd.
Copyright 2016 Daniel Mack
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
[Except for the stuff copy/pasted from the kernel sources, see below]
***/
#include <linux/bpf.h>
#include <stdint.h>
#include <sys/syscall.h>
#include "list.h"
#include "macro.h"
/* An in-memory BPF program: a growable array of instructions plus, once loaded, the kernel's fd for it. */
typedef struct BPFProgram BPFProgram;
struct BPFProgram {
/* File descriptor returned by BPF_PROG_LOAD; bpf_program_new() initializes it to -1. */
int kernel_fd;
/* Program type handed to the kernel as bpf_attr.prog_type when loading. */
uint32_t prog_type;
/* Number of entries of 'instructions' that are currently in use. */
size_t n_instructions;
/* Allocation bookkeeping for 'instructions', maintained by GREEDY_REALLOC(). */
size_t allocated;
/* The instruction array; freed by bpf_program_unref(). */
struct bpf_insn *instructions;
};
/* Allocates an empty program object of the given type and stores it in *ret. */
int bpf_program_new(uint32_t prog_type, BPFProgram **ret);
/* Closes the kernel fd, frees the instructions and the object; accepts NULL and always returns NULL. */
BPFProgram *bpf_program_unref(BPFProgram *p);
/* Appends 'count' instructions to the program; returns -ENOMEM if the array cannot be grown. */
int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
/* Loads the program with BPF_PROG_LOAD; returns -EBUSY if it already has a kernel fd. 'log_buf' may be NULL. */
int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
/* Attaches (BPF_PROG_ATTACH) the loaded program to the directory at 'path'. */
int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
/* Issues BPF_PROG_DETACH for the given attach type on the directory at 'path'. */
int bpf_program_cgroup_detach(int type, const char *path);
/* Creates a map with BPF_MAP_CREATE and returns its fd, or a negative errno-style error. */
int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
/* Thin wrappers around BPF_MAP_UPDATE_ELEM and BPF_MAP_LOOKUP_ELEM; return 0 or a negative errno-style error. */
int bpf_map_update_element(int fd, const void *key, void *value);
int bpf_map_lookup_element(int fd, const void *key, void *value);
/* Generates bpf_program_unrefp(), which is what _cleanup_(bpf_program_unrefp) refers to. */
DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref);

View File

@ -103,9 +103,12 @@ int cg_read_pid(FILE *f, pid_t *_pid) {
return 1;
}
int cg_read_event(const char *controller, const char *path, const char *event,
char **val)
{
int cg_read_event(
const char *controller,
const char *path,
const char *event,
char **val) {
_cleanup_free_ char *events = NULL, *content = NULL;
char *p, *line;
int r;

View File

@ -308,22 +308,22 @@ int in_addr_from_string(int family, const char *s, union in_addr_union *ret) {
return 0;
}
int in_addr_from_string_auto(const char *s, int *family, union in_addr_union *ret) {
int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret) {
int r;
assert(s);
r = in_addr_from_string(AF_INET, s, ret);
if (r >= 0) {
if (family)
*family = AF_INET;
if (ret_family)
*ret_family = AF_INET;
return 0;
}
r = in_addr_from_string(AF_INET6, s, ret);
if (r >= 0) {
if (family)
*family = AF_INET6;
if (ret_family)
*ret_family = AF_INET6;
return 0;
}
@ -371,13 +371,13 @@ int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_u
return r;
}
unsigned char in_addr_netmask_to_prefixlen(const struct in_addr *addr) {
unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr) {
assert(addr);
return 32 - u32ctz(be32toh(addr->s_addr));
}
struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
assert(addr);
assert(prefixlen <= 32);
@ -390,7 +390,7 @@ struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char
return addr;
}
int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
uint8_t msb_octet = *(uint8_t*) addr;
/* addr may not be aligned, so make sure we only access it byte-wise */
@ -414,18 +414,18 @@ int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixl
return 0;
}
int in_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
unsigned char prefixlen;
int r;
assert(addr);
assert(mask);
r = in_addr_default_prefixlen(addr, &prefixlen);
r = in4_addr_default_prefixlen(addr, &prefixlen);
if (r < 0)
return r;
in_addr_prefixlen_to_netmask(mask, prefixlen);
in4_addr_prefixlen_to_netmask(mask, prefixlen);
return 0;
}
@ -435,7 +435,7 @@ int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen)
if (family == AF_INET) {
struct in_addr mask;
if (!in_addr_prefixlen_to_netmask(&mask, prefixlen))
if (!in4_addr_prefixlen_to_netmask(&mask, prefixlen))
return -EINVAL;
addr->in.s_addr &= mask.s_addr;
@ -465,10 +465,57 @@ int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen)
return -EAFNOSUPPORT;
}
int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, uint8_t *ret_prefixlen) {
int in_addr_prefix_covers(int family,
const union in_addr_union *prefix,
unsigned char prefixlen,
const union in_addr_union *address) {
union in_addr_union masked_prefix, masked_address;
int r;
assert(prefix);
assert(address);
masked_prefix = *prefix;
r = in_addr_mask(family, &masked_prefix, prefixlen);
if (r < 0)
return r;
masked_address = *address;
r = in_addr_mask(family, &masked_address, prefixlen);
if (r < 0)
return r;
return in_addr_equal(family, &masked_prefix, &masked_address);
}
/* Parses the string 'p' as a prefix length for the given address family and stores it in *ret.
 *
 * Returns 0 on success, -EAFNOSUPPORT if 'family' is neither AF_INET nor AF_INET6, the error from safe_atou8()
 * if the string cannot be parsed, and -ERANGE if the value exceeds the number of bits in an address of that
 * family. *ret is only written on success. */
int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret) {
        uint8_t parsed;
        int r;

        if (family != AF_INET && family != AF_INET6)
                return -EAFNOSUPPORT;

        r = safe_atou8(p, &parsed);
        if (r < 0)
                return r;

        /* The prefix cannot be longer than the address itself. */
        if (parsed > FAMILY_ADDRESS_SIZE(family) * 8)
                return -ERANGE;

        *ret = parsed;

        return 0;
}
int in_addr_prefix_from_string(
const char *p,
int family,
union in_addr_union *ret_prefix,
unsigned char *ret_prefixlen) {
union in_addr_union buffer;
const char *e, *l;
uint8_t k;
unsigned char k;
int r;
assert(p);
@ -486,23 +533,58 @@ int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *r
if (r < 0)
return r;
k = FAMILY_ADDRESS_SIZE(family) * 8;
if (e) {
uint8_t n;
r = safe_atou8(e + 1, &n);
r = in_addr_parse_prefixlen(family, e+1, &k);
if (r < 0)
return r;
} else
k = FAMILY_ADDRESS_SIZE(family) * 8;
if (n > k)
return -ERANGE;
k = n;
}
*ret_prefix = buffer;
*ret_prefixlen = k;
if (ret_prefix)
*ret_prefix = buffer;
if (ret_prefixlen)
*ret_prefixlen = k;
return 0;
}
int in_addr_prefix_from_string_auto(
const char *p,
int *ret_family,
union in_addr_union *ret_prefix,
unsigned char *ret_prefixlen) {
union in_addr_union buffer;
const char *e, *l;
unsigned char k;
int family, r;
assert(p);
e = strchr(p, '/');
if (e)
l = strndupa(p, e - p);
else
l = p;
r = in_addr_from_string_auto(l, &family, &buffer);
if (r < 0)
return r;
if (e) {
r = in_addr_parse_prefixlen(family, e+1, &k);
if (r < 0)
return r;
} else
k = FAMILY_ADDRESS_SIZE(family) * 8;
if (ret_family)
*ret_family = family;
if (ret_prefix)
*ret_prefix = buffer;
if (ret_prefixlen)
*ret_prefixlen = k;
return 0;
}

View File

@ -53,14 +53,17 @@ int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen);
int in_addr_to_string(int family, const union in_addr_union *u, char **ret);
int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret);
int in_addr_from_string(int family, const char *s, union in_addr_union *ret);
int in_addr_from_string_auto(const char *s, int *family, union in_addr_union *ret);
int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret);
int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex);
unsigned char in_addr_netmask_to_prefixlen(const struct in_addr *addr);
struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
int in_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr);
struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen);
int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, uint8_t *ret_prefixlen);
int in_addr_prefix_covers(int family, const union in_addr_union *prefix, unsigned char prefixlen, const union in_addr_union *address);
int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret);
int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
int in_addr_prefix_from_string_auto(const char *p, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
static inline size_t FAMILY_ADDRESS_SIZE(int family) {
assert(family == AF_INET || family == AF_INET6);

View File

@ -40,14 +40,6 @@ int fd_wait_for_event(int fd, int event, usec_t timeout);
ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length);
#define IOVEC_SET_STRING(i, s) \
do { \
struct iovec *_i = &(i); \
char *_s = (char *)(s); \
_i->iov_base = _s; \
_i->iov_len = strlen(_s); \
} while (false)
static inline size_t IOVEC_TOTAL_SIZE(const struct iovec *i, unsigned n) {
unsigned j;
size_t r = 0;
@ -93,3 +85,8 @@ static inline bool FILE_SIZE_VALID_OR_INFINITY(uint64_t l) {
return FILE_SIZE_VALID(l);
}
/* Helpers for building struct iovec values.
 *
 * IOVEC_INIT() and IOVEC_INIT_STRING() expand to a brace-enclosed initializer and are meant for declarations;
 * IOVEC_MAKE() and IOVEC_MAKE_STRING() expand to a compound literal and can be used wherever an expression of
 * type struct iovec is needed, for example on the right-hand side of an assignment.
 *
 * The *_STRING variants take a NUL-terminated string and use strlen() for the length (the terminating NUL is
 * not included). Note that they evaluate their argument twice, hence do not pass expressions with side
 * effects. The argument is parenthesized before it is cast so that the cast applies to the argument as a
 * whole, whatever expression is passed in. */
#define IOVEC_INIT(base, len) { .iov_base = (base), .iov_len = (len) }
#define IOVEC_MAKE(base, len) (struct iovec) IOVEC_INIT(base, len)
#define IOVEC_INIT_STRING(string) IOVEC_INIT((char*) (string), strlen(string))
#define IOVEC_MAKE_STRING(string) (struct iovec) IOVEC_INIT_STRING(string)

View File

@ -20,8 +20,9 @@
#include <unistd.h>
#include "alloc-util.h"
#include "journal-importer.h"
#include "fd-util.h"
#include "io-util.h"
#include "journal-importer.h"
#include "parse-util.h"
#include "string-util.h"
#include "unaligned.h"
@ -38,7 +39,7 @@ static int iovw_put(struct iovec_wrapper *iovw, void* data, size_t len) {
if (!GREEDY_REALLOC(iovw->iovec, iovw->size_bytes, iovw->count + 1))
return log_oom();
iovw->iovec[iovw->count++] = (struct iovec) {data, len};
iovw->iovec[iovw->count++] = IOVEC_MAKE(data, len);
return 0;
}

View File

@ -351,22 +351,22 @@ static int write_to_console(
if (log_target == LOG_TARGET_CONSOLE_PREFIXED) {
xsprintf(prefix, "<%i>", level);
IOVEC_SET_STRING(iovec[n++], prefix);
iovec[n++] = IOVEC_MAKE_STRING(prefix);
}
highlight = LOG_PRI(level) <= LOG_ERR && show_color;
if (show_location) {
snprintf(location, sizeof(location), "(%s:%i) ", file, line);
IOVEC_SET_STRING(iovec[n++], location);
iovec[n++] = IOVEC_MAKE_STRING(location);
}
if (highlight)
IOVEC_SET_STRING(iovec[n++], ANSI_HIGHLIGHT_RED);
IOVEC_SET_STRING(iovec[n++], buffer);
iovec[n++] = IOVEC_MAKE_STRING(ANSI_HIGHLIGHT_RED);
iovec[n++] = IOVEC_MAKE_STRING(buffer);
if (highlight)
IOVEC_SET_STRING(iovec[n++], ANSI_NORMAL);
IOVEC_SET_STRING(iovec[n++], "\n");
iovec[n++] = IOVEC_MAKE_STRING(ANSI_NORMAL);
iovec[n++] = IOVEC_MAKE_STRING("\n");
if (writev(console_fd, iovec, n) < 0) {
@ -425,11 +425,11 @@ static int write_to_syslog(
xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
IOVEC_SET_STRING(iovec[0], header_priority);
IOVEC_SET_STRING(iovec[1], header_time);
IOVEC_SET_STRING(iovec[2], program_invocation_short_name);
IOVEC_SET_STRING(iovec[3], header_pid);
IOVEC_SET_STRING(iovec[4], buffer);
iovec[0] = IOVEC_MAKE_STRING(header_priority);
iovec[1] = IOVEC_MAKE_STRING(header_time);
iovec[2] = IOVEC_MAKE_STRING(program_invocation_short_name);
iovec[3] = IOVEC_MAKE_STRING(header_pid);
iovec[4] = IOVEC_MAKE_STRING(buffer);
/* When using syslog via SOCK_STREAM separate the messages by NUL chars */
if (syslog_is_stream)
@ -470,11 +470,11 @@ static int write_to_kmsg(
xsprintf(header_priority, "<%i>", level);
xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());
IOVEC_SET_STRING(iovec[0], header_priority);
IOVEC_SET_STRING(iovec[1], program_invocation_short_name);
IOVEC_SET_STRING(iovec[2], header_pid);
IOVEC_SET_STRING(iovec[3], buffer);
IOVEC_SET_STRING(iovec[4], "\n");
iovec[0] = IOVEC_MAKE_STRING(header_priority);
iovec[1] = IOVEC_MAKE_STRING(program_invocation_short_name);
iovec[2] = IOVEC_MAKE_STRING(header_pid);
iovec[3] = IOVEC_MAKE_STRING(buffer);
iovec[4] = IOVEC_MAKE_STRING("\n");
if (writev(kmsg_fd, iovec, ELEMENTSOF(iovec)) < 0)
return -errno;
@ -547,10 +547,10 @@ static int write_to_journal(
log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object, extra_field, extra);
IOVEC_SET_STRING(iovec[0], header);
IOVEC_SET_STRING(iovec[1], "MESSAGE=");
IOVEC_SET_STRING(iovec[2], buffer);
IOVEC_SET_STRING(iovec[3], "\n");
iovec[0] = IOVEC_MAKE_STRING(header);
iovec[1] = IOVEC_MAKE_STRING("MESSAGE=");
iovec[2] = IOVEC_MAKE_STRING(buffer);
iovec[3] = IOVEC_MAKE_STRING("\n");
mh.msg_iov = iovec;
mh.msg_iovlen = ELEMENTSOF(iovec);
@ -872,7 +872,7 @@ int log_format_iovec(
* the next format string */
VA_FORMAT_ADVANCE(format, ap);
IOVEC_SET_STRING(iovec[(*n)++], m);
iovec[(*n)++] = IOVEC_MAKE_STRING(m);
if (newline_separator) {
iovec[*n].iov_base = (char*) &nl;
@ -893,9 +893,9 @@ int log_struct_internal(
const char *func,
const char *format, ...) {
LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
char buf[LINE_MAX];
bool found = false;
LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
PROTECT_ERRNO;
va_list ap;
@ -926,7 +926,7 @@ int log_struct_internal(
/* If the journal is available do structured logging */
log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
IOVEC_SET_STRING(iovec[n++], header);
iovec[n++] = IOVEC_MAKE_STRING(header);
va_start(ap, format);
r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, true, error, format, ap);
@ -975,6 +975,73 @@ int log_struct_internal(
return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buf + 8);
}
/* Logs a structured message whose fields are supplied as an array of iovecs rather than as format strings.
 * If the log target is one of the journal-capable targets and the journal fd is open, the fields are sent to
 * the journal together with the header generated by log_do_header(). Otherwise, or if sending fails, the text
 * of the first field starting with "MESSAGE=" is passed on to log_dispatch_internal().
 * Returns -error (with 'error' normalized to a non-negative value first) on all paths handled here, or
 * whatever log_dispatch_internal() returns for the fallback path. */
int log_struct_iovec_internal(
int level,
int error,
const char *file,
int line,
const char *func,
const struct iovec input_iovec[],
size_t n_input_iovec) {
LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
PROTECT_ERRNO;
size_t i;
char *m;
/* Accept both positive and negative error codes, work with the positive form from here on. */
if (error < 0)
error = -error;
/* Drop the message if its priority is filtered out for this realm. */
if (_likely_(LOG_PRI(level) > log_max_level[realm]))
return -error;
if (log_target == LOG_TARGET_NULL)
return -error;
/* No facility specified by the caller? Then use the configured default facility. */
if ((level & LOG_FACMASK) == 0)
level = log_facility | LOG_PRI(level);
if (IN_SET(log_target, LOG_TARGET_AUTO,
LOG_TARGET_JOURNAL_OR_KMSG,
LOG_TARGET_JOURNAL) &&
journal_fd >= 0) {
/* One slot for the header, plus two per input field: the field itself and a newline after it. */
struct iovec iovec[1 + n_input_iovec*2];
char header[LINE_MAX];
struct msghdr mh = {
.msg_iov = iovec,
.msg_iovlen = 1 + n_input_iovec*2,
};
log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
iovec[0] = IOVEC_MAKE_STRING(header);
for (i = 0; i < n_input_iovec; i++) {
iovec[1+i*2] = input_iovec[i];
iovec[1+i*2+1] = IOVEC_MAKE_STRING("\n");
}
/* If sending to the journal worked we are done; otherwise fall through to the fallback below. */
if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) >= 0)
return -error;
}
/* Fallback: look for the first field that begins with "MESSAGE=". */
for (i = 0; i < n_input_iovec; i++) {
if (input_iovec[i].iov_len < strlen("MESSAGE="))
continue;
if (memcmp(input_iovec[i].iov_base, "MESSAGE=", strlen("MESSAGE=")) == 0)
break;
}
if (_unlikely_(i >= n_input_iovec)) /* Couldn't find MESSAGE=? */
return -error;
/* Copy the text after the "MESSAGE=" prefix with strndupa(), which also NUL-terminates it for the
 * string-based dispatch call below. */
m = strndupa(input_iovec[i].iov_base + strlen("MESSAGE="),
input_iovec[i].iov_len - strlen("MESSAGE="));
return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, m);
}
int log_set_target_from_string(const char *e) {
LogTarget t;

View File

@ -187,6 +187,15 @@ int log_format_iovec(
const char *format,
va_list ap) _printf_(6, 0);
/* Structured logging from a caller-supplied array of iovecs. When the journal is not used, only
 * the entry that starts with "MESSAGE=" is logged. Returns the negated absolute value of
 * 'error'. */
int log_struct_iovec_internal(
int level,
int error,
const char *file,
int line,
const char *func,
const struct iovec input_iovec[],
size_t n_input_iovec);
/* This modifies the buffer passed! */
int log_dump_internal(
int level,
@ -270,6 +279,11 @@ void log_assert_failed_return_realm(
error, __FILE__, __LINE__, __func__, __VA_ARGS__)
#define log_struct(level, ...) log_struct_errno(level, 0, __VA_ARGS__)
/* Structured logging of a pre-built iovec array: tags the level with the compile-time LOG_REALM
 * and passes the call site (file, line, function) on to log_struct_iovec_internal(). */
#define log_struct_iovec_errno(level, error, iovec, n_iovec) \
log_struct_iovec_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
error, __FILE__, __LINE__, __func__, iovec, n_iovec)
/* Same as log_struct_iovec_errno(), with an error value of 0. */
#define log_struct_iovec(level, iovec, n_iovec) log_struct_iovec_errno(level, 0, iovec, n_iovec)
/* This modifies the buffer passed! */
#define log_dump(level, buffer) \
log_dump_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \

View File

@ -1,4 +1,6 @@
basic_sources_plain = files('''
MurmurHash2.c
MurmurHash2.h
af-list.c
af-list.h
alloc-util.c
@ -16,6 +18,8 @@ basic_sources_plain = files('''
bitmap.c
bitmap.h
blkid-util.h
bpf-program.c
bpf-program.h
btrfs-ctree.h
btrfs-util.c
btrfs-util.h
@ -24,10 +28,10 @@ basic_sources_plain = files('''
bus-label.h
calendarspec.c
calendarspec.h
capability-util.c
capability-util.h
cap-list.c
cap-list.h
capability-util.c
capability-util.h
cgroup-util.c
cgroup-util.h
chattr-util.c
@ -61,10 +65,10 @@ basic_sources_plain = files('''
extract-word.h
fd-util.c
fd-util.h
fileio.c
fileio.h
fileio-label.c
fileio-label.h
fileio.c
fileio.h
format-util.h
fs-util.c
fs-util.h
@ -82,9 +86,9 @@ basic_sources_plain = files('''
hostname-util.h
in-addr-util.c
in-addr-util.h
ioprio.h
io-util.c
io-util.h
ioprio.h
journal-importer.c
journal-importer.h
khash.c
@ -106,13 +110,11 @@ basic_sources_plain = files('''
mempool.c
mempool.h
missing_syscall.h
mkdir-label.c
mkdir.c
mkdir.h
mkdir-label.c
mount-util.c
mount-util.h
MurmurHash2.c
MurmurHash2.h
nss-util.h
ordered-set.c
ordered-set.h
@ -138,9 +140,9 @@ basic_sources_plain = files('''
rlimit-util.h
rm-rf.c
rm-rf.h
securebits.h
securebits-util.c
securebits-util.h
securebits.h
selinux-util.c
selinux-util.h
set.h

View File

@ -22,6 +22,8 @@
/* Missing glibc definitions to access certain kernel APIs */
#include <sys/types.h>
#if !HAVE_DECL_PIVOT_ROOT
static inline int pivot_root(const char *new_root, const char *put_old) {
return syscall(SYS_pivot_root, new_root, put_old);
@ -316,3 +318,33 @@ static inline ssize_t copy_file_range(int fd_in, loff_t *off_in,
# endif
}
#endif
#if !HAVE_DECL_BPF
/* Supply the syscall number ourselves if the installed headers do not know bpf() yet */
#  if !defined(__NR_bpf)
#    if defined(__i386__)
#      define __NR_bpf 357
#    elif defined(__x86_64__)
#      define __NR_bpf 321
#    elif defined(__aarch64__)
#      define __NR_bpf 280
#    elif defined(__sparc__)
#      define __NR_bpf 349
#    elif defined(__s390__)
#      define __NR_bpf 351
#    else
#      warning "__NR_bpf not defined for your architecture"
#    endif
#  endif

union bpf_attr;

/* Thin wrapper around the raw bpf() system call. On architectures where no syscall number is
 * known, the call fails with ENOSYS. */
static inline int bpf(int cmd, union bpf_attr *attr, size_t size) {
#  if defined(__NR_bpf)
        return (int) syscall(__NR_bpf, cmd, attr, size);
#  else
        errno = ENOSYS;
        return -1;
#  endif
}
#endif

View File

@ -42,7 +42,8 @@ int setrlimit_closest(int resource, const struct rlimit *rlim) {
/* So we failed to set the desired setrlimit, then let's try
* to get as close as we can */
assert_se(getrlimit(resource, &highest) == 0);
if (getrlimit(resource, &highest) < 0)
return -errno;
fixed.rlim_cur = MIN(rlim->rlim_cur, highest.rlim_max);
fixed.rlim_max = MIN(rlim->rlim_max, highest.rlim_max);

View File

@ -83,7 +83,7 @@ int socket_address_listen(
return -errno;
}
if (socket_address_family(a) == AF_INET || socket_address_family(a) == AF_INET6) {
if (IN_SET(socket_address_family(a), AF_INET, AF_INET6)) {
if (bind_to_device)
if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, bind_to_device, strlen(bind_to_device)+1) < 0)
return -errno;

680
src/core/bpf-firewall.c Normal file
View File

@ -0,0 +1,680 @@
/***
This file is part of systemd.
Copyright 2016 Daniel Mack
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/libbpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bpf-program.h"
#include "fd-util.h"
#include "ip-address-access.h"
#include "unit.h"
/* Keys of the two counters kept in each accounting map */
enum {
        MAP_KEY_PACKETS = 0,
        MAP_KEY_BYTES   = 1,
};

/* Verdict bits, OR-ed together in R8 by the generated lookup code */
enum {
        ACCESS_ALLOWED = 1 << 0,
        ACCESS_DENIED  = 1 << 1,
};
/*
 * Compile instructions for one list of addresses, one direction and one specific verdict on matches.
 *
 * Appends to @p a block that looks up the packet's source (ingress) or destination (egress)
 * address in the LPM trie map @map_fd and ORs @verdict into R8 on a match. @protocol selects
 * the address family (ETH_P_IP or ETH_P_IPV6) and thereby the address size and header offset.
 *
 * The emitted code relies on the register setup done by bpf_firewall_compile_bpf():
 * R6 = skb, R7 = skb->protocol, R8 = verdict bits.
 *
 * Returns 0 on success, -EAFNOSUPPORT for any other @protocol, or the error returned by
 * bpf_program_add_instructions().
 */
static int add_lookup_instructions(
BPFProgram *p,
int map_fd,
int protocol,
bool is_ingress,
int verdict) {
int r, addr_offset, addr_size;
assert(p);
assert(map_fd >= 0);
/* Pick address width and the offset of the relevant address field within the IP header */
switch (protocol) {
case ETH_P_IP:
addr_size = sizeof(uint32_t);
addr_offset = is_ingress ?
offsetof(struct iphdr, saddr) :
offsetof(struct iphdr, daddr);
break;
case ETH_P_IPV6:
addr_size = 4 * sizeof(uint32_t);
addr_offset = is_ingress ?
offsetof(struct ip6_hdr, ip6_src.s6_addr) :
offsetof(struct ip6_hdr, ip6_dst.s6_addr);
break;
default:
return -EAFNOSUPPORT;
}
do {
/* The same instruction template serves both families; only addr_size and addr_offset differ */
struct bpf_insn insn[] = {
/* If skb->protocol (cached in R7) is not @protocol, skip this whole block. The offset will be set later. */
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),
/*
 * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
 *
 * R1: Pointer to the skb
 * R2: Data offset
 * R3: Destination buffer on the stack (r10 - addr_size)
 * R4: Number of bytes to read (addr_size)
 */
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_MOV32_IMM(BPF_REG_2, addr_offset),
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),
BPF_MOV32_IMM(BPF_REG_4, addr_size),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
/*
 * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
 * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
 * has to be set to the maximum possible value (addr_size * 8 bits). The key is built on
 * the stack: the 32-bit prefixlen is stored directly below the address loaded above, and
 * R2 points at it.
 *
 * On success, the looked up value is stored in R0. For this application, the actual
 * value doesn't matter, however; we just set the bit in @verdict in R8 if we found any
 * matching value.
 */
BPF_LD_MAP_FD(BPF_REG_1, map_fd),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)),
BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
};
/* Jump label fixup: make the protocol check at insn[0] jump over all remaining instructions of this block */
insn[0].off = ELEMENTSOF(insn) - 1;
r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
if (r < 0)
return r;
} while (false);
return 0;
}
/*
 * Assembles the eBPF program for one direction (ingress or egress) of unit @u from the map fds
 * stored in the unit: prologue, optional address lookups, verdict computation, optional
 * accounting and the final exit instruction.
 *
 * On success 0 is returned and the new program is stored in @ret. If neither an accounting map
 * for this direction nor any access map is set up, @ret is set to NULL and 0 is returned.
 * On failure a negative error from the bpf_program_*() helpers or add_lookup_instructions()
 * is returned.
 */
static int bpf_firewall_compile_bpf(
Unit *u,
bool is_ingress,
BPFProgram **ret) {
struct bpf_insn pre_insn[] = {
/*
 * When the eBPF program is entered, R1 contains the address of the skb.
 * However, R1-R5 are scratch registers that are not preserved when calling
 * into kernel functions, so we need to save anything that's supposed to
 * stay around to R6-R9. Save the skb to R6.
 */
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
/*
 * Although we cannot access the skb data directly from eBPF programs used in this
 * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
 * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
 * for later use.
 */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),
/*
 * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
 * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
 */
BPF_MOV32_IMM(BPF_REG_8, 0),
};
/*
 * The access checkers compiled for the configured allowance and denial lists
 * write to R8 at runtime. The following code prepares for an early exit that
 * skips the accounting if the packet is denied.
 *
 * R0 = 1
 * if (R8 == ACCESS_DENIED)
 * R0 = 0
 *
 * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
 * is allowed to pass.
 */
struct bpf_insn post_insn[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
BPF_MOV64_IMM(BPF_REG_0, 0),
};
_cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
int accounting_map_fd, r;
bool access_enabled;
assert(u);
assert(ret);
/* Each direction has its own accounting map */
accounting_map_fd = is_ingress ?
u->ip_accounting_ingress_map_fd :
u->ip_accounting_egress_map_fd;
access_enabled =
u->ipv4_allow_map_fd >= 0 ||
u->ipv6_allow_map_fd >= 0 ||
u->ipv4_deny_map_fd >= 0 ||
u->ipv6_deny_map_fd >= 0;
/* Neither accounting nor filtering requested: no program is needed at all */
if (accounting_map_fd < 0 && !access_enabled) {
*ret = NULL;
return 0;
}
r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
if (r < 0)
return r;
r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
if (r < 0)
return r;
if (access_enabled) {
/*
 * The simple rule this function translates into eBPF instructions is:
 *
 * - Access will be granted when an address matches an entry in one of the allow maps
 * - Otherwise, access will be denied when an address matches an entry in one of the deny maps
 * - Otherwise, access will be granted
 */
if (u->ipv4_deny_map_fd >= 0) {
r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
if (r < 0)
return r;
}
if (u->ipv6_deny_map_fd >= 0) {
r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
if (r < 0)
return r;
}
if (u->ipv4_allow_map_fd >= 0) {
r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
if (r < 0)
return r;
}
if (u->ipv6_allow_map_fd >= 0) {
r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
if (r < 0)
return r;
}
}
/* Turn the verdict bits collected in R8 into the return value in R0 */
r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn));
if (r < 0)
return r;
if (accounting_map_fd >= 0) {
struct bpf_insn insn[] = {
/*
 * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
 * The jump label will be fixed up later.
 */
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
/* Count packets */
BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
/* Count bytes */
BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
/* Allow the packet to pass (R0 was clobbered by the map lookups above) */
BPF_MOV64_IMM(BPF_REG_0, 1),
};
/* Jump label fixup: the denied case jumps past the whole accounting block, keeping R0 == 0 */
insn[0].off = ELEMENTSOF(insn) - 1;
r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
if (r < 0)
return r;
}
do {
/*
 * Exit from the eBPF program, R0 contains the verdict.
 * 0 means the packet is denied, 1 means the packet may pass.
 */
struct bpf_insn insn[] = {
BPF_EXIT_INSN()
};
r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
if (r < 0)
return r;
} while (false);
/* Hand the program over to the caller and disarm the cleanup handler */
*ret = p;
p = NULL;
return 0;
}
/* Adds the number of IPv4 and IPv6 entries found in 'list' to *n_ipv4 and *n_ipv6. The counters
 * are only incremented, never reset, so that several lists can be summed up. Returns
 * -EAFNOSUPPORT as soon as an entry of any other address family is encountered. */
static int bpf_firewall_count_access_items(IPAddressAccessItem *list, size_t *n_ipv4, size_t *n_ipv6) {
        IPAddressAccessItem *item;

        assert(n_ipv4);
        assert(n_ipv6);

        LIST_FOREACH(items, item, list) {
                if (item->family == AF_INET)
                        (*n_ipv4)++;
                else if (item->family == AF_INET6)
                        (*n_ipv6)++;
                else
                        return -EAFNOSUPPORT;
        }

        return 0;
}
/* Inserts every entry of 'list' into the LPM trie map of its address family, using the entry's
 * prefix length and address as key and 'verdict' as value. Returns 0 on success, the error of
 * the first failing map update, or -EAFNOSUPPORT for an entry of an unknown family. */
static int bpf_firewall_add_access_items(
                IPAddressAccessItem *list,
                int ipv4_map_fd,
                int ipv6_map_fd,
                int verdict) {

        struct bpf_lpm_trie_key *key4, *key6;
        uint64_t value = verdict;
        IPAddressAccessItem *item;

        /* One reusable key buffer per family: the fixed header followed by the raw address */
        key4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t));
        key6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4);

        LIST_FOREACH(items, item, list) {
                struct bpf_lpm_trie_key *key;
                size_t addr_len;
                int fd, r;

                if (item->family == AF_INET) {
                        key = key4;
                        addr_len = sizeof(uint32_t);
                        fd = ipv4_map_fd;
                } else if (item->family == AF_INET6) {
                        key = key6;
                        addr_len = 4 * sizeof(uint32_t);
                        fd = ipv6_map_fd;
                } else
                        return -EAFNOSUPPORT;

                key->prefixlen = item->prefixlen;
                memcpy(key->data, &item->address, addr_len);

                r = bpf_map_update_element(fd, key, &value);
                if (r < 0)
                        return r;
        }

        return 0;
}
/* Creates and fills the IPv4 and IPv6 LPM trie maps for one verdict (ACCESS_ALLOWED selects the
 * allow lists, anything else the deny lists). The lists of the unit itself and of all slices it
 * is contained in are merged into the same maps.
 *
 * On success returns 0 and stores the map fds in *ret_ipv4_map_fd and *ret_ipv6_map_fd; a map
 * that would be empty is not created and reported as -1. On failure a negative error is returned,
 * the output parameters are left untouched and any map created so far is closed again. */
static int bpf_firewall_prepare_access_maps(
                Unit *u,
                int verdict,
                int *ret_ipv4_map_fd,
                int *ret_ipv6_map_fd) {

        _cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
        size_t n_ipv4 = 0, n_ipv6 = 0;
        Unit *p;
        int r;

        assert(ret_ipv4_map_fd);
        assert(ret_ipv6_map_fd);

        /* First pass: count the entries per family, so that the maps can be sized properly */
        for (p = u; p; p = UNIT_DEREF(p->slice)) {
                CGroupContext *cc;

                cc = unit_get_cgroup_context(p);
                if (!cc)
                        continue;

                /* Don't go on with a partial count if a list contains an entry of an unsupported family */
                r = bpf_firewall_count_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny, &n_ipv4, &n_ipv6);
                if (r < 0)
                        return r;
        }

        if (n_ipv4 > 0) {
                ipv4_map_fd = bpf_map_new(
                                BPF_MAP_TYPE_LPM_TRIE,
                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
                                sizeof(uint64_t),
                                n_ipv4,
                                BPF_F_NO_PREALLOC);
                if (ipv4_map_fd < 0)
                        return ipv4_map_fd;
        }

        if (n_ipv6 > 0) {
                ipv6_map_fd = bpf_map_new(
                                BPF_MAP_TYPE_LPM_TRIE,
                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)*4,
                                sizeof(uint64_t),
                                n_ipv6,
                                BPF_F_NO_PREALLOC);
                if (ipv6_map_fd < 0)
                        return ipv6_map_fd;
        }

        /* Second pass: actually insert the entries */
        for (p = u; p; p = UNIT_DEREF(p->slice)) {
                CGroupContext *cc;

                cc = unit_get_cgroup_context(p);
                if (!cc)
                        continue;

                r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
                                                  ipv4_map_fd, ipv6_map_fd, verdict);
                if (r < 0)
                        return r;
        }

        /* Pass ownership of the fds to the caller and disarm the cleanup handlers */
        *ret_ipv4_map_fd = ipv4_map_fd;
        *ret_ipv6_map_fd = ipv6_map_fd;

        ipv4_map_fd = ipv6_map_fd = -1;
        return 0;
}
/* Brings the two accounting maps in line with the 'enabled' setting: if accounting is on, any
 * map that does not exist yet (fd < 0) is created while existing ones are kept; if it is off,
 * both fds are closed and reset. Returns 0 on success or the error from bpf_map_new(). */
static int bpf_firewall_prepare_accounting_maps(bool enabled, int *fd_ingress, int *fd_egress) {
        int *fds[2];
        size_t k;

        assert(fd_ingress);
        assert(fd_egress);

        if (!enabled) {
                *fd_ingress = safe_close(*fd_ingress);
                *fd_egress = safe_close(*fd_egress);
                return 0;
        }

        fds[0] = fd_ingress;
        fds[1] = fd_egress;

        for (k = 0; k < 2; k++) {
                int fd;

                if (*fds[k] >= 0) /* already there, keep the counters */
                        continue;

                /* Two slots: one for the packet counter, one for the byte counter */
                fd = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
                if (fd < 0)
                        return fd;

                *fds[k] = fd;
        }

        return 0;
}
/* Rebuilds the access maps and the ingress/egress BPF programs of a unit from its current
 * configuration. Returns 0 on success, -EOPNOTSUPP if BPF firewalling is not available, -EINVAL
 * if the unit has no cgroup context, or the (logged) error of the failing step. */
int bpf_firewall_compile(Unit *u) {
        CGroupContext *cc;
        int supported, r;

        assert(u);

        supported = bpf_firewall_supported();
        if (supported < 0)
                return supported;
        if (!supported) {
                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
                return -EOPNOTSUPP;
        }

        /* Note that when we compile a new firewall we first flush out the access maps and the BPF programs
         * themselves, but we reuse the accounting maps. That way the firewall in effect always maps to the
         * actual configuration, but we don't flush out the accounting unnecessarily */

        u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
        u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);

        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);

        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);

        cc = unit_get_cgroup_context(u);
        if (!cc)
                return -EINVAL;

        /* Maps first ... */
        r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF allow maps failed: %m");

        r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF deny maps failed: %m");

        r = bpf_firewall_prepare_accounting_maps(cc->ip_accounting, &u->ip_accounting_ingress_map_fd, &u->ip_accounting_egress_map_fd);
        if (r < 0)
                return log_error_errno(r, "Preparation of eBPF accounting maps failed: %m");

        /* ... then the programs referencing them */
        r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress);
        if (r < 0)
                return log_error_errno(r, "Compilation for ingress BPF program failed: %m");

        r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress);
        if (r < 0)
                return log_error_errno(r, "Compilation for egress BPF program failed: %m");

        return 0;
}
/* Loads the compiled egress and ingress programs of a unit into the kernel and attaches them to
 * the unit's cgroup. For a direction without a compiled program, the cgroup's program for that
 * direction is detached instead. Returns 0 on success, -EINVAL if the unit has no cgroup path or
 * cgroup context, -EOPNOTSUPP if BPF firewalling is unavailable, or the (logged) error of the
 * failing step. */
int bpf_firewall_install(Unit *u) {
        _cleanup_free_ char *path = NULL;
        CGroupContext *cc;
        unsigned attach_flags;
        int r;

        assert(u);

        if (!u->cgroup_path)
                return -EINVAL;

        cc = unit_get_cgroup_context(u);
        if (!cc)
                return -EINVAL;

        r = bpf_firewall_supported();
        if (r < 0)
                return r;
        if (r == 0) {
                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
                return -EOPNOTSUPP;
        }

        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &path);
        if (r < 0)
                return log_error_errno(r, "Failed to determine cgroup path: %m");

        /* Both directions are attached with the same flags, derived from the Delegate= setting */
        attach_flags = cc->delegate ? BPF_F_ALLOW_OVERRIDE : 0;

        if (!u->ip_bpf_egress) {
                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_EGRESS, path);
                if (r < 0)
                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
                                              "Detaching egress BPF program from cgroup failed: %m");
        } else {
                r = bpf_program_load_kernel(u->ip_bpf_egress, NULL, 0);
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of egress BPF program failed: %m");

                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, path, attach_flags);
                if (r < 0)
                        return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", path);
        }

        if (!u->ip_bpf_ingress) {
                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_INGRESS, path);
                if (r < 0)
                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
                                              "Detaching ingress BPF program from cgroup failed: %m");
        } else {
                r = bpf_program_load_kernel(u->ip_bpf_ingress, NULL, 0);
                if (r < 0)
                        return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m");

                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, path, attach_flags);
                if (r < 0)
                        return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", path);
        }

        return 0;
}
/* Reads the byte and/or packet counter from an accounting map. Either output pointer may be NULL
 * to skip that counter. The outputs are only written if all requested lookups succeed.
 *
 * Returns 0 on success, -EBADF if map_fd is negative, or the error from
 * bpf_map_lookup_element(). */
int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
        /* The accounting maps are created with 32-bit keys (and the BPF program stores a 32-bit
         * key, too), hence the key handed to the kernel must be exactly that wide. With a wider
         * variable, big-endian machines would always pass the zero upper half as key. */
        uint32_t key;
        uint64_t packets;
        int r;

        if (map_fd < 0)
                return -EBADF;

        if (ret_packets) {
                key = MAP_KEY_PACKETS;
                /* Read into a temporary, so that *ret_packets stays untouched if the byte lookup fails */
                r = bpf_map_lookup_element(map_fd, &key, &packets);
                if (r < 0)
                        return r;
        }

        if (ret_bytes) {
                key = MAP_KEY_BYTES;
                r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
                if (r < 0)
                        return r;
        }

        if (ret_packets)
                *ret_packets = packets;

        return 0;
}
/* Resets both the packet and the byte counter of an accounting map to zero. Returns -EBADF if
 * map_fd is negative, otherwise the result of the bpf_map_update_element() calls. */
int bpf_firewall_reset_accounting(int map_fd) {
        /* The accounting maps use 32-bit keys, hence the key handed to the kernel must be exactly
         * that wide. With a wider variable, big-endian machines would always pass the zero upper
         * half as key and reset the packet counter twice. */
        uint32_t key;
        uint64_t value = 0;
        int r;

        if (map_fd < 0)
                return -EBADF;

        key = MAP_KEY_PACKETS;
        r = bpf_map_update_element(map_fd, &key, &value);
        if (r < 0)
                return r;

        key = MAP_KEY_BYTES;
        return bpf_map_update_element(map_fd, &key, &value);
}
/* Returns > 0 if BPF firewalling can be used, 0 if not, and a negative error if the cgroup setup
 * cannot be determined. A definite yes/no answer is cached, errors are not. */
int bpf_firewall_supported(void) {
        static int supported = -1;
        int fd, r;

        /* Checks whether BPF firewalling is supported. For this, we check three things:
         *
         * a) whether we are privileged
         * b) whether the unified hierarchy is being used
         * c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require
         *
         */

        if (supported >= 0)
                return supported;

        if (geteuid() != 0)
                return supported = false;

        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
        if (r < 0)
                return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
        if (r == 0)
                return supported = false;

        /* Probe by creating a throw-away LPM trie map */
        fd = bpf_map_new(BPF_MAP_TYPE_LPM_TRIE,
                         offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint64_t),
                         sizeof(uint64_t),
                         1,
                         BPF_F_NO_PREALLOC);
        if (fd < 0) {
                /* The error is carried in fd; r still holds the positive result of the check above */
                log_debug_errno(fd, "Can't allocate BPF LPM TRIE map, BPF firewalling is not supported: %m");
                return supported = false;
        }

        safe_close(fd);

        return supported = true;
}

32
src/core/bpf-firewall.h Normal file
View File

@ -0,0 +1,32 @@
#pragma once
/***
This file is part of systemd.
Copyright 2016 Daniel Mack
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <inttypes.h>
#include "unit.h"
/* Returns > 0 if BPF firewalling is available, 0 if not, negative on error. */
int bpf_firewall_supported(void);
/* Builds the access/accounting maps and the ingress/egress programs of a unit. */
int bpf_firewall_compile(Unit *u);
/* Loads the compiled programs and attaches them to (or detaches them from) the unit's cgroup. */
int bpf_firewall_install(Unit *u);
/* Reads the counters of an accounting map; either output pointer may be NULL. */
int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
/* Resets both counters of an accounting map to zero. */
int bpf_firewall_reset_accounting(int map_fd);

View File

@ -21,6 +21,7 @@
#include <fnmatch.h>
#include "alloc-util.h"
#include "bpf-firewall.h"
#include "cgroup-util.h"
#include "cgroup.h"
#include "fd-util.h"
@ -30,9 +31,9 @@
#include "path-util.h"
#include "process-util.h"
#include "special.h"
#include "stdio-util.h"
#include "string-table.h"
#include "string-util.h"
#include "stdio-util.h"
#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
@ -141,6 +142,9 @@ void cgroup_context_done(CGroupContext *c) {
while (c->device_allow)
cgroup_context_free_device_allow(c, c->device_allow);
c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);
}
void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
@ -149,6 +153,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
CGroupBlockIODeviceBandwidth *b;
CGroupBlockIODeviceWeight *w;
CGroupDeviceAllow *a;
IPAddressAccessItem *iaai;
char u[FORMAT_TIMESPAN_MAX];
assert(c);
@ -162,6 +167,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
"%sBlockIOAccounting=%s\n"
"%sMemoryAccounting=%s\n"
"%sTasksAccounting=%s\n"
"%sIPAccounting=%s\n"
"%sCPUWeight=%" PRIu64 "\n"
"%sStartupCPUWeight=%" PRIu64 "\n"
"%sCPUShares=%" PRIu64 "\n"
@ -184,6 +190,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
prefix, yes_no(c->blockio_accounting),
prefix, yes_no(c->memory_accounting),
prefix, yes_no(c->tasks_accounting),
prefix, yes_no(c->ip_accounting),
prefix, c->cpu_weight,
prefix, c->startup_cpu_weight,
prefix, c->cpu_shares,
@ -253,6 +260,20 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
b->path,
format_bytes(buf, sizeof(buf), b->wbps));
}
LIST_FOREACH(items, iaai, c->ip_address_allow) {
_cleanup_free_ char *k = NULL;
(void) in_addr_to_string(iaai->family, &iaai->address, &k);
fprintf(f, "%sIPAddressAllow=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
}
LIST_FOREACH(items, iaai, c->ip_address_deny) {
_cleanup_free_ char *k = NULL;
(void) in_addr_to_string(iaai->family, &iaai->address, &k);
fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
}
}
static int lookup_block_device(const char *p, dev_t *dev) {
@ -645,7 +666,27 @@ static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_
"Failed to set %s: %m", file);
}
static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
/* Compiles and installs the BPF firewall of a unit. This is best-effort: failures are not
 * propagated to the caller. */
static void cgroup_apply_firewall(Unit *u, CGroupContext *c) {

        /* Skip this for slice units, they are inner cgroup nodes, and since bpf/cgroup is not recursive we
         * don't ever touch the bpf on them */
        if (u->type == UNIT_SLICE)
                return;

        /* Nothing to install if compilation did not succeed */
        if (bpf_firewall_compile(u) < 0)
                return;

        (void) bpf_firewall_install(u);
}
static void cgroup_context_apply(
Unit *u,
CGroupMask apply_mask,
bool apply_bpf,
ManagerState state) {
const char *path;
CGroupContext *c;
bool is_root;
@ -659,7 +700,8 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
assert(c);
assert(path);
if (mask == 0)
/* Nothing to do? Exit early! */
if (apply_mask == 0 && !apply_bpf)
return;
/* Some cgroup attributes are not supported on the root cgroup,
@ -673,9 +715,11 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
* cgroup trees (assuming we are running in a container then),
* and missing cgroups, i.e. EROFS and ENOENT. */
if ((mask & CGROUP_MASK_CPU) && !is_root) {
bool has_weight = cgroup_context_has_cpu_weight(c);
bool has_shares = cgroup_context_has_cpu_shares(c);
if ((apply_mask & CGROUP_MASK_CPU) && !is_root) {
bool has_weight, has_shares;
has_weight = cgroup_context_has_cpu_weight(c);
has_shares = cgroup_context_has_cpu_shares(c);
if (cg_all_unified() > 0) {
uint64_t weight;
@ -712,7 +756,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
if (mask & CGROUP_MASK_IO) {
if (apply_mask & CGROUP_MASK_IO) {
bool has_io = cgroup_context_has_io_config(c);
bool has_blockio = cgroup_context_has_blockio_config(c);
@ -789,7 +833,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
if (mask & CGROUP_MASK_BLKIO) {
if (apply_mask & CGROUP_MASK_BLKIO) {
bool has_io = cgroup_context_has_io_config(c);
bool has_blockio = cgroup_context_has_blockio_config(c);
@ -856,7 +900,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
if ((apply_mask & CGROUP_MASK_MEMORY) && !is_root) {
if (cg_all_unified() > 0) {
uint64_t max, swap_max = CGROUP_LIMIT_MAX;
@ -896,7 +940,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
if ((mask & CGROUP_MASK_DEVICES) && !is_root) {
if ((apply_mask & CGROUP_MASK_DEVICES) && !is_root) {
CGroupDeviceAllow *a;
/* Changing the devices list of a populated cgroup
@ -960,7 +1004,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
}
}
if ((mask & CGROUP_MASK_PIDS) && !is_root) {
if ((apply_mask & CGROUP_MASK_PIDS) && !is_root) {
if (c->tasks_max != CGROUP_LIMIT_MAX) {
char buf[DECIMAL_STR_MAX(uint64_t) + 2];
@ -974,6 +1018,9 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
"Failed to set pids.max: %m");
}
if (apply_bpf)
cgroup_apply_firewall(u, c);
}
CGroupMask cgroup_context_get_mask(CGroupContext *c) {
@ -1120,6 +1167,39 @@ CGroupMask unit_get_enable_mask(Unit *u) {
return mask;
}
/* Determines whether a BPF program has to be attached to the cgroup of a unit: that is the case
 * if the unit itself enables IP accounting or carries an IP access list, or if any of the slices
 * it is contained in carries an IP access list. */
bool unit_get_needs_bpf(Unit *u) {
        CGroupContext *ctx;
        Unit *slice;

        assert(u);

        /* We never attach BPF to slice units, as they are inner cgroup nodes and cgroup/BPF is not recursive at the
         * moment. */
        if (u->type == UNIT_SLICE)
                return false;

        ctx = unit_get_cgroup_context(u);
        if (!ctx)
                return false;

        if (ctx->ip_accounting || ctx->ip_address_allow || ctx->ip_address_deny)
                return true;

        /* If any parent slice has an IP access list defined, it applies too */
        slice = UNIT_DEREF(u->slice);
        while (slice) {
                ctx = unit_get_cgroup_context(slice);
                if (!ctx)
                        return false;

                if (ctx->ip_address_allow || ctx->ip_address_deny)
                        return true;

                slice = UNIT_DEREF(slice->slice);
        }

        return false;
}
/* Recurse from a unit up through its containing slices, propagating
* mask bits upward. A unit is also member of itself. */
void unit_update_cgroup_members_masks(Unit *u) {
@ -1295,7 +1375,8 @@ int unit_watch_cgroup(Unit *u) {
static int unit_create_cgroup(
Unit *u,
CGroupMask target_mask,
CGroupMask enable_mask) {
CGroupMask enable_mask,
bool needs_bpf) {
CGroupContext *c;
int r;
@ -1337,6 +1418,7 @@ static int unit_create_cgroup(
u->cgroup_realized = true;
u->cgroup_realized_mask = target_mask;
u->cgroup_enabled_mask = enable_mask;
u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
if (u->type != UNIT_SLICE && !c->delegate) {
@ -1386,10 +1468,19 @@ static void cgroup_xattr_apply(Unit *u) {
log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
}
static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
static bool unit_has_mask_realized(
Unit *u,
CGroupMask target_mask,
CGroupMask enable_mask,
bool needs_bpf) {
assert(u);
return u->cgroup_realized && u->cgroup_realized_mask == target_mask && u->cgroup_enabled_mask == enable_mask;
return u->cgroup_realized &&
u->cgroup_realized_mask == target_mask &&
u->cgroup_enabled_mask == enable_mask &&
((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) ||
(!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
}
/* Check if necessary controllers and attributes for a unit are in place.
@ -1400,6 +1491,7 @@ static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask e
* Returns 0 on success and < 0 on failure. */
static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
CGroupMask target_mask, enable_mask;
bool needs_bpf, apply_bpf;
int r;
assert(u);
@ -1411,10 +1503,16 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
target_mask = unit_get_target_mask(u);
enable_mask = unit_get_enable_mask(u);
needs_bpf = unit_get_needs_bpf(u);
if (unit_has_mask_realized(u, target_mask, enable_mask))
if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf))
return 0;
/* Make sure we apply the BPF filters either when one is configured, or if none is configured but previously
* the state was anything but off. This way, if a unit with a BPF filter applied is reconfigured to lose it
* this will trickle down properly to cgroupfs. */
apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF;
/* First, realize parents */
if (UNIT_ISSET(u->slice)) {
r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
@ -1423,18 +1521,19 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
}
/* And then do the real work */
r = unit_create_cgroup(u, target_mask, enable_mask);
r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf);
if (r < 0)
return r;
/* Finally, apply the necessary attributes. */
cgroup_context_apply(u, target_mask, state);
cgroup_context_apply(u, target_mask, apply_bpf, state);
cgroup_xattr_apply(u);
return 0;
}
static void unit_add_to_cgroup_queue(Unit *u) {
assert(u);
if (u->in_cgroup_queue)
return;
@ -1492,7 +1591,10 @@ static void unit_queue_siblings(Unit *u) {
/* If the unit doesn't need any new controllers
* and has current ones realized, it doesn't need
* any changes. */
if (unit_has_mask_realized(m, unit_get_target_mask(m), unit_get_enable_mask(m)))
if (unit_has_mask_realized(m,
unit_get_target_mask(m),
unit_get_enable_mask(m),
unit_get_needs_bpf(m)))
continue;
unit_add_to_cgroup_queue(m);
@ -1756,6 +1858,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,
int manager_setup_cgroup(Manager *m) {
_cleanup_free_ char *path = NULL;
const char *scope_path;
CGroupController c;
int r, all_unified;
char *e;
@ -1813,74 +1916,67 @@ int manager_setup_cgroup(Manager *m) {
log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path);
}
if (!m->test_run_flags) {
const char *scope_path;
/* 3. Install agent */
if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
/* 3. Install agent */
if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
/* In the unified hierarchy we can get
* cgroup empty notifications via inotify. */
/* In the unified hierarchy we can get
* cgroup empty notifications via inotify. */
m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
safe_close(m->cgroup_inotify_fd);
m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
safe_close(m->cgroup_inotify_fd);
m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
if (m->cgroup_inotify_fd < 0)
return log_error_errno(errno, "Failed to create control group inotify object: %m");
m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
if (m->cgroup_inotify_fd < 0)
return log_error_errno(errno, "Failed to create control group inotify object: %m");
r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
if (r < 0)
return log_error_errno(r, "Failed to watch control group inotify object: %m");
/* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
* see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
if (r < 0)
return log_error_errno(r, "Failed to set priority of inotify event source: %m");
(void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
} else if (MANAGER_IS_SYSTEM(m)) {
/* On the legacy hierarchy we only get
* notifications via cgroup agents. (Which
* isn't really reliable, since it does not
* generate events when control groups with
* children run empty. */
r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
if (r < 0)
log_warning_errno(r, "Failed to install release agent, ignoring: %m");
else if (r > 0)
log_debug("Installed release agent.");
else if (r == 0)
log_debug("Release agent already installed.");
}
/* 4. Make sure we are in the special "init.scope" unit in the root slice. */
scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
if (r < 0)
return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
return log_error_errno(r, "Failed to watch control group inotify object: %m");
/* also, move all other userspace processes remaining
* in the root cgroup into that scope. */
r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
/* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
* see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
if (r < 0)
log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
return log_error_errno(r, "Failed to set priority of inotify event source: %m");
/* 5. And pin it, so that it cannot be unmounted */
safe_close(m->pin_cgroupfs_fd);
m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
if (m->pin_cgroupfs_fd < 0)
return log_error_errno(errno, "Failed to open pin file: %m");
(void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
/* 6. Always enable hierarchical support if it exists... */
if (!all_unified)
(void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
} else if (MANAGER_IS_SYSTEM(m) && m->test_run_flags == 0) {
/* On the legacy hierarchy we only get notifications via cgroup agents. (Which isn't really reliable,
* since it does not generate events when control groups with children run empty.) */
r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
if (r < 0)
log_warning_errno(r, "Failed to install release agent, ignoring: %m");
else if (r > 0)
log_debug("Installed release agent.");
else if (r == 0)
log_debug("Release agent already installed.");
}
/* 4. Make sure we are in the special "init.scope" unit in the root slice. */
scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
if (r < 0)
return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
/* also, move all other userspace processes remaining
* in the root cgroup into that scope. */
r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
if (r < 0)
log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
/* 5. And pin it, so that it cannot be unmounted */
safe_close(m->pin_cgroupfs_fd);
m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
if (m->pin_cgroupfs_fd < 0)
return log_error_errno(errno, "Failed to open pin file: %m");
/* 6. Always enable hierarchical support if it exists... */
if (!all_unified && m->test_run_flags == 0)
(void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
/* 7. Figure out which controllers are supported */
r = cg_mask_supported(&m->cgroup_supported);
if (r < 0)
@ -1992,11 +2088,18 @@ int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
int unit_get_memory_current(Unit *u, uint64_t *ret) {
_cleanup_free_ char *v = NULL;
CGroupContext *cc;
int r;
assert(u);
assert(ret);
cc = unit_get_cgroup_context(u);
if (!cc)
return -ENODATA;
if (!cc->memory_accounting)
return -ENODATA;
if (!u->cgroup_path)
return -ENODATA;
@ -2020,11 +2123,18 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {
int unit_get_tasks_current(Unit *u, uint64_t *ret) {
_cleanup_free_ char *v = NULL;
CGroupContext *cc;
int r;
assert(u);
assert(ret);
cc = unit_get_cgroup_context(u);
if (!cc)
return -ENODATA;
if (!cc->tasks_accounting)
return -ENODATA;
if (!u->cgroup_path)
return -ENODATA;
@ -2091,6 +2201,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
}
int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
CGroupContext *cc;
nsec_t ns;
int r;
@ -2100,6 +2211,12 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
* started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
* call this function with a NULL return value. */
cc = unit_get_cgroup_context(u);
if (!cc)
return -ENODATA;
if (!cc->cpu_accounting)
return -ENODATA;
r = unit_get_cpu_usage_raw(u, &ns);
if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
/* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
@ -2124,7 +2241,57 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
return 0;
}
int unit_reset_cpu_usage(Unit *u) {
/* Reads one of the per-unit IP accounting counters.
 *
 * u      — the unit to query
 * metric — which counter to read (ingress/egress, bytes/packets)
 * ret    — where the counter value is stored on success
 *
 * Returns -ENODATA if the unit is a slice, has no cgroup context, has IP accounting turned off, or has no
 * accounting map open for the requested direction. Otherwise returns whatever bpf_firewall_read_accounting()
 * returned (negative on failure, in which case *ret is left untouched). */
int unit_get_ip_accounting(
                Unit *u,
                CGroupIPAccountingMetric metric,
                uint64_t *ret) {

        bool is_ingress, is_bytes;
        CGroupContext *cc;
        uint64_t counter;
        int map_fd, r;

        assert(u);
        assert(metric >= 0);
        assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX);
        assert(ret);

        /* IP accounting is not recursive at the moment, so slices get no data at all. They are inner cgroup
         * nodes without processes attached directly, so their counters would read zero anyway. Refusing now
         * keeps the door open for returning real data later, should the kernel learn recursive BPF cgroup
         * filters. */
        if (u->type == UNIT_SLICE)
                return -ENODATA;

        cc = unit_get_cgroup_context(u);
        if (!cc || !cc->ip_accounting)
                return -ENODATA;

        /* Ingress and egress are tracked in separate maps, pick the one matching the metric. */
        is_ingress = metric == CGROUP_IP_INGRESS_BYTES || metric == CGROUP_IP_INGRESS_PACKETS;
        map_fd = is_ingress ? u->ip_accounting_ingress_map_fd : u->ip_accounting_egress_map_fd;
        if (map_fd < 0)
                return -ENODATA;

        /* Each map carries both a byte and a packet counter, request only the one we are interested in. */
        is_bytes = metric == CGROUP_IP_INGRESS_BYTES || metric == CGROUP_IP_EGRESS_BYTES;
        r = bpf_firewall_read_accounting(map_fd,
                                         is_bytes ? &counter : NULL,
                                         is_bytes ? NULL : &counter);
        if (r < 0)
                return r;

        /* On daemon reexec/reload the BPF programs and maps are compiled anew, while the old counters are
         * serialized. After deserialization they live in ip_accounting_extra[], and are transparently added
         * to the live counter here. */
        *ret = counter + u->ip_accounting_extra[metric];

        return r;
}
int unit_reset_cpu_accounting(Unit *u) {
nsec_t ns;
int r;
@ -2142,6 +2309,22 @@ int unit_reset_cpu_usage(Unit *u) {
return 0;
}
/* Resets the IP accounting counters of a unit: both BPF accounting maps (where open) and the counters
 * carried over in ip_accounting_extra[]. Both maps are always attempted; if resetting the ingress map
 * fails its error is returned, otherwise the result of the egress reset is returned. */
int unit_reset_ip_accounting(Unit *u) {
        int ingress_result = 0, egress_result = 0;

        assert(u);

        if (u->ip_accounting_ingress_map_fd >= 0)
                ingress_result = bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd);

        if (u->ip_accounting_egress_map_fd >= 0)
                egress_result = bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd);

        /* Also forget whatever was carried over from before */
        zero(u->ip_accounting_extra);

        if (ingress_result < 0)
                return ingress_result;

        return egress_result;
}
bool unit_cgroup_delegate(Unit *u) {
CGroupContext *c;
@ -2167,6 +2350,9 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO))
m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;
if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
if ((u->cgroup_realized_mask & m) == 0)
return;
@ -2174,6 +2360,36 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
unit_add_to_cgroup_queue(u);
}
/* Marks the BPF state of a unit as invalidated and enqueues the unit in the cgroup queue. Units without a
 * cgroup context and units already marked invalidated are left alone. For slices the invalidation is
 * propagated to all units that are members of the slice. */
void unit_invalidate_cgroup_bpf(Unit *u) {
        Unit *child;
        Iterator it;

        assert(u);

        if (!UNIT_HAS_CGROUP_CONTEXT(u))
                return;

        /* Already marked, nothing to do */
        if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED)
                return;

        u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
        unit_add_to_cgroup_queue(u);

        if (u->type != UNIT_SLICE)
                return;

        /* For slice units a new BPF program needs to be compiled for all children too, as the IP access list
         * of the children includes the one of the slice. */
        SET_FOREACH(child, u->dependencies[UNIT_BEFORE], it) {
                if (child != u && UNIT_DEREF(child->slice) == u)
                        unit_invalidate_cgroup_bpf(child);
        }
}
void manager_invalidate_startup_units(Manager *m) {
Iterator i;
Unit *u;

View File

@ -21,9 +21,10 @@
#include <stdbool.h>
#include "cgroup-util.h"
#include "ip-address-access.h"
#include "list.h"
#include "time-util.h"
#include "cgroup-util.h"
typedef struct CGroupContext CGroupContext;
typedef struct CGroupDeviceAllow CGroupDeviceAllow;
@ -87,6 +88,7 @@ struct CGroupContext {
bool blockio_accounting;
bool memory_accounting;
bool tasks_accounting;
bool ip_accounting;
/* For unified hierarchy */
uint64_t cpu_weight;
@ -103,6 +105,9 @@ struct CGroupContext {
uint64_t memory_max;
uint64_t memory_swap_max;
LIST_HEAD(IPAddressAccessItem, ip_address_allow);
LIST_HEAD(IPAddressAccessItem, ip_address_deny);
/* For legacy hierarchies */
uint64_t cpu_shares;
uint64_t startup_cpu_shares;
@ -123,6 +128,16 @@ struct CGroupContext {
bool delegate;
};
/* Used when querying IP accounting data */
typedef enum CGroupIPAccountingMetric {
CGROUP_IP_INGRESS_BYTES,
CGROUP_IP_INGRESS_PACKETS,
CGROUP_IP_EGRESS_BYTES,
CGROUP_IP_EGRESS_PACKETS,
_CGROUP_IP_ACCOUNTING_METRIC_MAX,
_CGROUP_IP_ACCOUNTING_METRIC_INVALID = -1,
} CGroupIPAccountingMetric;
#include "unit.h"
void cgroup_context_init(CGroupContext *c);
@ -145,6 +160,8 @@ CGroupMask unit_get_subtree_mask(Unit *u);
CGroupMask unit_get_target_mask(Unit *u);
CGroupMask unit_get_enable_mask(Unit *u);
bool unit_get_needs_bpf(Unit *u);
void unit_update_cgroup_members_masks(Unit *u);
char *unit_default_cgroup_path(Unit *u);
@ -172,7 +189,10 @@ int unit_watch_all_pids(Unit *u);
int unit_get_memory_current(Unit *u, uint64_t *ret);
int unit_get_tasks_current(Unit *u, uint64_t *ret);
int unit_get_cpu_usage(Unit *u, nsec_t *ret);
int unit_reset_cpu_usage(Unit *u);
int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
int unit_reset_cpu_accounting(Unit *u);
int unit_reset_ip_accounting(Unit *u);
bool unit_cgroup_delegate(Unit *u);
@ -180,6 +200,7 @@ int unit_notify_cgroup_empty(Unit *u);
int manager_notify_cgroup_empty(Manager *m, const char *group);
void unit_invalidate_cgroup(Unit *u, CGroupMask m);
void unit_invalidate_cgroup_bpf(Unit *u);
void manager_invalidate_startup_units(Manager *m);

View File

@ -17,7 +17,11 @@
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <arpa/inet.h>
#include "af-list.h"
#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bus-util.h"
#include "cgroup-util.h"
#include "cgroup.h"
@ -206,6 +210,48 @@ static int property_get_device_allow(
return sd_bus_message_close_container(reply);
}
/* D-Bus property getter that serializes an IP address access list as "a(iayu)": for each entry the address
 * family, the raw address bytes (sized according to the family) and the prefix length. "userdata" points to
 * the head pointer of the list. Returns the result of closing the outer container, or the first negative
 * sd-bus error encountered. */
static int property_get_ip_address_access(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        IPAddressAccessItem **head = userdata, *entry;
        int r;

        /* Outer array */
        r = sd_bus_message_open_container(reply, 'a', "(iayu)");
        if (r < 0)
                return r;

        LIST_FOREACH(items, entry, *head) {

                /* One struct per list entry */
                r = sd_bus_message_open_container(reply, 'r', "iayu");
                if (r < 0)
                        return r;

                r = sd_bus_message_append(reply, "i", entry->family);
                if (r < 0)
                        return r;

                /* Only as many address bytes as the family actually uses */
                r = sd_bus_message_append_array(reply, 'y', &entry->address, FAMILY_ADDRESS_SIZE(entry->family));
                if (r < 0)
                        return r;

                r = sd_bus_message_append(reply, "u", (uint32_t) entry->prefixlen);
                if (r < 0)
                        return r;

                r = sd_bus_message_close_container(reply);
                if (r < 0)
                        return r;
        }

        return sd_bus_message_close_container(reply);
}
const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
@ -239,6 +285,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
SD_BUS_PROPERTY("TasksAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, tasks_accounting), 0),
SD_BUS_PROPERTY("TasksMax", "t", NULL, offsetof(CGroupContext, tasks_max), 0),
SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
SD_BUS_VTABLE_END
};
@ -1133,6 +1182,7 @@ int bus_cgroup_set_property(
}
return 1;
} else if (streq(name, "TasksMaxScale")) {
uint64_t limit;
uint32_t raw;
@ -1152,6 +1202,137 @@ int bus_cgroup_set_property(
(uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
}
return 1;
} else if (streq(name, "IPAccounting")) {
int b;
r = sd_bus_message_read(message, "b", &b);
if (r < 0)
return r;
if (mode != UNIT_CHECK) {
c->ip_accounting = b;
unit_invalidate_cgroup_bpf(u);
unit_write_drop_in_private(u, mode, name, b ? "IPAccounting=yes" : "IPAccounting=no");
}
return 1;
} else if (STR_IN_SET(name, "IPAddressAllow", "IPAddressDeny")) {
IPAddressAccessItem **list;
size_t n = 0;
list = streq(name, "IPAddressAllow") ? &c->ip_address_allow : &c->ip_address_deny;
r = sd_bus_message_enter_container(message, 'a', "(iayu)");
if (r < 0)
return r;
for (;;) {
const void *ap;
int32_t family;
uint32_t prefixlen;
size_t an;
r = sd_bus_message_enter_container(message, 'r', "iayu");
if (r < 0)
return r;
if (r == 0)
break;
r = sd_bus_message_read(message, "i", &family);
if (r < 0)
return r;
if (!IN_SET(family, AF_INET, AF_INET6))
return sd_bus_error_set_errnof(error, EINVAL, "IPAddressAllow= expects IPv4 or IPv6 addresses only.");
r = sd_bus_message_read_array(message, 'y', &ap, &an);
if (r < 0)
return r;
if (an != FAMILY_ADDRESS_SIZE(family))
return sd_bus_error_set_errnof(error, EINVAL, "IP address has wrong size for family (%s, expected %zu, got %zu)",
af_to_name(family), FAMILY_ADDRESS_SIZE(family), an);
r = sd_bus_message_read(message, "u", &prefixlen);
if (r < 0)
return r;
if (prefixlen > FAMILY_ADDRESS_SIZE(family)*8)
return sd_bus_error_set_errnof(error, EINVAL, "Prefix length too large for family.");
if (mode != UNIT_CHECK) {
IPAddressAccessItem *item;
item = new0(IPAddressAccessItem, 1);
if (!item)
return -ENOMEM;
item->family = family;
item->prefixlen = prefixlen;
memcpy(&item->address, ap, an);
LIST_PREPEND(items, *list, item);
}
r = sd_bus_message_exit_container(message);
if (r < 0)
return r;
n++;
}
r = sd_bus_message_exit_container(message);
if (r < 0)
return r;
*list = ip_address_access_reduce(*list);
if (mode != UNIT_CHECK) {
_cleanup_free_ char *buf = NULL;
_cleanup_fclose_ FILE *f = NULL;
IPAddressAccessItem *item;
size_t size = 0;
if (n == 0)
*list = ip_address_access_free_all(*list);
unit_invalidate_cgroup_bpf(u);
f = open_memstream(&buf, &size);
if (!f)
return -ENOMEM;
fputs_unlocked(name, f);
fputs_unlocked("=\n", f);
LIST_FOREACH(items, item, *list) {
char buffer[CONST_MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
errno = 0;
if (!inet_ntop(item->family, &item->address, buffer, sizeof(buffer)))
return errno > 0 ? -errno : -EINVAL;
fprintf(f, "%s=%s/%u\n", name, buffer, item->prefixlen);
}
r = fflush_and_check(f);
if (r < 0)
return r;
unit_write_drop_in_private(u, mode, name, buf);
if (*list) {
r = bpf_firewall_supported();
if (r < 0)
return r;
if (r == 0)
log_warning("Transient unit %s configures an IP firewall, but the local system does not support BPF/cgroup firewalling.\n"
"Proceeding WITHOUT firewalling in effect!", u->id);
}
}
return 1;
}

View File

@ -20,6 +20,7 @@
#include "sd-bus.h"
#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bus-common-errors.h"
#include "cgroup-util.h"
#include "dbus-job.h"
@ -1051,6 +1052,39 @@ int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bu
return sd_bus_send(NULL, reply, NULL);
}
/* D-Bus property getter shared by the four IP accounting counter properties. The counter to return is
 * derived from the property name. If the counter cannot be read the property is reported as
 * (uint64_t) -1. */
static int property_get_ip_counter(
                sd_bus *bus,
                const char *path,
                const char *interface,
                const char *property,
                sd_bus_message *reply,
                void *userdata,
                sd_bus_error *error) {

        /* Egress packets is the fallback: it is the only remaining property this getter is registered for */
        CGroupIPAccountingMetric metric = CGROUP_IP_EGRESS_PACKETS;
        uint64_t counter = UINT64_MAX;
        Unit *u = userdata;

        assert(bus);
        assert(reply);
        assert(property);
        assert(u);

        if (streq(property, "IPIngressBytes"))
                metric = CGROUP_IP_INGRESS_BYTES;
        else if (streq(property, "IPIngressPackets"))
                metric = CGROUP_IP_INGRESS_PACKETS;
        else if (streq(property, "IPEgressBytes"))
                metric = CGROUP_IP_EGRESS_BYTES;
        else
                assert(streq(property, "IPEgressPackets"));

        /* Failure is deliberately ignored here, in that case the preset "unavailable" value is sent */
        (void) unit_get_ip_accounting(u, metric, &counter);

        return sd_bus_message_append(reply, "t", counter);
}
const sd_bus_vtable bus_unit_cgroup_vtable[] = {
SD_BUS_VTABLE_START(0),
SD_BUS_PROPERTY("Slice", "s", property_get_slice, 0, 0),
@ -1058,6 +1092,10 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = {
SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0),
SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0),
SD_BUS_PROPERTY("TasksCurrent", "t", property_get_current_tasks, 0, 0),
SD_BUS_PROPERTY("IPIngressBytes", "t", property_get_ip_counter, 0, 0),
SD_BUS_PROPERTY("IPIngressPackets", "t", property_get_ip_counter, 0, 0),
SD_BUS_PROPERTY("IPEgressBytes", "t", property_get_ip_counter, 0, 0),
SD_BUS_PROPERTY("IPEgressPackets", "t", property_get_ip_counter, 0, 0),
SD_BUS_METHOD("GetProcesses", NULL, "a(sus)", bus_unit_method_get_processes, SD_BUS_VTABLE_UNPRIVILEGED),
SD_BUS_VTABLE_END
};

View File

@ -23,13 +23,14 @@
#include "dynamic-user.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "io-util.h"
#include "parse-util.h"
#include "random-util.h"
#include "stdio-util.h"
#include "string-util.h"
#include "user-util.h"
#include "fileio.h"
/* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
#define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
@ -245,8 +246,8 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
/* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */
l = pwritev(lock_fd,
(struct iovec[2]) {
{ .iov_base = (char*) name, .iov_len = strlen(name) },
{ .iov_base = (char[1]) { '\n' }, .iov_len = 1 }
IOVEC_INIT_STRING(name),
IOVEC_INIT((char[1]) { '\n' }, 1),
}, 2, 0);
if (l < 0) {
(void) unlink(lock_path);
@ -271,10 +272,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
uid_t uid = UID_INVALID;
struct iovec iov = {
.iov_base = &uid,
.iov_len = sizeof(uid),
};
struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
union {
struct cmsghdr cmsghdr;
uint8_t buf[CMSG_SPACE(sizeof(int))];
@ -314,10 +312,7 @@ static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
}
static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
struct iovec iov = {
.iov_base = &uid,
.iov_len = sizeof(uid),
};
struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
union {
struct cmsghdr cmsghdr;
uint8_t buf[CMSG_SPACE(sizeof(int))];

View File

@ -2351,9 +2351,9 @@ static int send_user_lookup(
if (writev(user_lookup_fd,
(struct iovec[]) {
{ .iov_base = &uid, .iov_len = sizeof(uid) },
{ .iov_base = &gid, .iov_len = sizeof(gid) },
{ .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
IOVEC_INIT(&uid, sizeof(uid)),
IOVEC_INIT(&gid, sizeof(gid)),
IOVEC_INIT_STRING(unit->id) }, 3) < 0)
return -errno;
return 0;
@ -3150,6 +3150,7 @@ static int exec_child(
"EXECUTABLE=%s", command->path,
LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
LOG_UNIT_ID(unit),
LOG_UNIT_INVOCATION_ID(unit),
NULL);
log_close();
}
@ -3223,6 +3224,7 @@ int exec_spawn(Unit *unit,
LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
"EXECUTABLE=%s", command->path,
LOG_UNIT_ID(unit),
LOG_UNIT_INVOCATION_ID(unit),
NULL);
pid = fork();
if (pid < 0)
@ -3254,6 +3256,7 @@ int exec_spawn(Unit *unit,
log_struct_errno(LOG_ERR, r,
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
LOG_UNIT_ID(unit),
LOG_UNIT_INVOCATION_ID(unit),
LOG_UNIT_MESSAGE(unit, "%s: %m",
error_message),
"EXECUTABLE=%s", command->path,
@ -3262,6 +3265,7 @@ int exec_spawn(Unit *unit,
log_struct_errno(LOG_INFO, r,
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
LOG_UNIT_ID(unit),
LOG_UNIT_INVOCATION_ID(unit),
LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
command->path),
"EXECUTABLE=%s", command->path,
@ -3270,6 +3274,7 @@ int exec_spawn(Unit *unit,
log_struct_errno(LOG_ERR, r,
"MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
LOG_UNIT_ID(unit),
LOG_UNIT_INVOCATION_ID(unit),
LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
command->path),

View File

@ -0,0 +1,217 @@
/***
This file is part of systemd.
Copyright 2016 Daniel Mack
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <stdio.h>
#include <stdlib.h>
#include "alloc-util.h"
#include "bpf-firewall.h"
#include "extract-word.h"
#include "hostname-util.h"
#include "ip-address-access.h"
#include "parse-util.h"
#include "string-util.h"
int config_parse_ip_address_access(
const char *unit,
const char *filename,
unsigned line,
const char *section,
unsigned section_line,
const char *lvalue,
int ltype,
const char *rvalue,
void *data,
void *userdata) {
IPAddressAccessItem **list = data;
const char *p;
int r;
assert(list);
if (isempty(rvalue)) {
*list = ip_address_access_free_all(*list);
return 0;
}
p = rvalue;
for (;;) {
_cleanup_free_ IPAddressAccessItem *a = NULL;
_cleanup_free_ char *word = NULL;
r = extract_first_word(&p, &word, NULL, 0);
if (r == 0)
break;
if (r == -ENOMEM)
return log_oom();
if (r < 0) {
log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
break;
}
a = new0(IPAddressAccessItem, 1);
if (!a)
return log_oom();
if (streq(word, "any")) {
/* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
a->family = AF_INET;
LIST_APPEND(items, *list, a);
a = new0(IPAddressAccessItem, 1);
if (!a)
return log_oom();
a->family = AF_INET6;
} else if (is_localhost(word)) {
/* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
a->family = AF_INET;
a->address.in.s_addr = htobe32(0x7f000000);
a->prefixlen = 8;
LIST_APPEND(items, *list, a);
a = new0(IPAddressAccessItem, 1);
if (!a)
return log_oom();
a->family = AF_INET6;
a->address.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
a->prefixlen = 128;
} else if (streq(word, "link-local")) {
/* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
a->family = AF_INET;
a->address.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
a->prefixlen = 16;
LIST_APPEND(items, *list, a);
a = new0(IPAddressAccessItem, 1);
if (!a)
return log_oom();
a->family = AF_INET6;
a->address.in6 = (struct in6_addr) {
.__in6_u.__u6_addr32[0] = htobe32(0xfe800000)
};
a->prefixlen = 64;
} else if (streq(word, "multicast")) {
/* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
a->family = AF_INET;
a->address.in.s_addr = htobe32((UINT32_C(224) << 24));
a->prefixlen = 4;
LIST_APPEND(items, *list, a);
a = new0(IPAddressAccessItem, 1);
if (!a)
return log_oom();
a->family = AF_INET6;
a->address.in6 = (struct in6_addr) {
.__in6_u.__u6_addr32[0] = htobe32(0xff000000)
};
a->prefixlen = 8;
} else {
r = in_addr_prefix_from_string_auto(word, &a->family, &a->address, &a->prefixlen);
if (r < 0) {
log_syntax(unit, LOG_WARNING, filename, line, r, "Address prefix is invalid, ignoring assignment: %s", word);
return 0;
}
}
LIST_APPEND(items, *list, a);
a = NULL;
}
*list = ip_address_access_reduce(*list);
if (*list) {
r = bpf_firewall_supported();
if (r < 0)
return r;
if (r == 0)
log_warning("File %s:%u configures an IP firewall (%s=%s), but the local system does not support BPF/cgroup based firewalling.\n"
"Proceeding WITHOUT firewalling in effect!", filename, line, lvalue, rvalue);
}
return 0;
}
/* Frees every item of the list starting at "first". Always returns NULL, so that callers can reset their
 * head pointer in the same statement. */
IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first) {

        while (first) {
                IPAddressAccessItem *doomed = first;

                /* Advance before freeing, the link lives inside the item */
                first = first->items_next;
                free(doomed);
        }

        return NULL;
}
/* Drops all entries from the list that are covered in full by another entry, thus removing all redundant
 * entries. As a side effect the address of every entry visited is masked down to its prefix length.
 *
 * Takes the list head and returns the (possibly changed) new head. Dropped items are freed. */
IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first) {
        IPAddressAccessItem *a, *b, *tmp;
        int r;

        /* The _SAFE variant is needed for the outer loop, since "a" may be removed and freed in the body */
        LIST_FOREACH_SAFE(items, a, tmp, first) {

                /* Drop irrelevant bits */
                (void) in_addr_mask(a->family, &a->address, a->prefixlen);

                LIST_FOREACH(items, b, first) {

                        if (a == b)
                                continue;

                        if (a->family != b->family)
                                continue;

                        /* A longer prefix can never cover a shorter one */
                        if (b->prefixlen > a->prefixlen)
                                continue;

                        r = in_addr_prefix_covers(b->family,
                                                  &b->address,
                                                  b->prefixlen,
                                                  &a->address);
                        if (r <= 0)
                                continue;

                        /* b covers a fully, then let's drop a. We must stop comparing right away: "a" is
                         * gone now, and looking at further candidates would both read freed memory and
                         * possibly free it a second time. */
                        LIST_REMOVE(items, first, a);
                        free(a);
                        break;
                }
        }

        return first;
}

View File

@ -0,0 +1,38 @@
#pragma once
/***
This file is part of systemd.
Copyright 2016 Daniel Mack
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include "in-addr-util.h"
#include "list.h"
typedef struct IPAddressAccessItem IPAddressAccessItem;
/* One entry of an IP address access list: an address prefix of a specific address family. */
struct IPAddressAccessItem {
/* Address family of the prefix; the parsers in this change only produce AF_INET and AF_INET6 */
int family;
/* Length of the prefix in bits */
unsigned char prefixlen;
/* The prefix address; ip_address_access_reduce() masks it down to prefixlen bits */
union in_addr_union address;
/* List linkage; the list name is "items" (i.e. the next pointer is items_next) */
LIST_FIELDS(IPAddressAccessItem, items);
};
int config_parse_ip_address_access(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first);
IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first);

View File

@ -806,21 +806,26 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
default:
log_struct(job_result_log_level[result],
LOG_MESSAGE("%s", buf),
"RESULT=%s", job_result_to_string(result),
"JOB_TYPE=%s", job_type_to_string(t),
"JOB_RESULT=%s", job_result_to_string(result),
LOG_UNIT_ID(u),
LOG_UNIT_INVOCATION_ID(u),
NULL);
return;
}
log_struct(job_result_log_level[result],
LOG_MESSAGE("%s", buf),
"RESULT=%s", job_result_to_string(result),
"JOB_TYPE=%s", job_type_to_string(t),
"JOB_RESULT=%s", job_result_to_string(result),
LOG_UNIT_ID(u),
LOG_UNIT_INVOCATION_ID(u),
mid,
NULL);
}
static void job_emit_status_message(Unit *u, JobType t, JobResult result) {
assert(u);
/* No message if the job did not actually do anything due to failed condition. */
if (t == JOB_START && result == JOB_DONE && !u->condition_result)
@ -903,7 +908,7 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
* the unit itself. We don't treat JOB_CANCELED as failure in
* this context. And JOB_FAILURE is already handled by the
* unit itself. */
if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
if (IN_SET(result, JOB_TIMEOUT, JOB_DEPENDENCY)) {
log_struct(LOG_NOTICE,
"JOB_TYPE=%s", job_type_to_string(t),
"JOB_RESULT=%s", job_result_to_string(result),

View File

@ -174,6 +174,9 @@ $1.BlockIOWriteBandwidth, config_parse_blockio_bandwidth, 0,
$1.TasksAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.tasks_accounting)
$1.TasksMax, config_parse_tasks_max, 0, offsetof($1, cgroup_context.tasks_max)
$1.Delegate, config_parse_bool, 0, offsetof($1, cgroup_context.delegate)
$1.IPAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.ip_accounting)
$1.IPAddressAllow, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_allow)
$1.IPAddressDeny, config_parse_ip_address_access, 0, offsetof($1, cgroup_context.ip_address_deny)
$1.NetClass, config_parse_warn_compat, DISABLED_LEGACY, 0'
)m4_dnl
Unit.Description, config_parse_unit_string_printf, 0, offsetof(Unit, description)

View File

@ -128,6 +128,7 @@ static Set* arg_syscall_archs = NULL;
static FILE* arg_serialization = NULL;
static bool arg_default_cpu_accounting = false;
static bool arg_default_io_accounting = false;
static bool arg_default_ip_accounting = false;
static bool arg_default_blockio_accounting = false;
static bool arg_default_memory_accounting = false;
static bool arg_default_tasks_accounting = true;
@ -748,6 +749,7 @@ static int parse_config_file(void) {
{ "Manager", "DefaultLimitRTTIME", config_parse_limit, RLIMIT_RTTIME, arg_default_rlimit },
{ "Manager", "DefaultCPUAccounting", config_parse_bool, 0, &arg_default_cpu_accounting },
{ "Manager", "DefaultIOAccounting", config_parse_bool, 0, &arg_default_io_accounting },
{ "Manager", "DefaultIPAccounting", config_parse_bool, 0, &arg_default_ip_accounting },
{ "Manager", "DefaultBlockIOAccounting", config_parse_bool, 0, &arg_default_blockio_accounting },
{ "Manager", "DefaultMemoryAccounting", config_parse_bool, 0, &arg_default_memory_accounting },
{ "Manager", "DefaultTasksAccounting", config_parse_bool, 0, &arg_default_tasks_accounting },
@ -792,6 +794,7 @@ static void manager_set_defaults(Manager *m) {
m->default_start_limit_burst = arg_default_start_limit_burst;
m->default_cpu_accounting = arg_default_cpu_accounting;
m->default_io_accounting = arg_default_io_accounting;
m->default_ip_accounting = arg_default_ip_accounting;
m->default_blockio_accounting = arg_default_blockio_accounting;
m->default_memory_accounting = arg_default_memory_accounting;
m->default_tasks_accounting = arg_default_tasks_accounting;
@ -1202,6 +1205,26 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
return 0;
}
/* Makes sure RLIMIT_MEMLOCK of the calling process is at least 16 MiB. The limit that was in effect before is stored
 * in *saved_rlimit. A limit that is already at least that high (or unlimited) is left untouched. Returns 0 on success
 * or if nothing had to be changed; on failure a warning is logged and the value of log_warning_errno() is returned. */
static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
        const rlim_t wanted = 1024ULL*1024ULL*16ULL;
        struct rlimit new_rlimit;
        int r;

        assert(saved_rlimit);
        assert(getuid() == 0);

        /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
         * should normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence
         * let's bump the value high enough for the root user. */

        if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
                return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");

        /* Never lower a limit that is already sufficient: "bumping" must not turn a higher or unlimited soft limit
         * into a 16 MiB one. */
        if (saved_rlimit->rlim_cur == RLIM_INFINITY || saved_rlimit->rlim_cur >= wanted)
                return 0;

        /* Raise the soft limit to what we need, and keep the hard limit if it is already at least that high. */
        new_rlimit.rlim_cur = wanted;
        if (saved_rlimit->rlim_max == RLIM_INFINITY || saved_rlimit->rlim_max >= wanted)
                new_rlimit.rlim_max = saved_rlimit->rlim_max;
        else
                new_rlimit.rlim_max = wanted;

        r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
        if (r < 0)
                return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");

        return 0;
}
static void test_usr(void) {
/* Check that /usr is not a separate fs */
@ -1385,7 +1408,7 @@ int main(int argc, char *argv[]) {
bool queue_default_job = false;
bool empty_etc = false;
char *switch_root_dir = NULL, *switch_root_init = NULL;
struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0);
struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
const char *error_message = NULL;
#ifdef HAVE_SYSV_COMPAT
@ -1812,9 +1835,11 @@ int main(int argc, char *argv[]) {
if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
log_warning_errno(errno, "Failed to make us a subreaper: %m");
if (arg_system)
if (arg_system) {
/* Bump up RLIMIT_NOFILE for systemd itself */
(void) bump_rlimit_nofile(&saved_rlimit_nofile);
(void) bump_rlimit_memlock(&saved_rlimit_memlock);
}
}
r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
@ -2048,6 +2073,8 @@ finish:
* its child processes */
if (saved_rlimit_nofile.rlim_cur > 0)
(void) setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
if (saved_rlimit_memlock.rlim_cur != (rlim_t) -1)
(void) setrlimit(RLIMIT_MEMLOCK, &saved_rlimit_memlock);
if (switch_root_dir) {
/* Kill all remaining processes from the

View File

@ -616,6 +616,9 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
m->default_timer_accuracy_usec = USEC_PER_MINUTE;
m->default_tasks_accounting = true;
m->default_tasks_max = UINT64_MAX;
m->default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
m->default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
m->default_restart_usec = DEFAULT_RESTART_USEC;
#ifdef ENABLE_EFI
if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
@ -628,13 +631,13 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
m->unit_log_format_string = "UNIT=%s";
m->invocation_log_field = "INVOCATION_ID=";
m->invocation_log_format_string = "INVOCATION_ID=" SD_ID128_FORMAT_STR;
m->invocation_log_format_string = "INVOCATION_ID=%s";
} else {
m->unit_log_field = "USER_UNIT=";
m->unit_log_format_string = "USER_UNIT=%s";
m->invocation_log_field = "USER_INVOCATION_ID=";
m->invocation_log_format_string = "USER_INVOCATION_ID=" SD_ID128_FORMAT_STR;
m->invocation_log_format_string = "USER_INVOCATION_ID=%s";
}
m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;

View File

@ -29,6 +29,7 @@
#include "cgroup-util.h"
#include "fdset.h"
#include "hashmap.h"
#include "ip-address-access.h"
#include "list.h"
#include "ratelimit.h"
@ -271,6 +272,7 @@ struct Manager {
bool default_io_accounting;
bool default_blockio_accounting;
bool default_tasks_accounting;
bool default_ip_accounting;
uint64_t default_tasks_max;
usec_t default_timer_accuracy_usec;

View File

@ -1,114 +1,118 @@
libcore_la_sources = '''
unit.c
unit.h
unit-printf.c
unit-printf.h
job.c
job.h
manager.c
manager.h
transaction.c
transaction.h
load-fragment.c
load-fragment.h
service.c
service.h
socket.c
socket.h
target.c
target.h
device.c
device.h
mount.c
mount.h
audit-fd.c
audit-fd.h
automount.c
automount.h
swap.c
swap.h
timer.c
timer.h
path.c
path.h
slice.c
slice.h
scope.c
scope.h
load-dropin.c
load-dropin.h
execute.c
execute.h
dynamic-user.c
dynamic-user.h
kill.c
kill.h
dbus.c
dbus.h
dbus-manager.c
dbus-manager.h
dbus-unit.c
dbus-unit.h
dbus-job.c
dbus-job.h
dbus-service.c
dbus-service.h
dbus-socket.c
dbus-socket.h
dbus-target.c
dbus-target.h
dbus-device.c
dbus-device.h
dbus-mount.c
dbus-mount.h
dbus-automount.c
dbus-automount.h
dbus-swap.c
dbus-swap.h
dbus-timer.c
dbus-timer.h
dbus-path.c
dbus-path.h
dbus-slice.c
dbus-slice.h
dbus-scope.c
dbus-scope.h
dbus-execute.c
dbus-execute.h
dbus-kill.c
dbus-kill.h
dbus-cgroup.c
dbus-cgroup.h
bpf-firewall.c
bpf-firewall.h
cgroup.c
cgroup.h
dbus-automount.c
dbus-automount.h
dbus-cgroup.c
dbus-cgroup.h
dbus-device.c
dbus-device.h
dbus-execute.c
dbus-execute.h
dbus-job.c
dbus-job.h
dbus-kill.c
dbus-kill.h
dbus-manager.c
dbus-manager.h
dbus-mount.c
dbus-mount.h
dbus-path.c
dbus-path.h
dbus-scope.c
dbus-scope.h
dbus-service.c
dbus-service.h
dbus-slice.c
dbus-slice.h
dbus-socket.c
dbus-socket.h
dbus-swap.c
dbus-swap.h
dbus-target.c
dbus-target.h
dbus-timer.c
dbus-timer.h
dbus-unit.c
dbus-unit.h
dbus.c
dbus.h
device.c
device.h
dynamic-user.c
dynamic-user.h
emergency-action.c
emergency-action.h
execute.c
execute.h
hostname-setup.c
hostname-setup.h
ima-setup.c
ima-setup.h
ip-address-access.c
ip-address-access.h
job.c
job.h
kill.c
kill.h
killall.c
killall.h
kmod-setup.c
kmod-setup.h
load-dropin.c
load-dropin.h
load-fragment.c
load-fragment.h
locale-setup.c
locale-setup.h
loopback-setup.c
loopback-setup.h
machine-id-setup.c
machine-id-setup.h
manager.c
manager.h
mount-setup.c
mount-setup.h
mount.c
mount.h
namespace.c
namespace.h
path.c
path.h
scope.c
scope.h
selinux-access.c
selinux-access.h
selinux-setup.c
selinux-setup.h
smack-setup.c
smack-setup.h
ima-setup.c
ima-setup.h
locale-setup.h
locale-setup.c
hostname-setup.c
hostname-setup.h
machine-id-setup.c
machine-id-setup.h
mount-setup.c
mount-setup.h
kmod-setup.c
kmod-setup.h
loopback-setup.h
loopback-setup.c
namespace.c
namespace.h
killall.h
killall.c
audit-fd.c
audit-fd.h
service.c
service.h
show-status.c
show-status.h
emergency-action.c
emergency-action.h
slice.c
slice.h
smack-setup.c
smack-setup.h
socket.c
socket.h
swap.c
swap.h
target.c
target.h
timer.c
timer.h
transaction.c
transaction.h
unit-printf.c
unit-printf.h
unit.c
unit.h
'''.split()
load_fragment_gperf_gperf = custom_target(

View File

@ -736,6 +736,7 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {
exec_context_dump(&m->exec_context, f, prefix);
kill_context_dump(&m->kill_context, f, prefix);
cgroup_context_dump(&m->cgroup_context, f, prefix);
}
static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
@ -753,9 +754,10 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
assert(_pid);
(void) unit_realize_cgroup(UNIT(m));
if (m->reset_cpu_usage) {
(void) unit_reset_cpu_usage(UNIT(m));
m->reset_cpu_usage = false;
if (m->reset_accounting) {
(void) unit_reset_cpu_accounting(UNIT(m));
(void) unit_reset_ip_accounting(UNIT(m));
m->reset_accounting = false;
}
r = unit_setup_exec_runtime(UNIT(m));
@ -1043,7 +1045,7 @@ static int mount_start(Unit *u) {
m->result = MOUNT_SUCCESS;
m->reload_result = MOUNT_SUCCESS;
m->reset_cpu_usage = true;
m->reset_accounting = true;
mount_enter_mounting(m);
return 1;

View File

@ -67,7 +67,7 @@ struct Mount {
bool just_mounted:1;
bool just_changed:1;
bool reset_cpu_usage:1;
bool reset_accounting:1;
bool sloppy_options;

View File

@ -333,7 +333,8 @@ static int scope_start(Unit *u) {
return r;
(void) unit_realize_cgroup(u);
(void) unit_reset_cpu_usage(u);
(void) unit_reset_cpu_accounting(u);
(void) unit_reset_ip_accounting(u);
r = unit_attach_pids_to_cgroup(u);
if (r < 0) {

View File

@ -803,6 +803,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
"%sFile Descriptor Store Current: %u\n",
prefix, s->n_fd_store_max,
prefix, s->n_fd_store);
cgroup_context_dump(&s->cgroup_context, f, prefix);
}
static int service_load_pid_file(Service *s, bool may_warn) {
@ -1242,9 +1244,10 @@ static int service_spawn(
}
(void) unit_realize_cgroup(UNIT(s));
if (s->reset_cpu_usage) {
(void) unit_reset_cpu_usage(UNIT(s));
s->reset_cpu_usage = false;
if (s->reset_accounting) {
(void) unit_reset_cpu_accounting(UNIT(s));
(void) unit_reset_ip_accounting(UNIT(s));
s->reset_accounting = false;
}
r = unit_setup_exec_runtime(UNIT(s));
@ -1953,6 +1956,7 @@ static void service_enter_restart(Service *s) {
log_struct(LOG_INFO,
"MESSAGE_ID=" SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR,
LOG_UNIT_ID(UNIT(s)),
LOG_UNIT_INVOCATION_ID(UNIT(s)),
LOG_UNIT_MESSAGE(UNIT(s), "Scheduled restart job, restart counter is at %u.", s->n_restarts),
"N_RESTARTS=%u", s->n_restarts,
NULL);
@ -2136,7 +2140,7 @@ static int service_start(Unit *u) {
s->main_pid_known = false;
s->main_pid_alien = false;
s->forbid_restart = false;
s->reset_cpu_usage = true;
s->reset_accounting = true;
s->status_text = mfree(s->status_text);
s->status_errno = 0;
@ -2948,6 +2952,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
"EXIT_CODE=%s", sigchld_code_to_string(code),
"EXIT_STATUS=%i", status,
LOG_UNIT_ID(u),
LOG_UNIT_INVOCATION_ID(u),
NULL);
if (s->result == SERVICE_SUCCESS)

View File

@ -165,7 +165,7 @@ struct Service {
bool forbid_restart:1;
bool start_timeout_defined:1;
bool reset_cpu_usage:1;
bool reset_accounting:1;
char *bus_name;
char *bus_name_owner; /* unique name of the current owner */

View File

@ -93,21 +93,21 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
}
if (prev_ephemeral)
IOVEC_SET_STRING(iovec[n++], "\r" ANSI_ERASE_TO_END_OF_LINE);
iovec[n++] = IOVEC_MAKE_STRING("\r" ANSI_ERASE_TO_END_OF_LINE);
prev_ephemeral = ephemeral;
if (status) {
if (!isempty(status)) {
IOVEC_SET_STRING(iovec[n++], "[");
IOVEC_SET_STRING(iovec[n++], status);
IOVEC_SET_STRING(iovec[n++], "] ");
iovec[n++] = IOVEC_MAKE_STRING("[");
iovec[n++] = IOVEC_MAKE_STRING(status);
iovec[n++] = IOVEC_MAKE_STRING("] ");
} else
IOVEC_SET_STRING(iovec[n++], status_indent);
iovec[n++] = IOVEC_MAKE_STRING(status_indent);
}
IOVEC_SET_STRING(iovec[n++], s);
iovec[n++] = IOVEC_MAKE_STRING(s);
if (!ephemeral)
IOVEC_SET_STRING(iovec[n++], "\n");
iovec[n++] = IOVEC_MAKE_STRING("\n");
if (writev(fd, iovec, n) < 0)
return -errno;

View File

@ -222,7 +222,8 @@ static int slice_start(Unit *u) {
return r;
(void) unit_realize_cgroup(u);
(void) unit_reset_cpu_usage(u);
(void) unit_reset_cpu_accounting(u);
(void) unit_reset_ip_accounting(u);
slice_set_state(t, SLICE_ACTIVE);
return 1;

View File

@ -29,6 +29,7 @@
#include <linux/sctp.h>
#include "alloc-util.h"
#include "bpf-firewall.h"
#include "bus-error.h"
#include "bus-util.h"
#include "copy.h"
@ -37,6 +38,7 @@
#include "exit-status.h"
#include "fd-util.h"
#include "format-util.h"
#include "in-addr-util.h"
#include "io-util.h"
#include "label.h"
#include "log.h"
@ -56,7 +58,6 @@
#include "unit-name.h"
#include "unit.h"
#include "user-util.h"
#include "in-addr-util.h"
struct SocketPeer {
unsigned n_ref;
@ -852,6 +853,8 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {
exec_command_dump_list(s->exec_command[c], f, prefix2);
}
cgroup_context_dump(&s->cgroup_context, f, prefix);
}
static int instance_from_socket(int fd, unsigned nr, char **instance) {
@ -1435,6 +1438,102 @@ no_label:
return 0;
}
/* Creates the listening socket for the given address, passing along the options configured on the socket unit.
 * Hands back the result of socket_address_listen() unmodified. */
static int socket_address_listen_do(
                Socket *s,
                const SocketAddress *address,
                const char *label) {

        int listen_fd;

        assert(s);
        assert(address);

        listen_fd = socket_address_listen(
                        address,
                        SOCK_CLOEXEC|SOCK_NONBLOCK,
                        s->backlog,
                        s->bind_ipv6_only,
                        s->bind_to_device,
                        s->reuse_port,
                        s->free_bind,
                        s->transparent,
                        s->directory_mode,
                        s->socket_mode,
                        label);

        return listen_fd;
}
/* Creates the listening socket for the given address from within the socket unit's cgroup.
 *
 * For AF_INET/AF_INET6 addresses, and only if bpf_firewall_supported() reports support, a helper process is forked
 * off via unit_fork_helper_process(), creates the socket and passes it back over a socketpair. In all other cases
 * the socket is created directly in the calling process.
 *
 * Returns the listening fd on success. On failure the value of the failing call (or of the logging call that
 * reports it) is returned. */
static int socket_address_listen_in_cgroup(
                Socket *s,
                const SocketAddress *address,
                const char *label) {

        _cleanup_close_pair_ int pair[2] = { -1, -1 };
        int fd, r;
        pid_t pid;

        assert(s);
        assert(address);

        /* This is a wrapper around socket_address_listen(), that forks off a helper process inside the socket's cgroup
         * in which the socket is actually created. This way we ensure the socket is actually properly attached to the
         * unit's cgroup for the purpose of BPF filtering and such. */

        if (!IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6))
                goto shortcut; /* BPF filtering only applies to IPv4 + IPv6, shortcut things for other protocols */

        r = bpf_firewall_supported();
        if (r < 0)
                return r;
        if (r == 0) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
                goto shortcut;

        if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
                return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");

        r = unit_fork_helper_process(UNIT(s), &pid);
        if (r < 0)
                return log_unit_error_errno(UNIT(s), r, "Failed to fork off listener stub process: %m");
        if (r == 0) {
                /* Child: only the sending end of the channel is needed here */

                pair[0] = safe_close(pair[0]);

                fd = socket_address_listen_do(s, address, label);
                if (fd < 0) {
                        log_unit_error_errno(UNIT(s), fd, "Failed to create listening socket: %m");
                        _exit(EXIT_FAILURE);
                }

                r = send_one_fd(pair[1], fd, 0);
                if (r < 0) {
                        log_unit_error_errno(UNIT(s), r, "Failed to send listening socket to parent: %m");
                        _exit(EXIT_FAILURE);
                }

                _exit(EXIT_SUCCESS);
        }

        /* Parent: only the receiving end of the channel is needed here */
        pair[1] = safe_close(pair[1]);
        fd = receive_one_fd(pair[0], 0);

        /* We synchronously wait for the helper, as it shouldn't be slow */
        r = wait_for_terminate_and_warn("listen-cgroup-helper", pid, false);
        if (r < 0) {
                safe_close(fd);
                return r;
        }

        if (fd < 0)
                return log_unit_error_errno(UNIT(s), fd, "Failed to receive listening socket: %m");

        return fd;

shortcut:
        fd = socket_address_listen_do(s, address, label);
        if (fd < 0)
                /* Attribute the failure to the unit, like all other error paths of this function do */
                return log_unit_error_errno(UNIT(s), fd, "Failed to create listening socket: %m");

        return fd;
}
static int socket_open_fds(Socket *s) {
_cleanup_(mac_selinux_freep) char *label = NULL;
bool know_label = false;
@ -1478,18 +1577,7 @@ static int socket_open_fds(Socket *s) {
break;
}
r = socket_address_listen(
&p->address,
SOCK_CLOEXEC|SOCK_NONBLOCK,
s->backlog,
s->bind_ipv6_only,
s->bind_to_device,
s->reuse_port,
s->free_bind,
s->transparent,
s->directory_mode,
s->socket_mode,
label);
r = socket_address_listen_in_cgroup(s, &p->address, label);
if (r < 0)
goto rollback;
@ -1773,9 +1861,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
assert(_pid);
(void) unit_realize_cgroup(UNIT(s));
if (s->reset_cpu_usage) {
(void) unit_reset_cpu_usage(UNIT(s));
s->reset_cpu_usage = false;
if (s->reset_accounting) {
(void) unit_reset_cpu_accounting(UNIT(s));
(void) unit_reset_ip_accounting(UNIT(s));
s->reset_accounting = false;
}
r = unit_setup_exec_runtime(UNIT(s));
@ -1826,27 +1915,23 @@ static int socket_chown(Socket *s, pid_t *_pid) {
/* We have to resolve the user names out-of-process, hence
* let's fork here. It's messy, but well, what can we do? */
pid = fork();
if (pid < 0)
return -errno;
if (pid == 0) {
SocketPort *p;
r = unit_fork_helper_process(UNIT(s), &pid);
if (r < 0)
return r;
if (r == 0) {
uid_t uid = UID_INVALID;
gid_t gid = GID_INVALID;
int ret;
SocketPort *p;
(void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
(void) ignore_signals(SIGPIPE, -1);
log_forget_fds();
/* Child */
if (!isempty(s->user)) {
const char *user = s->user;
r = get_user_creds(&user, &uid, &gid, NULL, NULL);
if (r < 0) {
ret = EXIT_USER;
goto fail_child;
log_unit_error_errno(UNIT(s), r, "Failed to resolve user %s: %m", user);
_exit(EXIT_USER);
}
}
@ -1855,8 +1940,8 @@ static int socket_chown(Socket *s, pid_t *_pid) {
r = get_group_creds(&group, &gid);
if (r < 0) {
ret = EXIT_GROUP;
goto fail_child;
log_unit_error_errno(UNIT(s), r, "Failed to resolve group %s: %m", group);
_exit(EXIT_GROUP);
}
}
@ -1872,19 +1957,12 @@ static int socket_chown(Socket *s, pid_t *_pid) {
continue;
if (chown(path, uid, gid) < 0) {
r = -errno;
ret = EXIT_CHOWN;
goto fail_child;
log_unit_error_errno(UNIT(s), errno, "Failed to chown(): %m");
_exit(EXIT_CHOWN);
}
}
_exit(0);
fail_child:
log_open();
log_error_errno(r, "Failed to chown socket at step %s: %m", exit_status_to_string(ret, EXIT_STATUS_SYSTEMD));
_exit(ret);
_exit(EXIT_SUCCESS);
}
r = unit_watch_pid(UNIT(s), pid);
@ -2371,7 +2449,7 @@ static int socket_start(Unit *u) {
return r;
s->result = SOCKET_SUCCESS;
s->reset_cpu_usage = true;
s->reset_accounting = true;
socket_enter_start_pre(s);
return 1;
@ -2696,6 +2774,97 @@ _pure_ static bool socket_check_gc(Unit *u) {
return s->n_connections > 0;
}
/* Accepts one connection on the listening socket fd, transparently retrying when the call is interrupted by a
 * signal. Returns the (non-blocking) connection fd on success, or the negated errno of the failed accept4(). */
static int socket_accept_do(Socket *s, int fd) {
        int conn;

        assert(s);
        assert(fd >= 0);

        /* Keep trying for as long as the only problem is EINTR */
        do
                conn = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
        while (conn < 0 && errno == EINTR);

        if (conn < 0)
                return -errno;

        return conn;
}
/* Accepts a connection on the listening socket fd of port p, belonging to socket unit s.
 *
 * For AF_INET/AF_INET6 ports, and only if bpf_firewall_supported() returns > 0, the accept is done in a helper
 * process forked via unit_fork_helper_process(), which passes the connection fd back over a socketpair. In all
 * other cases the connection is accepted directly in the calling process.
 *
 * Returns the connection fd on success. On failure the value of the failing call (or of the logging call that
 * reports it) is returned. */
static int socket_accept_in_cgroup(Socket *s, SocketPort *p, int fd) {
_cleanup_close_pair_ int pair[2] = { -1, -1 };
int cfd, r;
pid_t pid;
assert(s);
assert(p);
assert(fd >= 0);
/* Similar to socket_address_listen_in_cgroup(), but for accept() rather than socket(): make sure that any
 * connection socket is also properly associated with the cgroup. */
if (!IN_SET(p->address.sockaddr.sa.sa_family, AF_INET, AF_INET6))
goto shortcut;
r = bpf_firewall_supported();
if (r < 0)
return r;
if (r == 0)
goto shortcut;
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
r = unit_fork_helper_process(UNIT(s), &pid);
if (r < 0)
return log_unit_error_errno(UNIT(s), r, "Failed to fork off accept stub process: %m");
if (r == 0) {
/* Child: close the receiving end, accept the connection and send the resulting fd to the parent */
pair[0] = safe_close(pair[0]);
cfd = socket_accept_do(s, fd);
if (cfd < 0) {
log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
_exit(EXIT_FAILURE);
}
r = send_one_fd(pair[1], cfd, 0);
if (r < 0) {
log_unit_error_errno(UNIT(s), r, "Failed to send connection socket to parent: %m");
_exit(EXIT_FAILURE);
}
_exit(EXIT_SUCCESS);
}
/* Parent: close the sending end and pick up the fd the helper passes back */
pair[1] = safe_close(pair[1]);
cfd = receive_one_fd(pair[0], 0);
/* We synchronously wait for the helper, as it shouldn't be slow */
r = wait_for_terminate_and_warn("accept-cgroup-helper", pid, false);
if (r < 0) {
/* Waiting failed: don't hold on to whatever was received */
safe_close(cfd);
return r;
}
if (cfd < 0)
return log_unit_error_errno(UNIT(s), cfd, "Failed to receive connection socket: %m");
return cfd;
shortcut:
/* No helper process involved: accept directly in the calling process */
cfd = socket_accept_do(s, fd);
if (cfd < 0)
return log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
return cfd;
}
static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
SocketPort *p = userdata;
int cfd = -1;
@ -2721,20 +2890,9 @@ static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
p->type == SOCKET_SOCKET &&
socket_address_can_accept(&p->address)) {
for (;;) {
cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
if (cfd < 0) {
if (errno == EINTR)
continue;
log_unit_error_errno(UNIT(p->socket), errno, "Failed to accept socket: %m");
goto fail;
}
break;
}
cfd = socket_accept_in_cgroup(p->socket, p, fd);
if (cfd < 0)
goto fail;
socket_apply_socket_options(p->socket, cfd);
}

View File

@ -161,7 +161,7 @@ struct Socket {
char *user, *group;
bool reset_cpu_usage:1;
bool reset_accounting:1;
char *fdname;

View File

@ -602,6 +602,7 @@ static void swap_dump(Unit *u, FILE *f, const char *prefix) {
exec_context_dump(&s->exec_context, f, prefix);
kill_context_dump(&s->kill_context, f, prefix);
cgroup_context_dump(&s->cgroup_context, f, prefix);
}
static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
@ -619,9 +620,10 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
assert(_pid);
(void) unit_realize_cgroup(UNIT(s));
if (s->reset_cpu_usage) {
(void) unit_reset_cpu_usage(UNIT(s));
s->reset_cpu_usage = false;
if (s->reset_accounting) {
(void) unit_reset_cpu_accounting(UNIT(s));
(void) unit_reset_ip_accounting(UNIT(s));
s->reset_accounting = false;
}
r = unit_setup_exec_runtime(UNIT(s));
@ -860,7 +862,7 @@ static int swap_start(Unit *u) {
return r;
s->result = SWAP_SUCCESS;
s->reset_cpu_usage = true;
s->reset_accounting = true;
swap_enter_activating(s);
return 1;

View File

@ -70,7 +70,7 @@ struct Swap {
bool is_active:1;
bool just_activated:1;
bool reset_cpu_usage:1;
bool reset_accounting:1;
SwapResult result;

View File

@ -40,6 +40,7 @@
#DefaultEnvironment=
#DefaultCPUAccounting=no
#DefaultIOAccounting=no
#DefaultIPAccounting=no
#DefaultBlockIOAccounting=no
#DefaultMemoryAccounting=no
#DefaultTasksAccounting=yes
@ -60,3 +61,5 @@
#DefaultLimitNICE=
#DefaultLimitRTPRIO=
#DefaultLimitRTTIME=
#IPAddressAllow=
#IPAddressDeny=

View File

@ -35,9 +35,11 @@
#include "dropin.h"
#include "escape.h"
#include "execute.h"
#include "fd-util.h"
#include "fileio-label.h"
#include "format-util.h"
#include "id128-util.h"
#include "io-util.h"
#include "load-dropin.h"
#include "load-fragment.h"
#include "log.h"
@ -103,6 +105,13 @@ Unit *unit_new(Manager *m, size_t size) {
u->ref_gid = GID_INVALID;
u->cpu_usage_last = NSEC_INFINITY;
u->ip_accounting_ingress_map_fd = -1;
u->ip_accounting_egress_map_fd = -1;
u->ipv4_allow_map_fd = -1;
u->ipv6_allow_map_fd = -1;
u->ipv4_deny_map_fd = -1;
u->ipv6_deny_map_fd = -1;
RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);
@ -153,9 +162,11 @@ static void unit_init(Unit *u) {
cc->cpu_accounting = u->manager->default_cpu_accounting;
cc->io_accounting = u->manager->default_io_accounting;
cc->ip_accounting = u->manager->default_ip_accounting;
cc->blockio_accounting = u->manager->default_blockio_accounting;
cc->memory_accounting = u->manager->default_memory_accounting;
cc->tasks_accounting = u->manager->default_tasks_accounting;
cc->ip_accounting = u->manager->default_ip_accounting;
if (u->type != UNIT_SLICE)
cc->tasks_max = u->manager->default_tasks_max;
@ -610,6 +621,17 @@ void unit_free(Unit *u) {
while (u->refs)
unit_ref_unset(u->refs);
safe_close(u->ip_accounting_ingress_map_fd);
safe_close(u->ip_accounting_egress_map_fd);
safe_close(u->ipv4_allow_map_fd);
safe_close(u->ipv6_allow_map_fd);
safe_close(u->ipv4_deny_map_fd);
safe_close(u->ipv6_deny_map_fd);
bpf_program_unref(u->ip_bpf_ingress);
bpf_program_unref(u->ip_bpf_egress);
free(u);
}
@ -1523,6 +1545,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
log_struct(LOG_INFO,
LOG_MESSAGE("%s", buf),
LOG_UNIT_ID(u),
LOG_UNIT_INVOCATION_ID(u),
mid,
NULL);
}
@ -1979,6 +2002,134 @@ void unit_trigger_notify(Unit *u) {
UNIT_VTABLE(other)->trigger_notify(other, u);
}
/* Writes one structured log record summarizing the resources consumed by unit u: CPU time (if
 * unit_get_cpu_usage() provides a value) and the IP accounting counters (for each metric
 * unit_get_ip_accounting() provides a value for). Nothing is logged if no metric is available at all.
 *
 * Returns 0 on success, including when there was nothing to log, and the result of log_oom() if a string
 * allocation failed. */
static int unit_log_resources(Unit *u) {
/* Room for one CPU field, one field per IP accounting metric, and the four fixed fields appended at the end */
struct iovec iovec[1 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + 4];
size_t n_message_parts = 0, n_iovec = 0;
/* Up to three human readable parts (CPU time, received bytes, sent bytes) plus the terminating NULL */
char* message_parts[3 + 1], *t;
nsec_t nsec = NSEC_INFINITY;
CGroupIPAccountingMetric m;
size_t i;
int r;
/* Names of the structured log fields, indexed by IP accounting metric */
const char* const ip_fields[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
[CGROUP_IP_INGRESS_BYTES] = "IP_METRIC_INGRESS_BYTES",
[CGROUP_IP_INGRESS_PACKETS] = "IP_METRIC_INGRESS_PACKETS",
[CGROUP_IP_EGRESS_BYTES] = "IP_METRIC_EGRESS_BYTES",
[CGROUP_IP_EGRESS_PACKETS] = "IP_METRIC_EGRESS_PACKETS",
};
assert(u);
/* Invoked whenever a unit enters failed or dead state. Logs information about consumed resources if resource
 * accounting was enabled for a unit. It does this in two ways: a friendly human readable string with reduced
 * information and the complete data in structured fields. */
/* nsec stays at NSEC_INFINITY if the call does not fill in a value, which is treated as "no data" below */
(void) unit_get_cpu_usage(u, &nsec);
if (nsec != NSEC_INFINITY) {
char buf[FORMAT_TIMESPAN_MAX] = "";
/* Format the CPU time for inclusion in the structured log message */
if (asprintf(&t, "CPU_USAGE_NSEC=%" PRIu64, nsec) < 0) {
r = log_oom();
goto finish;
}
iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
/* Format the CPU time for inclusion in the human language message string */
format_timespan(buf, sizeof(buf), nsec / NSEC_PER_USEC, USEC_PER_MSEC);
t = strjoin(n_message_parts > 0 ? "consumed " : "Consumed ", buf, " CPU time");
if (!t) {
r = log_oom();
goto finish;
}
message_parts[n_message_parts++] = t;
}
for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
char buf[FORMAT_BYTES_MAX] = "";
uint64_t value = UINT64_MAX;
assert(ip_fields[m]);
/* value stays at UINT64_MAX if the call does not fill in a value, in which case the metric is skipped */
(void) unit_get_ip_accounting(u, m, &value);
if (value == UINT64_MAX)
continue;
/* Format IP accounting data for inclusion in the structured log message */
if (asprintf(&t, "%s=%" PRIu64, ip_fields[m], value) < 0) {
r = log_oom();
goto finish;
}
iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
/* Format the IP accounting data for inclusion in the human language message string, but only for the
 * bytes counters (and not for the packets counters) */
if (m == CGROUP_IP_INGRESS_BYTES)
t = strjoin(n_message_parts > 0 ? "received " : "Received ",
format_bytes(buf, sizeof(buf), value),
" IP traffic");
else if (m == CGROUP_IP_EGRESS_BYTES)
t = strjoin(n_message_parts > 0 ? "sent " : "Sent ",
format_bytes(buf, sizeof(buf), value),
" IP traffic");
else
continue;
if (!t) {
r = log_oom();
goto finish;
}
message_parts[n_message_parts++] = t;
}
/* Is there any accounting data available at all? */
if (n_iovec == 0) {
r = 0;
goto finish;
}
if (n_message_parts == 0)
t = strjoina("MESSAGE=", u->id, ": Completed");
else {
_cleanup_free_ char *joined;
/* Terminate the array before handing it to strv_join() */
message_parts[n_message_parts] = NULL;
joined = strv_join(message_parts, ", ");
if (!joined) {
r = log_oom();
goto finish;
}
t = strjoina("MESSAGE=", u->id, ": ", joined);
}
/* The following four fields we allocate on the stack or are static strings, we hence don't want to free them,
 * and hence don't increase n_iovec for them */
iovec[n_iovec] = IOVEC_MAKE_STRING(t);
iovec[n_iovec + 1] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_UNIT_RESOURCES_STR);
t = strjoina(u->manager->unit_log_field, u->id);
iovec[n_iovec + 2] = IOVEC_MAKE_STRING(t);
t = strjoina(u->manager->invocation_log_field, u->invocation_id_string);
iovec[n_iovec + 3] = IOVEC_MAKE_STRING(t);
log_struct_iovec(LOG_INFO, iovec, n_iovec + 4);
r = 0;
finish:
/* Release the heap-allocated strings: all message parts, and the first n_iovec structured fields */
for (i = 0; i < n_message_parts; i++)
free(message_parts[i]);
for (i = 0; i < n_iovec; i++)
free(iovec[i].iov_base);
return r;
}
void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success) {
Manager *m;
bool unexpected;
@ -2150,28 +2301,33 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
manager_send_unit_plymouth(m, u);
} else {
/* We don't care about D-Bus going down here, since we'll get an asynchronous notification for it
* anyway. */
/* We don't care about D-Bus here, since we'll get an
* asynchronous notification for it anyway. */
if (UNIT_IS_INACTIVE_OR_FAILED(ns) &&
!UNIT_IS_INACTIVE_OR_FAILED(os)
&& !MANAGER_IS_RELOADING(m)) {
if (u->type == UNIT_SERVICE &&
UNIT_IS_INACTIVE_OR_FAILED(ns) &&
!UNIT_IS_INACTIVE_OR_FAILED(os) &&
!MANAGER_IS_RELOADING(m)) {
/* This unit just stopped/failed. */
if (u->type == UNIT_SERVICE) {
/* Hmm, if there was no start record written
* write it now, so that we always have a nice
* pair */
if (!u->in_audit) {
manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
/* Hmm, if there was no start record written
* write it now, so that we always have a nice
* pair */
if (!u->in_audit) {
manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
if (ns == UNIT_INACTIVE)
manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
} else
/* Write audit record if we have just finished shutting down */
manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
if (ns == UNIT_INACTIVE)
manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
} else
/* Write audit record if we have just finished shutting down */
manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
u->in_audit = false;
u->in_audit = false;
}
/* Write a log message about consumed resources */
unit_log_resources(u);
}
}
@ -2749,7 +2905,15 @@ static int unit_serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask)
return r;
}
/* Keys under which the IP accounting counters are written by unit_serialize() and recognized by
 * unit_deserialize(), indexed by CGroupIPAccountingMetric. The table is read-only, hence both the strings and the
 * array slots are const. */
static const char* const ip_accounting_metric_field[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
        [CGROUP_IP_INGRESS_BYTES]   = "ip-accounting-ingress-bytes",
        [CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
        [CGROUP_IP_EGRESS_BYTES]    = "ip-accounting-egress-bytes",
        [CGROUP_IP_EGRESS_PACKETS]  = "ip-accounting-egress-packets",
};
int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
CGroupIPAccountingMetric m;
int r;
assert(u);
@ -2798,6 +2962,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
(void) unit_serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
(void) unit_serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
unit_serialize_item_format(u, f, "cgroup-bpf-realized", "%i", u->cgroup_bpf_state);
if (uid_is_valid(u->ref_uid))
unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
@ -2809,6 +2974,14 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
bus_track_serialize(u->bus_track, f, "ref");
for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
uint64_t v;
r = unit_get_ip_accounting(u, m, &v);
if (r >= 0)
unit_serialize_item_format(u, f, ip_accounting_metric_field[m], "%" PRIu64, v);
}
if (serialize_jobs) {
if (u->job) {
fprintf(f, "job\n");
@ -2915,6 +3088,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
for (;;) {
char line[LINE_MAX], *l, *v;
CGroupIPAccountingMetric m;
size_t k;
if (!fgets(line, sizeof(line), f)) {
@ -3069,6 +3243,20 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
continue;
} else if (streq(l, "cgroup-bpf-realized")) {
int i;
r = safe_atoi(v, &i);
if (r < 0)
log_unit_debug(u, "Failed to parse cgroup BPF state %s, ignoring.", v);
else
u->cgroup_bpf_state =
i < 0 ? UNIT_CGROUP_BPF_INVALIDATED :
i > 0 ? UNIT_CGROUP_BPF_ON :
UNIT_CGROUP_BPF_OFF;
continue;
} else if (streq(l, "ref-uid")) {
uid_t uid;
@ -3111,6 +3299,21 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
continue;
}
/* Check if this is an IP accounting metric serialization field */
for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++)
if (streq(l, ip_accounting_metric_field[m]))
break;
if (m < _CGROUP_IP_ACCOUNTING_METRIC_MAX) {
uint64_t c;
r = safe_atou64(v, &c);
if (r < 0)
log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", v);
else
u->ip_accounting_extra[m] = c;
continue;
}
if (unit_can_serialize(u)) {
if (rt) {
r = exec_runtime_deserialize_item(u, rt, l, v, fds);
@ -3137,6 +3340,11 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
if (!dual_timestamp_is_set(&u->state_change_timestamp))
dual_timestamp_get(&u->state_change_timestamp);
/* Let's make sure that everything that is deserialized also gets any potential new cgroup settings applied
* after we are done. For that we invalidate anything already realized, so that we can realize it again. */
unit_invalidate_cgroup(u, _CGROUP_MASK_ALL);
unit_invalidate_cgroup_bpf(u);
return 0;
}
@ -4169,6 +4377,7 @@ void unit_warn_if_dir_nonempty(Unit *u, const char* where) {
log_struct(LOG_NOTICE,
"MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
LOG_UNIT_ID(u),
LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "Directory %s to mount over is not empty, mounting anyway.", where),
"WHERE=%s", where,
NULL);
@ -4191,6 +4400,7 @@ int unit_fail_if_symlink(Unit *u, const char* where) {
log_struct(LOG_ERR,
"MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
LOG_UNIT_ID(u),
LOG_UNIT_INVOCATION_ID(u),
LOG_UNIT_MESSAGE(u, "Mount on symlink %s not allowed.", where),
"WHERE=%s", where,
NULL);
@ -4436,3 +4646,43 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) {
p->cgroup_path = u->cgroup_path;
SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
}
int unit_fork_helper_process(Unit *u, pid_t *ret) {
        pid_t child;
        int r;

        assert(u);
        assert(ret);

        /* Forks off a helper process and moves it into the unit's cgroup (if the unit has a cgroup path).
         *
         * Return value: < 0 (negative errno) if fork() failed, in which case *ret is left untouched; == 0 in
         * the child; > 0 in the parent. On success *ret holds the child's PID in both processes.
         *
         * If the child cannot join the unit's cgroup it logs the error and terminates with EXIT_CGROUP, i.e.
         * it never returns to the caller in that case. */

        /* Best effort: a failure to realize the cgroup is deliberately ignored here. */
        (void) unit_realize_cgroup(u);

        child = fork();
        if (child < 0)
                return -errno;

        if (child > 0) {
                /* Parent: hand the child's PID back to the caller. */
                *ret = child;
                return 1;
        }

        /* Child from here on. */

        (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
        (void) ignore_signals(SIGPIPE, -1);

        /* Re-open logging in the child */
        log_close();
        log_open();

        if (u->cgroup_path) {
                r = cg_attach_everywhere(u->manager->cgroup_supported, u->cgroup_path, 0, NULL, NULL);
                if (r < 0) {
                        log_unit_error_errno(u, r, "Failed to join unit cgroup %s: %m", u->cgroup_path);
                        _exit(EXIT_CGROUP);
                }
        }

        *ret = getpid_cached();
        return 0;
}

View File

@ -28,11 +28,13 @@ typedef struct UnitVTable UnitVTable;
typedef struct UnitRef UnitRef;
typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;
#include "bpf-program.h"
#include "condition.h"
#include "emergency-action.h"
#include "install.h"
#include "list.h"
#include "unit-name.h"
#include "cgroup.h"
typedef enum KillOperation {
KILL_TERMINATE,
@ -70,6 +72,12 @@ struct UnitRef {
LIST_FIELDS(UnitRef, refs);
};
/* State kept in the unit's cgroup_bpf_state field. The signs of the values line up with how the
 * "cgroup-bpf-realized" serialization field is parsed back: a negative integer selects INVALIDATED, a
 * positive one selects ON, and zero selects OFF. */
typedef enum UnitCGroupBPFState {
        UNIT_CGROUP_BPF_INVALIDATED = -1,
        UNIT_CGROUP_BPF_OFF         = 0,
        UNIT_CGROUP_BPF_ON          = 1,
} UnitCGroupBPFState;
struct Unit {
Manager *manager;
@ -205,6 +213,20 @@ struct Unit {
CGroupMask cgroup_members_mask;
int cgroup_inotify_wd;
/* IP BPF Firewalling/accounting */
int ip_accounting_ingress_map_fd;
int ip_accounting_egress_map_fd;
int ipv4_allow_map_fd;
int ipv6_allow_map_fd;
int ipv4_deny_map_fd;
int ipv6_deny_map_fd;
BPFProgram *ip_bpf_ingress;
BPFProgram *ip_bpf_egress;
uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
/* How to start OnFailure units */
JobMode on_failure_job_mode;
@ -254,6 +276,8 @@ struct Unit {
bool cgroup_members_mask_valid:1;
bool cgroup_subtree_mask_valid:1;
UnitCGroupBPFState cgroup_bpf_state:2;
bool start_limit_hit:1;
/* Did we already invoke unit_coldplug() for this unit? */
@ -661,6 +685,8 @@ bool unit_shall_confirm_spawn(Unit *u);
void unit_set_exec_params(Unit *s, ExecParameters *p);
int unit_fork_helper_process(Unit *u, pid_t *ret);
/* Macros which append UNIT= or USER_UNIT= to the message */
#define log_unit_full(unit, level, error, ...) \
@ -684,3 +710,4 @@ void unit_set_exec_params(Unit *s, ExecParameters *p);
#define LOG_UNIT_MESSAGE(unit, fmt, ...) "MESSAGE=%s: " fmt, (unit)->id, ##__VA_ARGS__
#define LOG_UNIT_ID(unit) (unit)->manager->unit_log_format_string, (unit)->id
#define LOG_UNIT_INVOCATION_ID(unit) (unit)->manager->invocation_log_format_string, (unit)->invocation_id_string

View File

@ -749,7 +749,7 @@ static int submit_coredump(
const char *coredump_filename;
coredump_filename = strjoina("COREDUMP_FILENAME=", filename);
IOVEC_SET_STRING(iovec[n_iovec++], coredump_filename);
iovec[n_iovec++] = IOVEC_MAKE_STRING(coredump_filename);
} else if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
coredump_size, arg_external_size_max);
@ -804,10 +804,10 @@ log:
return 0;
}
IOVEC_SET_STRING(iovec[n_iovec++], core_message);
iovec[n_iovec++] = IOVEC_MAKE_STRING(core_message);
if (truncated)
IOVEC_SET_STRING(iovec[n_iovec++], "COREDUMP_TRUNCATED=1");
iovec[n_iovec++] = IOVEC_MAKE_STRING("COREDUMP_TRUNCATED=1");
/* Optionally store the entire coredump in the journal */
if (arg_storage == COREDUMP_STORAGE_JOURNAL) {
@ -817,11 +817,9 @@ log:
/* Store the coredump itself in the journal */
r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz);
if (r >= 0) {
iovec[n_iovec].iov_base = coredump_data;
iovec[n_iovec].iov_len = sz;
n_iovec++;
} else
if (r >= 0)
iovec[n_iovec++] = IOVEC_MAKE(coredump_data, sz);
else
log_warning_errno(r, "Failed to attach the core to the journal entry: %m");
} else
log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
@ -1070,7 +1068,7 @@ static char* set_iovec_field(struct iovec iovec[27], size_t *n_iovec, const char
x = strappend(field, value);
if (x)
IOVEC_SET_STRING(iovec[(*n_iovec)++], x);
iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(x);
return x;
}
@ -1162,7 +1160,7 @@ static int gather_pid_metadata(
if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) {
r = asprintf(&t, "COREDUMP_OWNER_UID=" UID_FMT, owner_uid);
if (r > 0)
IOVEC_SET_STRING(iovec[(*n_iovec)++], t);
iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);
}
if (sd_pid_get_slice(pid, &t) >= 0)
@ -1218,7 +1216,7 @@ static int gather_pid_metadata(
t = strjoin("COREDUMP_TIMESTAMP=", context[CONTEXT_TIMESTAMP], "000000", NULL);
if (t)
IOVEC_SET_STRING(iovec[(*n_iovec)++], t);
iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);
if (safe_atoi(context[CONTEXT_SIGNAL], &signo) >= 0 && SIGNAL_VALID(signo))
set_iovec_field(iovec, n_iovec, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(signo));
@ -1253,10 +1251,10 @@ static int process_kernel(int argc, char* argv[]) {
n_iovec = n_to_free;
IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
assert_cc(2 == LOG_CRIT);
IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2");
iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");
assert(n_iovec <= ELEMENTSOF(iovec));
@ -1344,15 +1342,15 @@ static int process_backtrace(int argc, char *argv[]) {
r = log_oom();
goto finish;
}
IOVEC_SET_STRING(iovec[n_iovec++], message);
iovec[n_iovec++] = IOVEC_MAKE_STRING(message);
} else {
for (i = 0; i < importer.iovw.count; i++)
iovec[n_iovec++] = importer.iovw.iovec[i];
}
IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
assert_cc(2 == LOG_CRIT);
IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2");
iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");
assert(n_iovec <= n_allocated);

View File

@ -114,9 +114,8 @@ _public_ int sd_journal_printv(int priority, const char *format, va_list ap) {
if (isempty(buffer+8))
return 0;
zero(iov);
IOVEC_SET_STRING(iov[0], buffer);
IOVEC_SET_STRING(iov[1], p);
iov[0] = IOVEC_MAKE_STRING(buffer);
iov[1] = IOVEC_MAKE_STRING(p);
return sd_journal_sendv(iov, 2);
}
@ -167,7 +166,7 @@ _printf_(1, 0) static int fill_iovec_sprintf(const char *format, va_list ap, int
(void) strstrip(buffer); /* strip trailing whitespace, keep prefixing whitespace */
IOVEC_SET_STRING(iov[i++], buffer);
iov[i++] = IOVEC_MAKE_STRING(buffer);
format = va_arg(ap, char *);
}
@ -259,27 +258,19 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
* newline, then the size (64bit LE), followed
* by the data and a final newline */
w[j].iov_base = iov[i].iov_base;
w[j].iov_len = c - (char*) iov[i].iov_base;
j++;
IOVEC_SET_STRING(w[j++], "\n");
w[j++] = IOVEC_MAKE(iov[i].iov_base, c - (char*) iov[i].iov_base);
w[j++] = IOVEC_MAKE_STRING("\n");
l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
w[j].iov_base = &l[i];
w[j].iov_len = sizeof(uint64_t);
j++;
w[j].iov_base = c + 1;
w[j].iov_len = iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1;
j++;
w[j++] = IOVEC_MAKE(&l[i], sizeof(uint64_t));
w[j++] = IOVEC_MAKE(c + 1, iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
} else
/* Nothing special? Then just add the line and
* append a newline */
w[j++] = iov[i];
IOVEC_SET_STRING(w[j++], "\n");
w[j++] = IOVEC_MAKE_STRING("\n");
}
if (!have_syslog_identifier &&
@ -291,9 +282,9 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
* since everything else is much nicer to retrieve
* from the outside. */
IOVEC_SET_STRING(w[j++], "SYSLOG_IDENTIFIER=");
IOVEC_SET_STRING(w[j++], program_invocation_short_name);
IOVEC_SET_STRING(w[j++], "\n");
w[j++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=");
w[j++] = IOVEC_MAKE_STRING(program_invocation_short_name);
w[j++] = IOVEC_MAKE_STRING("\n");
}
fd = journal_fd();
@ -380,9 +371,9 @@ static int fill_iovec_perror_and_send(const char *message, int skip, struct iove
xsprintf(error, "ERRNO=%i", _saved_errno_);
assert_cc(3 == LOG_ERR);
IOVEC_SET_STRING(iov[skip+0], "PRIORITY=3");
IOVEC_SET_STRING(iov[skip+1], buffer);
IOVEC_SET_STRING(iov[skip+2], error);
iov[skip+0] = IOVEC_MAKE_STRING("PRIORITY=3");
iov[skip+1] = IOVEC_MAKE_STRING(buffer);
iov[skip+2] = IOVEC_MAKE_STRING(error);
return sd_journal_sendv(iov, skip + 3);
}
@ -492,20 +483,19 @@ _public_ int sd_journal_printv_with_location(int priority, const char *file, con
* CODE_FUNC=, hence let's do it manually here. */
ALLOCA_CODE_FUNC(f, func);
zero(iov);
IOVEC_SET_STRING(iov[0], buffer);
IOVEC_SET_STRING(iov[1], p);
IOVEC_SET_STRING(iov[2], file);
IOVEC_SET_STRING(iov[3], line);
IOVEC_SET_STRING(iov[4], f);
iov[0] = IOVEC_MAKE_STRING(buffer);
iov[1] = IOVEC_MAKE_STRING(p);
iov[2] = IOVEC_MAKE_STRING(file);
iov[3] = IOVEC_MAKE_STRING(line);
iov[4] = IOVEC_MAKE_STRING(f);
return sd_journal_sendv(iov, ELEMENTSOF(iov));
}
_public_ int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) {
_cleanup_free_ struct iovec *iov = NULL;
int r, i, j;
va_list ap;
struct iovec *iov = NULL;
char *f;
va_start(ap, format);
@ -519,9 +509,9 @@ _public_ int sd_journal_send_with_location(const char *file, const char *line, c
ALLOCA_CODE_FUNC(f, func);
IOVEC_SET_STRING(iov[0], file);
IOVEC_SET_STRING(iov[1], line);
IOVEC_SET_STRING(iov[2], f);
iov[0] = IOVEC_MAKE_STRING(file);
iov[1] = IOVEC_MAKE_STRING(line);
iov[2] = IOVEC_MAKE_STRING(f);
r = sd_journal_sendv(iov, i);
@ -529,8 +519,6 @@ finish:
for (j = 3; j < i; j++)
free(iov[j].iov_base);
free(iov);
return r;
}
@ -550,9 +538,9 @@ _public_ int sd_journal_sendv_with_location(
ALLOCA_CODE_FUNC(f, func);
IOVEC_SET_STRING(niov[n++], file);
IOVEC_SET_STRING(niov[n++], line);
IOVEC_SET_STRING(niov[n++], f);
niov[n++] = IOVEC_MAKE_STRING(file);
niov[n++] = IOVEC_MAKE_STRING(line);
niov[n++] = IOVEC_MAKE_STRING(f);
return sd_journal_sendv(niov, n);
}
@ -567,9 +555,9 @@ _public_ int sd_journal_perror_with_location(
ALLOCA_CODE_FUNC(f, func);
IOVEC_SET_STRING(iov[0], file);
IOVEC_SET_STRING(iov[1], line);
IOVEC_SET_STRING(iov[2], f);
iov[0] = IOVEC_MAKE_STRING(file);
iov[1] = IOVEC_MAKE_STRING(line);
iov[2] = IOVEC_MAKE_STRING(f);
return fill_iovec_perror_and_send(message, 3, iov);
}

View File

@ -383,26 +383,26 @@ static void process_audit_string(Server *s, int type, const char *data, size_t s
return;
}
IOVEC_SET_STRING(iov[n_iov++], "_TRANSPORT=audit");
iov[n_iov++] = IOVEC_MAKE_STRING("_TRANSPORT=audit");
sprintf(source_time_field, "_SOURCE_REALTIME_TIMESTAMP=%" PRIu64,
(usec_t) seconds * USEC_PER_SEC + (usec_t) msec * USEC_PER_MSEC);
IOVEC_SET_STRING(iov[n_iov++], source_time_field);
iov[n_iov++] = IOVEC_MAKE_STRING(source_time_field);
sprintf(type_field, "_AUDIT_TYPE=%i", type);
IOVEC_SET_STRING(iov[n_iov++], type_field);
iov[n_iov++] = IOVEC_MAKE_STRING(type_field);
sprintf(id_field, "_AUDIT_ID=%" PRIu64, id);
IOVEC_SET_STRING(iov[n_iov++], id_field);
iov[n_iov++] = IOVEC_MAKE_STRING(id_field);
assert_cc(4 == LOG_FAC(LOG_AUTH));
IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_FACILITY=4");
IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_IDENTIFIER=audit");
iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=4");
iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=audit");
type_name = audit_type_name_alloca(type);
m = strjoina("MESSAGE=", type_name, " ", p);
IOVEC_SET_STRING(iov[n_iov++], m);
iov[n_iov++] = IOVEC_MAKE_STRING(m);
z = n_iov;

View File

@ -59,9 +59,10 @@ void server_forward_console(
struct timespec ts;
char tbuf[sizeof("[] ")-1 + DECIMAL_STR_MAX(ts.tv_sec) + DECIMAL_STR_MAX(ts.tv_nsec)-3 + 1];
char header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t)];
int n = 0, fd;
_cleanup_free_ char *ident_buf = NULL;
_cleanup_close_ int fd = -1;
const char *tty;
int n = 0;
assert(s);
assert(message);
@ -75,7 +76,8 @@ void server_forward_console(
xsprintf(tbuf, "[%5"PRI_TIME".%06"PRI_NSEC"] ",
ts.tv_sec,
(nsec_t)ts.tv_nsec / 1000);
IOVEC_SET_STRING(iovec[n++], tbuf);
iovec[n++] = IOVEC_MAKE_STRING(tbuf);
}
/* Second: identifier and PID */
@ -88,19 +90,19 @@ void server_forward_console(
xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
if (identifier)
IOVEC_SET_STRING(iovec[n++], identifier);
iovec[n++] = IOVEC_MAKE_STRING(identifier);
IOVEC_SET_STRING(iovec[n++], header_pid);
iovec[n++] = IOVEC_MAKE_STRING(header_pid);
} else if (identifier) {
IOVEC_SET_STRING(iovec[n++], identifier);
IOVEC_SET_STRING(iovec[n++], ": ");
iovec[n++] = IOVEC_MAKE_STRING(identifier);
iovec[n++] = IOVEC_MAKE_STRING(": ");
}
/* Fourth: message */
IOVEC_SET_STRING(iovec[n++], message);
IOVEC_SET_STRING(iovec[n++], "\n");
iovec[n++] = IOVEC_MAKE_STRING(message);
iovec[n++] = IOVEC_MAKE_STRING("\n");
tty = s->tty_path ? s->tty_path : "/dev/console";
tty = s->tty_path ?: "/dev/console";
/* Before you ask: yes, on purpose we open/close the console for each log line we write individually. This is a
* good strategy to avoid journald getting killed by the kernel's SAK concept (it doesn't fix this entirely,
@ -115,6 +117,4 @@ void server_forward_console(
if (writev(fd, iovec, n) < 0)
log_debug_errno(errno, "Failed to write to %s for logging: %m", tty);
safe_close(fd);
}

View File

@ -26,6 +26,7 @@
#include "libudev.h"
#include "sd-messages.h"
#include "alloc-util.h"
#include "escape.h"
#include "fd-util.h"
#include "format-util.h"
@ -45,11 +46,11 @@ void server_forward_kmsg(
const char *message,
const struct ucred *ucred) {
_cleanup_free_ char *ident_buf = NULL;
struct iovec iovec[5];
char header_priority[DECIMAL_STR_MAX(priority) + 3],
header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t) + 1];
int n = 0;
char *ident_buf = NULL;
assert(s);
assert(priority >= 0);
@ -68,7 +69,7 @@ void server_forward_kmsg(
/* First: priority field */
xsprintf(header_priority, "<%i>", priority);
IOVEC_SET_STRING(iovec[n++], header_priority);
iovec[n++] = IOVEC_MAKE_STRING(header_priority);
/* Second: identifier and PID */
if (ucred) {
@ -80,22 +81,20 @@ void server_forward_kmsg(
xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
if (identifier)
IOVEC_SET_STRING(iovec[n++], identifier);
iovec[n++] = IOVEC_MAKE_STRING(identifier);
IOVEC_SET_STRING(iovec[n++], header_pid);
iovec[n++] = IOVEC_MAKE_STRING(header_pid);
} else if (identifier) {
IOVEC_SET_STRING(iovec[n++], identifier);
IOVEC_SET_STRING(iovec[n++], ": ");
iovec[n++] = IOVEC_MAKE_STRING(identifier);
iovec[n++] = IOVEC_MAKE_STRING(": ");
}
/* Fourth: message */
IOVEC_SET_STRING(iovec[n++], message);
IOVEC_SET_STRING(iovec[n++], "\n");
iovec[n++] = IOVEC_MAKE_STRING(message);
iovec[n++] = IOVEC_MAKE_STRING("\n");
if (writev(s->dev_kmsg_fd, iovec, n) < 0)
log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m");
free(ident_buf);
}
static bool is_us(const char *pid) {
@ -111,11 +110,11 @@ static bool is_us(const char *pid) {
static void dev_kmsg_record(Server *s, const char *p, size_t l) {
struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS];
char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
_cleanup_free_ char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL, *identifier = NULL, *pid = NULL;
int priority, r;
unsigned n = 0, z = 0, j;
unsigned long long usec;
char *identifier = NULL, *pid = NULL, *e, *f, *k;
char *e, *f, *k;
uint64_t serial;
size_t pl;
char *kernel_device = NULL;
@ -216,7 +215,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
if (startswith(m, "_KERNEL_DEVICE="))
kernel_device = m + 15;
IOVEC_SET_STRING(iovec[n++], m);
iovec[n++] = IOVEC_MAKE_STRING(m);
z++;
l -= (e - k) + 1;
@ -236,7 +235,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
if (g) {
b = strappend("_UDEV_DEVNODE=", g);
if (b) {
IOVEC_SET_STRING(iovec[n++], b);
iovec[n++] = IOVEC_MAKE_STRING(b);
z++;
}
}
@ -245,7 +244,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
if (g) {
b = strappend("_UDEV_SYSNAME=", g);
if (b) {
IOVEC_SET_STRING(iovec[n++], b);
iovec[n++] = IOVEC_MAKE_STRING(b);
z++;
}
}
@ -261,7 +260,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
if (g) {
b = strappend("_UDEV_DEVLINK=", g);
if (b) {
IOVEC_SET_STRING(iovec[n++], b);
iovec[n++] = IOVEC_MAKE_STRING(b);
z++;
}
}
@ -274,18 +273,18 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
}
if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec) >= 0)
IOVEC_SET_STRING(iovec[n++], source_time);
iovec[n++] = IOVEC_MAKE_STRING(source_time);
IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=kernel");
if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
IOVEC_SET_STRING(iovec[n++], syslog_priority);
iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
IOVEC_SET_STRING(iovec[n++], syslog_facility);
iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
if ((priority & LOG_FACMASK) == LOG_KERN)
IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=kernel");
else {
pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid);
@ -297,33 +296,24 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
if (identifier) {
syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
if (syslog_identifier)
IOVEC_SET_STRING(iovec[n++], syslog_identifier);
iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
}
if (pid) {
syslog_pid = strappend("SYSLOG_PID=", pid);
if (syslog_pid)
IOVEC_SET_STRING(iovec[n++], syslog_pid);
iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
}
}
if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0)
IOVEC_SET_STRING(iovec[n++], message);
iovec[n++] = IOVEC_MAKE_STRING(message);
server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, priority, 0);
finish:
for (j = 0; j < z; j++)
free(iovec[j].iov_base);
free(message);
free(syslog_priority);
free(syslog_identifier);
free(syslog_pid);
free(syslog_facility);
free(source_time);
free(identifier);
free(pid);
}
static int server_read_dev_kmsg(Server *s) {

View File

@ -282,7 +282,7 @@ static int server_process_entry(
}
tn = n++;
IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
entry_size += strlen("_TRANSPORT=journal");
if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */

View File

@ -724,14 +724,14 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
char *k; \
k = newa(char, strlen(field "=") + DECIMAL_STR_MAX(type) + 1); \
sprintf(k, field "=" format, value); \
IOVEC_SET_STRING(iovec[n++], k); \
iovec[n++] = IOVEC_MAKE_STRING(k); \
}
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
if (!isempty(value)) { \
char *k; \
k = strjoina(field "=", value); \
IOVEC_SET_STRING(iovec[n++], k); \
iovec[n++] = IOVEC_MAKE_STRING(k); \
}
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
@ -739,7 +739,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
char *k; \
k = newa(char, strlen(field "=") + SD_ID128_STRING_MAX); \
sd_id128_to_string(value, stpcpy(k, field "=")); \
IOVEC_SET_STRING(iovec[n++], k); \
iovec[n++] = IOVEC_MAKE_STRING(k); \
}
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
@ -747,7 +747,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
char *k; \
k = newa(char, strlen(field "=") + value_size + 1); \
*((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
IOVEC_SET_STRING(iovec[n++], k); \
iovec[n++] = IOVEC_MAKE_STRING(k); \
} \
static void dispatch_message_real(
@ -826,20 +826,20 @@ static void dispatch_message_real(
if (tv) {
sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
IOVEC_SET_STRING(iovec[n++], source_time);
iovec[n++] = IOVEC_MAKE_STRING(source_time);
}
/* Note that strictly speaking storing the boot id here is
* redundant since the entry includes this in-line
* anyway. However, we need this indexed, too. */
if (!isempty(s->boot_id_field))
IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
if (!isempty(s->machine_id_field))
IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
if (!isempty(s->hostname_field))
IOVEC_SET_STRING(iovec[n++], s->hostname_field);
iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
assert(n <= m);
@ -870,15 +870,15 @@ void server_driver_message(Server *s, const char *message_id, const char *format
assert(format);
assert_cc(3 == LOG_FAC(LOG_DAEMON));
IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
assert_cc(6 == LOG_INFO);
IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
if (message_id)
IOVEC_SET_STRING(iovec[n++], message_id);
iovec[n++] = IOVEC_MAKE_STRING(message_id);
m = n;
va_start(ap, format);
@ -899,8 +899,8 @@ void server_driver_message(Server *s, const char *message_id, const char *format
xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
n = 3;
IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
IOVEC_SET_STRING(iovec[n++], buf);
iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
iovec[n++] = IOVEC_MAKE_STRING(buf);
dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), s->my_context, NULL, LOG_INFO, 0);
}
}

View File

@ -282,22 +282,21 @@ static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_brea
if (s->server->forward_to_wall)
server_forward_wall(s->server, priority, s->identifier, p, &s->ucred);
IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
IOVEC_SET_STRING(iovec[n++], s->id_field);
iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=stdout");
iovec[n++] = IOVEC_MAKE_STRING(s->id_field);
syslog_priority[strlen("PRIORITY=")] = '0' + LOG_PRI(priority);
IOVEC_SET_STRING(iovec[n++], syslog_priority);
iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
if (priority & LOG_FACMASK) {
xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
IOVEC_SET_STRING(iovec[n++], syslog_facility);
iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
}
if (s->identifier) {
syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
if (syslog_identifier)
IOVEC_SET_STRING(iovec[n++], syslog_identifier);
iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
}
if (line_break != LINE_BREAK_NEWLINE) {
@ -309,12 +308,12 @@ static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_brea
c = line_break == LINE_BREAK_NUL ? "_LINE_BREAK=nul" :
line_break == LINE_BREAK_LINE_MAX ? "_LINE_BREAK=line-max" :
"_LINE_BREAK=eof";
IOVEC_SET_STRING(iovec[n++], c);
iovec[n++] = IOVEC_MAKE_STRING(c);
}
message = strappend("MESSAGE=", p);
if (message)
IOVEC_SET_STRING(iovec[n++], message);
iovec[n++] = IOVEC_MAKE_STRING(message);
if (s->context)
(void) client_context_maybe_refresh(s->server, s->context, NULL, NULL, 0, NULL, USEC_INFINITY);

View File

@ -124,7 +124,7 @@ static void forward_syslog_raw(Server *s, int priority, const char *buffer, cons
if (LOG_PRI(priority) > s->max_level_syslog)
return;
IOVEC_SET_STRING(iovec, buffer);
iovec = IOVEC_MAKE_STRING(buffer);
forward_syslog_iovec(s, &iovec, 1, ucred, tv);
}
@ -135,7 +135,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
int n = 0;
time_t t;
struct tm *tm;
char *ident_buf = NULL;
_cleanup_free_ char *ident_buf = NULL;
assert(s);
assert(priority >= 0);
@ -147,7 +147,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
/* First: priority field */
xsprintf(header_priority, "<%i>", priority);
IOVEC_SET_STRING(iovec[n++], header_priority);
iovec[n++] = IOVEC_MAKE_STRING(header_priority);
/* Second: timestamp */
t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
@ -156,7 +156,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
return;
if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
return;
IOVEC_SET_STRING(iovec[n++], header_time);
iovec[n++] = IOVEC_MAKE_STRING(header_time);
/* Third: identifier and PID */
if (ucred) {
@ -168,20 +168,18 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);
if (identifier)
IOVEC_SET_STRING(iovec[n++], identifier);
iovec[n++] = IOVEC_MAKE_STRING(identifier);
IOVEC_SET_STRING(iovec[n++], header_pid);
iovec[n++] = IOVEC_MAKE_STRING(header_pid);
} else if (identifier) {
IOVEC_SET_STRING(iovec[n++], identifier);
IOVEC_SET_STRING(iovec[n++], ": ");
iovec[n++] = IOVEC_MAKE_STRING(identifier);
iovec[n++] = IOVEC_MAKE_STRING(": ");
}
/* Fourth: message */
IOVEC_SET_STRING(iovec[n++], message);
iovec[n++] = IOVEC_MAKE_STRING(message);
forward_syslog_iovec(s, iovec, n, ucred, tv);
free(ident_buf);
}
int syslog_fixup_facility(int priority) {
@ -353,29 +351,29 @@ void server_process_syslog_message(
if (s->forward_to_wall)
server_forward_wall(s, priority, identifier, buf, ucred);
IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=syslog");
xsprintf(syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK);
IOVEC_SET_STRING(iovec[n++], syslog_priority);
iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);
if (priority & LOG_FACMASK) {
xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
IOVEC_SET_STRING(iovec[n++], syslog_facility);
iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
}
if (identifier) {
syslog_identifier = strjoina("SYSLOG_IDENTIFIER=", identifier);
IOVEC_SET_STRING(iovec[n++], syslog_identifier);
iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
}
if (pid) {
syslog_pid = strjoina("SYSLOG_PID=", pid);
IOVEC_SET_STRING(iovec[n++], syslog_pid);
iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
}
message = strjoina("MESSAGE=", buf);
if (message)
IOVEC_SET_STRING(iovec[n++], message);
iovec[n++] = IOVEC_MAKE_STRING(message);
if (ucred && pid_is_valid(ucred->pid)) {
r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);

View File

@ -471,7 +471,7 @@ static int lease_parse_routes(
struct sd_dhcp_route *route = *routes + *routes_size;
int r;
r = in_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
r = in4_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
if (r < 0) {
log_debug("Failed to determine destination prefix length from class based IP, ignoring");
continue;
@ -1253,7 +1253,7 @@ int dhcp_lease_set_default_subnet_mask(sd_dhcp_lease *lease) {
address.s_addr = lease->address;
/* fall back to the default subnet masks based on address class */
r = in_addr_default_subnet_mask(&address, &mask);
r = in4_addr_default_subnet_mask(&address, &mask);
if (r < 0)
return r;

View File

@ -56,7 +56,7 @@ int sd_dhcp_server_configure_pool(sd_dhcp_server *server, struct in_addr *addres
assert_return(address->s_addr != INADDR_ANY, -EINVAL);
assert_return(prefixlen <= 32, -ERANGE);
assert_se(in_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
assert_se(in4_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
netmask = netmask_addr.s_addr;
server_off = be32toh(address->s_addr & ~netmask);

View File

@ -768,7 +768,7 @@ int config_parse_address(const char *unit,
}
if (!e && f == AF_INET) {
r = in_addr_default_prefixlen(&buffer.in, &n->prefixlen);
r = in4_addr_default_prefixlen(&buffer.in, &n->prefixlen);
if (r < 0) {
log_syntax(unit, LOG_ERR, filename, line, r, "Prefix length not specified, and a default one can not be deduced for '%s', ignoring assignment", address);
return 0;

View File

@ -237,7 +237,7 @@ static int dhcp_lease_lost(Link *link) {
if (r >= 0) {
r = sd_dhcp_lease_get_netmask(link->dhcp_lease, &netmask);
if (r >= 0)
prefixlen = in_addr_netmask_to_prefixlen(&netmask);
prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
address->family = AF_INET;
address->in_addr.in = addr;
@ -316,7 +316,7 @@ static int dhcp4_update_address(Link *link,
assert(netmask);
assert(lifetime);
prefixlen = in_addr_netmask_to_prefixlen(netmask);
prefixlen = in4_addr_netmask_to_prefixlen(netmask);
r = address_new(&addr);
if (r < 0)
@ -406,7 +406,7 @@ static int dhcp_lease_acquired(sd_dhcp_client *client, Link *link) {
if (r < 0)
return log_link_error_errno(link, r, "DHCP error: No netmask: %m");
prefixlen = in_addr_netmask_to_prefixlen(&netmask);
prefixlen = in4_addr_netmask_to_prefixlen(&netmask);
r = sd_dhcp_lease_get_router(lease, &gateway);
if (r < 0 && r != -ENODATA)

View File

@ -1616,6 +1616,27 @@ static int setup_dev_console(const char *dest, const char *console) {
return mount_verbose(LOG_ERR, console, to, NULL, MS_BIND, NULL);
}
static int setup_keyring(void) {
        key_serial_t keyring;

        /* Join a fresh session keyring for the container, so that the keyring of the session systemd-nspawn was
         * started from does not leak into it. By default keyctl() and request_key() are blocked via seccomp
         * anyway, which makes this step not strictly necessary; it matters when people explicitly whitelist
         * those system calls.
         *
         * Returns 0 on success and also when the kernel has no keyring support or denies access (both are
         * only logged at debug level); any other failure is logged as an error and returned. */

        keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
        if (keyring != -1)
                return 0;

        switch (errno) {

        case ENOSYS:
                log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
                break;

        case EACCES:
        case EPERM:
                log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
                break;

        default:
                return log_error_errno(errno, "Setting up kernel keyring failed: %m");
        }

        return 0;
}
static int setup_kmsg(const char *dest, int kmsg_socket) {
const char *from, *to;
_cleanup_umask_ mode_t u;
@ -2642,6 +2663,10 @@ static int outer_child(
if (r < 0)
return r;
r = setup_keyring();
if (r < 0)
return r;
r = setup_seccomp(arg_caps_retain, arg_syscall_whitelist, arg_syscall_blacklist);
if (r < 0)
return r;

View File

@ -28,6 +28,8 @@
#include "errno-list.h"
#include "escape.h"
#include "hashmap.h"
#include "hostname-util.h"
#include "in-addr-util.h"
#include "list.h"
#include "locale-util.h"
#include "mount-util.h"
@ -66,6 +68,31 @@ int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) {
&u->job_path);
}
static int bus_append_ip_address_access(sd_bus_message *m, int family, const union in_addr_union *prefix, unsigned char prefixlen) {
        int r;

        assert(m);
        assert(prefix);

        /* Appends one "(iayu)" struct to the message: the address family, the raw address bytes
         * (FAMILY_ADDRESS_SIZE(family) of them, read from *prefix) and the prefix length. Returns the result
         * of closing the container on success, or the first negative error from any step; later steps are
         * skipped once one has failed. */

        r = sd_bus_message_open_container(m, 'r', "iayu");
        if (r >= 0)
                r = sd_bus_message_append(m, "i", family);
        if (r >= 0)
                r = sd_bus_message_append_array(m, 'y', prefix, FAMILY_ADDRESS_SIZE(family));
        if (r >= 0)
                r = sd_bus_message_append(m, "u", prefixlen);
        if (r < 0)
                return r;

        return sd_bus_message_close_container(m);
}
int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignment) {
const char *eq, *field;
UnitDependency dep;
@ -207,13 +234,13 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "sv", sn, "t", l.rlim_cur);
} else if (STR_IN_SET(field,
"CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting", "TasksAccounting",
"SendSIGHUP", "SendSIGKILL", "WakeSystem", "DefaultDependencies",
"IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate", "RemainAfterExit",
"PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
"SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
"RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
"ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
"CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting",
"TasksAccounting", "IPAccounting", "SendSIGHUP", "SendSIGKILL", "WakeSystem",
"DefaultDependencies", "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate",
"RemainAfterExit", "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
"NoNewPrivileges", "SyslogLevelPrefix", "Delegate", "RemainAfterElapse",
"MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC",
"ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
"CPUSchedulingResetOnFork", "LockPersonality")) {
r = parse_boolean(eq);
@ -433,6 +460,98 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
r = sd_bus_message_append(m, "v", "a(st)", 1, path, u);
}
} else if (STR_IN_SET(field, "IPAddressAllow", "IPAddressDeny")) {
/* IPAddressAllow= / IPAddressDeny=: the value is sent as a variant holding an array of
 * (address family, address bytes, prefix length) structs. Besides a literal prefix, the
 * keywords "any", "localhost", "link-local" and "multicast" expand to one IPv4 and one
 * IPv6 entry each. Every message construction failure is reported through
 * bus_log_create_error() so that no failure path returns silently. */
if (isempty(eq))
        /* Nothing after '=': append a variant carrying an empty "a(iayu)" array. */
        r = sd_bus_message_append(m, "v", "a(iayu)", 0);
else {
        unsigned char prefixlen;
        union in_addr_union prefix = {};
        int family;

        r = sd_bus_message_open_container(m, 'v', "a(iayu)");
        if (r < 0)
                return bus_log_create_error(r);

        r = sd_bus_message_open_container(m, 'a', "(iayu)");
        if (r < 0)
                return bus_log_create_error(r);

        if (streq(eq, "any")) {
                /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */

                /* prefix is still all zeroes here, which serves as the address for both families */
                r = bus_append_ip_address_access(m, AF_INET, &prefix, 0);
                if (r < 0)
                        return bus_log_create_error(r);

                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 0);
                if (r < 0)
                        return bus_log_create_error(r);

        } else if (is_localhost(eq)) {
                /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */

                prefix.in.s_addr = htobe32(0x7f000000);
                r = bus_append_ip_address_access(m, AF_INET, &prefix, 8);
                if (r < 0)
                        return bus_log_create_error(r);

                prefix.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 128);
                if (r < 0)
                        /* Log like every other failure path here instead of returning the bare error */
                        return bus_log_create_error(r);

        } else if (streq(eq, "link-local")) {
                /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */

                prefix.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
                r = bus_append_ip_address_access(m, AF_INET, &prefix, 16);
                if (r < 0)
                        return bus_log_create_error(r);

                /* The compound literal zeroes all address words that are not named explicitly */
                prefix.in6 = (struct in6_addr) {
                        .__in6_u.__u6_addr32[0] = htobe32(0xfe800000)
                };
                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 64);
                if (r < 0)
                        return bus_log_create_error(r);

        } else if (streq(eq, "multicast")) {
                /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */

                prefix.in.s_addr = htobe32((UINT32_C(224) << 24));
                r = bus_append_ip_address_access(m, AF_INET, &prefix, 4);
                if (r < 0)
                        return bus_log_create_error(r);

                prefix.in6 = (struct in6_addr) {
                        .__in6_u.__u6_addr32[0] = htobe32(0xff000000)
                };
                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 8);
                if (r < 0)
                        return bus_log_create_error(r);

        } else {
                /* Anything else must parse as an address with an optional prefix length; the parser
                 * fills in family, prefix and prefixlen. */
                r = in_addr_prefix_from_string_auto(eq, &family, &prefix, &prefixlen);
                if (r < 0)
                        return log_error_errno(r, "Failed to parse IP address prefix: %s", eq);

                r = bus_append_ip_address_access(m, family, &prefix, prefixlen);
                if (r < 0)
                        return bus_log_create_error(r);
        }

        /* Close the array, then the variant */
        r = sd_bus_message_close_container(m);
        if (r < 0)
                return bus_log_create_error(r);

        r = sd_bus_message_close_container(m);
        if (r < 0)
                return bus_log_create_error(r);
}
} else if (streq(field, "CPUSchedulingPolicy")) {
int n;

View File

@ -72,7 +72,7 @@ static int entry_fill_basics(
}
if (source) {
entry->ip.src = source->in;
in_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
}
if (out_interface) {
@ -84,7 +84,7 @@ static int entry_fill_basics(
}
if (destination) {
entry->ip.dst = destination->in;
in_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
}
return 0;

673
src/shared/linux/bpf.h Normal file
View File

@ -0,0 +1,673 @@
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#ifndef __LINUX_BPF_H__
#define __LINUX_BPF_H__
#include <linux/types.h>
#include <linux/bpf_common.h>
/* Extended instruction set based on top of classic BPF */
/* instruction classes */
#define BPF_ALU64 0x07 /* alu mode in double word width */
/* ld/ldx fields */
#define BPF_DW 0x18 /* double word */
#define BPF_XADD 0xc0 /* exclusive add */
/* alu/jmp fields */
#define BPF_MOV 0xb0 /* mov reg to reg */
#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
/* change endianness of a register */
#define BPF_END 0xd0 /* flags for endianness conversion: */
#define BPF_TO_LE 0x00 /* convert to little-endian */
#define BPF_TO_BE 0x08 /* convert to big-endian */
#define BPF_FROM_LE BPF_TO_LE
#define BPF_FROM_BE BPF_TO_BE
#define BPF_JNE 0x50 /* jump != */
#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
#define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */
/* Register numbers */
/* Register indices as stored in the 4-bit dst_reg/src_reg fields of struct bpf_insn.
 * Enumerators count up from 0, so __MAX_BPF_REG evaluates to 11, i.e. the number of
 * registers named here (BPF_REG_0 .. BPF_REG_10). */
enum {
BPF_REG_0 = 0,
BPF_REG_1,
BPF_REG_2,
BPF_REG_3,
BPF_REG_4,
BPF_REG_5,
BPF_REG_6,
BPF_REG_7,
BPF_REG_8,
BPF_REG_9,
BPF_REG_10,
/* count sentinel, not a register of its own */
__MAX_BPF_REG,
};
/* BPF has 10 general purpose 64-bit registers and stack frame. */
#define MAX_BPF_REG __MAX_BPF_REG
/* One eBPF instruction. The two register numbers are 4-bit bitfields declared back to
 * back on the same __u8 base type.
 * NOTE(review): presumably the whole struct is 8 bytes (1 + 1 + 2 + 4) — confirm with sizeof. */
struct bpf_insn {
__u8 code; /* opcode */
__u8 dst_reg:4; /* dest register */
__u8 src_reg:4; /* source register */
__s16 off; /* signed offset */
__s32 imm; /* signed immediate constant */
};
/* Key of a BPF_MAP_TYPE_LPM_TRIE entry */
/* data[] is declared with zero length, so sizeof(struct bpf_lpm_trie_key) only covers
 * prefixlen; whoever builds a key has to allocate room for the address bytes that follow. */
struct bpf_lpm_trie_key {
__u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
__u8 data[0]; /* Arbitrary size */
};
/* BPF syscall commands, see bpf(2) man-page for details. */
/* No explicit values are given: commands are numbered from 0 in declaration order
 * (BPF_MAP_CREATE == 0 ... BPF_PROG_TEST_RUN == 10).
 * NOTE(review): presumably these numbers are ABI, so new commands may only be appended — confirm. */
enum bpf_cmd {
BPF_MAP_CREATE,
BPF_MAP_LOOKUP_ELEM,
BPF_MAP_UPDATE_ELEM,
BPF_MAP_DELETE_ELEM,
BPF_MAP_GET_NEXT_KEY,
BPF_PROG_LOAD,
BPF_OBJ_PIN,
BPF_OBJ_GET,
BPF_PROG_ATTACH,
BPF_PROG_DETACH,
BPF_PROG_TEST_RUN,
};
/* Values for the map_type field of union bpf_attr. Numbered implicitly from 0
 * (BPF_MAP_TYPE_UNSPEC) to 13 (BPF_MAP_TYPE_HASH_OF_MAPS) in declaration order. */
enum bpf_map_type {
BPF_MAP_TYPE_UNSPEC,
BPF_MAP_TYPE_HASH,
BPF_MAP_TYPE_ARRAY,
BPF_MAP_TYPE_PROG_ARRAY,
BPF_MAP_TYPE_PERF_EVENT_ARRAY,
BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY,
BPF_MAP_TYPE_STACK_TRACE,
BPF_MAP_TYPE_CGROUP_ARRAY,
BPF_MAP_TYPE_LRU_HASH,
BPF_MAP_TYPE_LRU_PERCPU_HASH,
BPF_MAP_TYPE_LPM_TRIE,
BPF_MAP_TYPE_ARRAY_OF_MAPS,
BPF_MAP_TYPE_HASH_OF_MAPS,
};
/* Values for the prog_type field of union bpf_attr. Numbered implicitly from 0
 * (BPF_PROG_TYPE_UNSPEC) to 12 (BPF_PROG_TYPE_LWT_XMIT) in declaration order. */
enum bpf_prog_type {
BPF_PROG_TYPE_UNSPEC,
BPF_PROG_TYPE_SOCKET_FILTER,
BPF_PROG_TYPE_KPROBE,
BPF_PROG_TYPE_SCHED_CLS,
BPF_PROG_TYPE_SCHED_ACT,
BPF_PROG_TYPE_TRACEPOINT,
BPF_PROG_TYPE_XDP,
BPF_PROG_TYPE_PERF_EVENT,
BPF_PROG_TYPE_CGROUP_SKB,
BPF_PROG_TYPE_CGROUP_SOCK,
BPF_PROG_TYPE_LWT_IN,
BPF_PROG_TYPE_LWT_OUT,
BPF_PROG_TYPE_LWT_XMIT,
};
/* Attach points, numbered implicitly from 0. __MAX_BPF_ATTACH_TYPE comes last and therefore
 * equals the number of attach types declared before it (3). */
enum bpf_attach_type {
BPF_CGROUP_INET_INGRESS,
BPF_CGROUP_INET_EGRESS,
BPF_CGROUP_INET_SOCK_CREATE,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
* to the given target_fd cgroup the descendent cgroup will be able to
* override effective bpf program that was inherited from this cgroup
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will perform strict alignment checking as if the kernel
* has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
* and NET_IP_ALIGN defined to 2.
*/
#define BPF_F_STRICT_ALIGNMENT (1U << 0)
#define BPF_PSEUDO_MAP_FD 1
/* flags for BPF_MAP_UPDATE_ELEM command */
#define BPF_ANY 0 /* create new element or update existing */
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
#define BPF_EXIST 2 /* update existing element */
#define BPF_F_NO_PREALLOC (1U << 0)
/* Instead of having one common LRU list in the
* BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
* which can scale and perform better.
* Note, the LRU nodes (including free nodes) cannot be moved
* across different LRU lists.
*/
#define BPF_F_NO_COMMON_LRU (1U << 1)
/* Argument block for the BPF commands. Each member struct belongs to one command family and
 * all of them overlay the same storage, so only the fields of the struct matching the issued
 * command are meaningful. All member structs are anonymous (their fields are reached directly
 * through the union) except the last one, which is named "test". The union as a whole is
 * aligned to 8 bytes. */
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
__u32 key_size; /* size of key in bytes */
__u32 value_size; /* size of value in bytes */
__u32 max_entries; /* max number of entries in a map */
__u32 map_flags; /* prealloc or not */
__u32 inner_map_fd; /* fd pointing to the inner map */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
__u32 map_fd;
__aligned_u64 key;
/* value and next_key share storage; which one applies depends on the command */
union {
__aligned_u64 value;
__aligned_u64 next_key;
};
__u64 flags;
};
struct { /* anonymous struct used by BPF_PROG_LOAD command */
__u32 prog_type; /* one of enum bpf_prog_type */
__u32 insn_cnt;
__aligned_u64 insns;
__aligned_u64 license;
__u32 log_level; /* verbosity level of verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied buffer */
__u32 kern_version; /* checked when prog_type=kprobe */
__u32 prog_flags;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
__aligned_u64 pathname;
__u32 bpf_fd;
};
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
__u32 target_fd; /* container object to attach to */
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
__u32 prog_fd;
__u32 retval;
__u32 data_size_in;
__u32 data_size_out;
__aligned_u64 data_in;
__aligned_u64 data_out;
__u32 repeat;
__u32 duration;
} test;
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
*
* void *bpf_map_lookup_elem(&map, &key)
* Return: Map value or NULL
*
* int bpf_map_update_elem(&map, &key, &value, flags)
* Return: 0 on success or negative error
*
* int bpf_map_delete_elem(&map, &key)
* Return: 0 on success or negative error
*
* int bpf_probe_read(void *dst, int size, void *src)
* Return: 0 on success or negative error
*
* u64 bpf_ktime_get_ns(void)
* Return: current ktime
*
* int bpf_trace_printk(const char *fmt, int fmt_size, ...)
* Return: length of buffer written or negative error
*
* u32 bpf_prandom_u32(void)
* Return: random value
*
* u32 bpf_raw_smp_processor_id(void)
* Return: SMP processor ID
*
* int bpf_skb_store_bytes(skb, offset, from, len, flags)
* store bytes into packet
* @skb: pointer to skb
* @offset: offset within packet from skb->mac_header
* @from: pointer where to copy bytes from
* @len: number of bytes to store into packet
* @flags: bit 0 - if true, recompute skb->csum
* other bits - reserved
* Return: 0 on success or negative error
*
* int bpf_l3_csum_replace(skb, offset, from, to, flags)
* recompute IP checksum
* @skb: pointer to skb
* @offset: offset within packet where IP checksum is located
* @from: old value of header field
* @to: new value of header field
* @flags: bits 0-3 - size of header field
* other bits - reserved
* Return: 0 on success or negative error
*
* int bpf_l4_csum_replace(skb, offset, from, to, flags)
* recompute TCP/UDP checksum
* @skb: pointer to skb
* @offset: offset within packet where TCP/UDP checksum is located
* @from: old value of header field
* @to: new value of header field
* @flags: bits 0-3 - size of header field
* bit 4 - is pseudo header
* other bits - reserved
* Return: 0 on success or negative error
*
* int bpf_tail_call(ctx, prog_array_map, index)
* jump into another BPF program
* @ctx: context pointer passed to next program
* @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
* @index: index inside array that selects specific program to run
* Return: 0 on success or negative error
*
* int bpf_clone_redirect(skb, ifindex, flags)
* redirect to another netdev
* @skb: pointer to skb
* @ifindex: ifindex of the net device
* @flags: bit 0 - if set, redirect to ingress instead of egress
* other bits - reserved
* Return: 0 on success or negative error
*
* u64 bpf_get_current_pid_tgid(void)
* Return: current->tgid << 32 | current->pid
*
* u64 bpf_get_current_uid_gid(void)
* Return: current_gid << 32 | current_uid
*
* int bpf_get_current_comm(char *buf, int size_of_buf)
* stores current->comm into buf
* Return: 0 on success or negative error
*
* u32 bpf_get_cgroup_classid(skb)
* retrieve a proc's classid
* @skb: pointer to skb
* Return: classid if != 0
*
* int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
* Return: 0 on success or negative error
*
* int bpf_skb_vlan_pop(skb)
* Return: 0 on success or negative error
*
* int bpf_skb_get_tunnel_key(skb, key, size, flags)
* int bpf_skb_set_tunnel_key(skb, key, size, flags)
* retrieve or populate tunnel metadata
* @skb: pointer to skb
* @key: pointer to 'struct bpf_tunnel_key'
* @size: size of 'struct bpf_tunnel_key'
* @flags: room for future extensions
* Return: 0 on success or negative error
*
* u64 bpf_perf_event_read(&map, index)
* Return: Number events read or error code
*
* int bpf_redirect(ifindex, flags)
* redirect to another netdev
* @ifindex: ifindex of the net device
* @flags: bit 0 - if set, redirect to ingress instead of egress
* other bits - reserved
* Return: TC_ACT_REDIRECT
*
* u32 bpf_get_route_realm(skb)
* retrieve a dst's tclassid
* @skb: pointer to skb
* Return: realm if != 0
*
* int bpf_perf_event_output(ctx, map, index, data, size)
* output perf raw sample
* @ctx: struct pt_regs*
* @map: pointer to perf_event_array map
* @index: index of event in the map
* @data: data on stack to be output as raw data
* @size: size of data
* Return: 0 on success or negative error
*
* int bpf_get_stackid(ctx, map, flags)
* walk user or kernel stack and return id
* @ctx: struct pt_regs*
* @map: pointer to stack_trace map
* @flags: bits 0-7 - number of stack frames to skip
* bit 8 - collect user stack instead of kernel
* bit 9 - compare stacks by hash only
* bit 10 - if two different stacks hash into the same stackid
* discard old
* other bits - reserved
* Return: >= 0 stackid on success or negative error
*
* s64 bpf_csum_diff(from, from_size, to, to_size, seed)
* calculate csum diff
* @from: raw from buffer
* @from_size: length of from buffer
* @to: raw to buffer
* @to_size: length of to buffer
* @seed: optional seed
* Return: csum result or negative error code
*
* int bpf_skb_get_tunnel_opt(skb, opt, size)
* retrieve tunnel options metadata
* @skb: pointer to skb
* @opt: pointer to raw tunnel option data
* @size: size of @opt
* Return: option size
*
* int bpf_skb_set_tunnel_opt(skb, opt, size)
* populate tunnel options metadata
* @skb: pointer to skb
* @opt: pointer to raw tunnel option data
* @size: size of @opt
* Return: 0 on success or negative error
*
* int bpf_skb_change_proto(skb, proto, flags)
* Change protocol of the skb. Currently supported is v4 -> v6,
* v6 -> v4 transitions. The helper will also resize the skb. eBPF
* program is expected to fill the new headers via skb_store_bytes
* and lX_csum_replace.
* @skb: pointer to skb
* @proto: new skb->protocol type
* @flags: reserved
* Return: 0 on success or negative error
*
* int bpf_skb_change_type(skb, type)
* Change packet type of skb.
* @skb: pointer to skb
* @type: new skb->pkt_type type
* Return: 0 on success or negative error
*
* int bpf_skb_under_cgroup(skb, map, index)
* Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
* Return:
* == 0 skb failed the cgroup2 descendant test
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*
* u32 bpf_get_hash_recalc(skb)
* Retrieve and possibly recalculate skb->hash.
* @skb: pointer to skb
* Return: hash
*
* u64 bpf_get_current_task(void)
* Returns current task_struct
* Return: current
*
* int bpf_probe_write_user(void *dst, void *src, int len)
* safely attempt to write to a location
* @dst: destination address in userspace
* @src: source address on stack
* @len: number of bytes to copy
* Return: 0 on success or negative error
*
* int bpf_current_task_under_cgroup(map, index)
* Check cgroup2 membership of current task
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
* Return:
* == 0 current failed the cgroup2 descendant test
* == 1 current succeeded the cgroup2 descendant test
* < 0 error
*
* int bpf_skb_change_tail(skb, len, flags)
* The helper will resize the skb to the given new size, to be used f.e.
* with control messages.
* @skb: pointer to skb
* @len: new skb length
* @flags: reserved
* Return: 0 on success or negative error
*
* int bpf_skb_pull_data(skb, len)
* The helper will pull in non-linear data in case the skb is non-linear
* and not all of len are part of the linear section. Only needed for
* read/write with direct packet access.
* @skb: pointer to skb
* @len: len to make read/writeable
* Return: 0 on success or negative error
*
* s64 bpf_csum_update(skb, csum)
* Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
* @skb: pointer to skb
* @csum: csum to add
* Return: csum on success or negative error
*
* void bpf_set_hash_invalid(skb)
* Invalidate current skb->hash.
* @skb: pointer to skb
*
* int bpf_get_numa_node_id()
* Return: Id of current NUMA node.
*
* int bpf_skb_change_head()
* Grows headroom of skb and adjusts MAC header offset accordingly.
* Will extend/reallocate as required automatically.
* May change skb data pointer and will thus invalidate any check
* performed for direct packet access.
* @skb: pointer to skb
* @len: length of header to be pushed in front
* @flags: Flags (unused for now)
* Return: 0 on success or negative error
*
* int bpf_xdp_adjust_head(xdp_md, delta)
* Adjust the xdp_md.data by delta
* @xdp_md: pointer to xdp_md
* @delta: A positive/negative integer to be added to xdp_md.data
* Return: 0 on success or negative on error
*
* int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
* Copy a NUL terminated string from unsafe address. In case the string
* length is smaller than size, the target is not padded with further NUL
* bytes. In case the string length is larger than size, just count-1
* bytes are copied and the last byte is set to NUL.
* @dst: destination address
* @size: maximum number of bytes to copy, including the trailing NUL
* @unsafe_ptr: unsafe address
* Return:
* > 0 length of the string including the trailing NUL on success
* < 0 error
*
* u64 bpf_get_socket_cookie(skb)
* Get the cookie for the socket stored inside sk_buff.
* @skb: pointer to skb
* Return: 8 Bytes non-decreasing number on success or 0 if the socket
* field is missing inside sk_buff
*
* u32 bpf_get_socket_uid(skb)
* Get the owner uid of the socket stored inside sk_buff.
* @skb: pointer to skb
* Return: uid of the socket owner on success or overflowuid if failed.
*/
/* X-macro listing every helper function name. The caller supplies FN, which is applied to each
 * bare name in turn; every entry, including the last one, is followed by a comma, so the
 * expansion can be dropped straight into an enumerator or initializer list. When it is used to
 * generate an enum, the position in this list determines the numeric value (unspec is first,
 * hence 0).
 * NOTE(review): presumably new helpers may only be appended at the end to keep the existing
 * numbers stable — confirm. */
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(map_lookup_elem), \
FN(map_update_elem), \
FN(map_delete_elem), \
FN(probe_read), \
FN(ktime_get_ns), \
FN(trace_printk), \
FN(get_prandom_u32), \
FN(get_smp_processor_id), \
FN(skb_store_bytes), \
FN(l3_csum_replace), \
FN(l4_csum_replace), \
FN(tail_call), \
FN(clone_redirect), \
FN(get_current_pid_tgid), \
FN(get_current_uid_gid), \
FN(get_current_comm), \
FN(get_cgroup_classid), \
FN(skb_vlan_push), \
FN(skb_vlan_pop), \
FN(skb_get_tunnel_key), \
FN(skb_set_tunnel_key), \
FN(perf_event_read), \
FN(redirect), \
FN(get_route_realm), \
FN(perf_event_output), \
FN(skb_load_bytes), \
FN(get_stackid), \
FN(csum_diff), \
FN(skb_get_tunnel_opt), \
FN(skb_set_tunnel_opt), \
FN(skb_change_proto), \
FN(skb_change_type), \
FN(skb_under_cgroup), \
FN(get_hash_recalc), \
FN(get_current_task), \
FN(probe_write_user), \
FN(current_task_under_cgroup), \
FN(skb_change_tail), \
FN(skb_pull_data), \
FN(csum_update), \
FN(set_hash_invalid), \
FN(get_numa_node_id), \
FN(skb_change_head), \
FN(xdp_adjust_head), \
FN(probe_read_str), \
FN(get_socket_cookie), \
FN(get_socket_uid),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
*/
/* __BPF_ENUM_FN pastes the BPF_FUNC_ prefix onto a helper name. Feeding it to
 * __BPF_FUNC_MAPPER turns the helper list into enumerators BPF_FUNC_unspec (0),
 * BPF_FUNC_map_lookup_elem (1), and so on in list order. __BPF_FUNC_MAX_ID follows the
 * generated entries and is therefore one greater than the last helper id. The pasting
 * macro is only needed for this expansion and is undefined again right after it. */
#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
enum bpf_func_id {
__BPF_FUNC_MAPPER(__BPF_ENUM_FN)
__BPF_FUNC_MAX_ID,
};
#undef __BPF_ENUM_FN
/* All flags used by eBPF helper functions, placed here. */
/* BPF_FUNC_skb_store_bytes flags. */
#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
#define BPF_F_INVALIDATE_HASH (1ULL << 1)
/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
* First 4 bits are for passing the header field size.
*/
#define BPF_F_HDR_FIELD_MASK 0xfULL
/* BPF_FUNC_l4_csum_replace flags. */
#define BPF_F_PSEUDO_HDR (1ULL << 4)
#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
#define BPF_F_MARK_ENFORCE (1ULL << 6)
/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
#define BPF_F_INGRESS (1ULL << 0)
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
/* BPF_FUNC_get_stackid flags. */
#define BPF_F_SKIP_FIELD_MASK 0xffULL
#define BPF_F_USER_STACK (1ULL << 8)
#define BPF_F_FAST_STACK_CMP (1ULL << 9)
#define BPF_F_REUSE_STACKID (1ULL << 10)
/* BPF_FUNC_skb_set_tunnel_key flags. */
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
/* Every member is a __u32; cb is the only array (five elements).
 * NOTE(review): data and data_end are plain 32-bit fields in this declaration; how they relate
 * to real packet pointers is not visible here — consult the kernel documentation. */
struct __sk_buff {
__u32 len;
__u32 pkt_type;
__u32 mark;
__u32 queue_mapping;
__u32 protocol;
__u32 vlan_present;
__u32 vlan_tci;
__u32 vlan_proto;
__u32 priority;
__u32 ingress_ifindex;
__u32 ifindex;
__u32 tc_index;
__u32 cb[5];
__u32 hash;
__u32 tc_classid;
__u32 data;
__u32 data_end;
__u32 napi_id;
};
/* Tunnel metadata block; the helper descriptions name it as the @key argument of
 * bpf_skb_get_tunnel_key()/bpf_skb_set_tunnel_key(). */
struct bpf_tunnel_key {
__u32 tunnel_id;
/* IPv4 and IPv6 remote address share storage.
 * NOTE(review): presumably BPF_F_TUNINFO_IPV6 selects which one is valid — confirm. */
union {
__u32 remote_ipv4;
__u32 remote_ipv6[4];
};
__u8 tunnel_tos;
__u8 tunnel_ttl;
__u16 tunnel_ext;
__u32 tunnel_label;
};
/* Generic BPF return codes which all BPF program types may support.
* The values are binary compatible with their TC_ACT_* counter-part to
* provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
* programs.
*
* XDP is handled separately, see XDP_*.
*/
/* Values are assigned explicitly and leave gaps on purpose: only 0, 2 and 7 are defined here,
 * the numbers in between are marked reserved below. */
enum bpf_ret_code {
BPF_OK = 0,
/* 1 reserved */
BPF_DROP = 2,
/* 3-6 reserved */
BPF_REDIRECT = 7,
/* >127 are reserved for prog type specific return codes */
};
/* Four __u32 socket attributes.
 * NOTE(review): presumably the context seen by BPF_PROG_TYPE_CGROUP_SOCK programs — confirm. */
struct bpf_sock {
__u32 bound_dev_if;
__u32 family;
__u32 type;
__u32 protocol;
};
#define XDP_PACKET_HEADROOM 256
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
* in packet drop.
*/
/* Only the first value is explicit; the rest follow in order:
 * XDP_ABORTED == 0, XDP_DROP == 1, XDP_PASS == 2, XDP_TX == 3. */
enum xdp_action {
XDP_ABORTED = 0,
XDP_DROP,
XDP_PASS,
XDP_TX,
};
/* user accessible metadata for XDP packet hook
* new fields must be added to the end of this structure
*/
/* Two __u32 fields. The helper description of bpf_xdp_adjust_head() states that its delta is
 * added to xdp_md.data. */
struct xdp_md {
__u32 data;
__u32 data_end;
};
#endif /* __LINUX_BPF_H__ */

View File

@ -0,0 +1,55 @@
#ifndef __LINUX_BPF_COMMON_H__
#define __LINUX_BPF_COMMON_H__

/*
 * Opcode bit fields. An opcode is the bitwise OR of one value from each
 * applicable group below; the function-like macro heading each group masks
 * that group's bits back out of an opcode.
 */

/* Instruction classes: mask 0x07 */
#define BPF_CLASS(code) ((code) & 0x07)
#define         BPF_LD          0x00
#define         BPF_LDX         0x01
#define         BPF_ST          0x02
#define         BPF_STX         0x03
#define         BPF_ALU         0x04
#define         BPF_JMP         0x05
#define         BPF_RET         0x06
#define         BPF_MISC        0x07

/* ld/ldx fields */

/* Access size: mask 0x18 */
#define BPF_SIZE(code)  ((code) & 0x18)
#define         BPF_W           0x00
#define         BPF_H           0x08
#define         BPF_B           0x10

/* Addressing mode: mask 0xe0 */
#define BPF_MODE(code)  ((code) & 0xe0)
#define         BPF_IMM         0x00
#define         BPF_ABS         0x20
#define         BPF_IND         0x40
#define         BPF_MEM         0x60
#define         BPF_LEN         0x80
#define         BPF_MSH         0xa0

/* alu/jmp fields */

/* Operation: mask 0xf0. ALU and jump operations share this field, so their
 * values overlap (e.g. BPF_ADD and BPF_JA are both 0x00). */
#define BPF_OP(code)    ((code) & 0xf0)
#define         BPF_ADD         0x00
#define         BPF_SUB         0x10
#define         BPF_MUL         0x20
#define         BPF_DIV         0x30
#define         BPF_OR          0x40
#define         BPF_AND         0x50
#define         BPF_LSH         0x60
#define         BPF_RSH         0x70
#define         BPF_NEG         0x80
#define         BPF_MOD         0x90
#define         BPF_XOR         0xa0

#define         BPF_JA          0x00
#define         BPF_JEQ         0x10
#define         BPF_JGT         0x20
#define         BPF_JGE         0x30
#define         BPF_JSET        0x40

/* Operand source: mask 0x08 */
#define BPF_SRC(code)   ((code) & 0x08)
#define         BPF_K           0x00
#define         BPF_X           0x08

/* Only defined here if nobody defined it before this header was included */
#ifndef BPF_MAXINSNS
#define BPF_MAXINSNS 4096
#endif

#endif /* __LINUX_BPF_COMMON_H__ */

198
src/shared/linux/libbpf.h Normal file
View File

@ -0,0 +1,198 @@
/* eBPF mini library: compound-literal constructors for struct bpf_insn */
#ifndef __LIBBPF_H
#define __LIBBPF_H

#include <linux/bpf.h>

struct bpf_insn;

/* Raw code statement block. Spells out all five instruction fields; every other
 * constructor in this header is a thin wrapper that fixes some of them. */
#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)                          \
        ((struct bpf_insn) {                                            \
                .code = (CODE),                                         \
                .dst_reg = (DST),                                       \
                .src_reg = (SRC),                                       \
                .off = (OFF),                                           \
                .imm = (IMM) })

/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
#define BPF_ALU64_REG(OP, DST, SRC)                                     \
        BPF_RAW_INSN(BPF_ALU64 | BPF_OP(OP) | BPF_X, (DST), (SRC), 0, 0)

#define BPF_ALU32_REG(OP, DST, SRC)                                     \
        BPF_RAW_INSN(BPF_ALU | BPF_OP(OP) | BPF_X, (DST), (SRC), 0, 0)

/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
#define BPF_ALU64_IMM(OP, DST, IMM)                                     \
        BPF_RAW_INSN(BPF_ALU64 | BPF_OP(OP) | BPF_K, (DST), 0, 0, (IMM))

#define BPF_ALU32_IMM(OP, DST, IMM)                                     \
        BPF_RAW_INSN(BPF_ALU | BPF_OP(OP) | BPF_K, (DST), 0, 0, (IMM))

/* Short form of mov, dst_reg = src_reg */
#define BPF_MOV64_REG(DST, SRC)                                         \
        BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X, (DST), (SRC), 0, 0)

#define BPF_MOV32_REG(DST, SRC)                                         \
        BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_X, (DST), (SRC), 0, 0)

/* Short form of mov, dst_reg = imm32 */
#define BPF_MOV64_IMM(DST, IMM)                                         \
        BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_K, (DST), 0, 0, (IMM))

#define BPF_MOV32_IMM(DST, IMM)                                         \
        BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_K, (DST), 0, 0, (IMM))

/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
#define BPF_LD_IMM64(DST, IMM)                                          \
        BPF_LD_IMM64_RAW(DST, 0, IMM)

/* Expands to TWO comma-separated struct literals, so it is only usable inside an
 * initializer list: the first carries the low 32 bits of IMM, the second (opcode 0,
 * which is reserved) the high 32 bits. */
#define BPF_LD_IMM64_RAW(DST, SRC, IMM)                                 \
        BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, (DST), (SRC), 0,        \
                     (__u32) (IMM)),                                    \
        BPF_RAW_INSN(0, 0, 0, 0, ((__u64) (IMM)) >> 32)

#ifndef BPF_PSEUDO_MAP_FD
# define BPF_PSEUDO_MAP_FD 1
#endif

/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
#define BPF_LD_MAP_FD(DST, MAP_FD)                                      \
        BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)

/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
#define BPF_LD_ABS(SIZE, IMM)                                           \
        BPF_RAW_INSN(BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, 0, 0, 0, (IMM))

/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)                                \
        BPF_RAW_INSN(BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, (DST), (SRC), (OFF), 0)

/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
#define BPF_STX_MEM(SIZE, DST, SRC, OFF)                                \
        BPF_RAW_INSN(BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, (DST), (SRC), (OFF), 0)

/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
#define BPF_STX_XADD(SIZE, DST, SRC, OFF)                               \
        BPF_RAW_INSN(BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, (DST), (SRC), (OFF), 0)

/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
#define BPF_ST_MEM(SIZE, DST, OFF, IMM)                                 \
        BPF_RAW_INSN(BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, (DST), 0, (OFF), (IMM))

/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
#define BPF_JMP_REG(OP, DST, SRC, OFF)                                  \
        BPF_RAW_INSN(BPF_JMP | BPF_OP(OP) | BPF_X, (DST), (SRC), (OFF), 0)

/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
#define BPF_JMP_IMM(OP, DST, IMM, OFF)                                  \
        BPF_RAW_INSN(BPF_JMP | BPF_OP(OP) | BPF_K, (DST), 0, (OFF), (IMM))

/* Program exit */
#define BPF_EXIT_INSN()                                                 \
        BPF_RAW_INSN(BPF_JMP | BPF_EXIT, 0, 0, 0, 0)

#endif

View File

@ -3878,6 +3878,9 @@ typedef struct UnitStatusInfo {
uint64_t tasks_current;
uint64_t tasks_max;
uint64_t ip_ingress_bytes;
uint64_t ip_egress_bytes;
LIST_HEAD(ExecStatusInfo, exec);
} UnitStatusInfo;
@ -4194,6 +4197,14 @@ static void print_status_info(
if (i->status_errno > 0)
printf(" Error: %i (%s)\n", i->status_errno, strerror(i->status_errno));
if (i->ip_ingress_bytes != (uint64_t) -1 && i->ip_egress_bytes != (uint64_t) -1) {
char buf_in[FORMAT_BYTES_MAX], buf_out[FORMAT_BYTES_MAX];
printf(" IP: %s in, %s out\n",
format_bytes(buf_in, sizeof(buf_in), i->ip_ingress_bytes),
format_bytes(buf_out, sizeof(buf_out), i->ip_egress_bytes));
}
if (i->tasks_current != (uint64_t) -1) {
printf(" Tasks: %" PRIu64, i->tasks_current);
@ -4484,6 +4495,10 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo *
i->next_elapse_monotonic = u;
else if (streq(name, "NextElapseUSecRealtime"))
i->next_elapse_real = u;
else if (streq(name, "IPIngressBytes"))
i->ip_ingress_bytes = u;
else if (streq(name, "IPEgressBytes"))
i->ip_egress_bytes = u;
break;
}
@ -4998,6 +5013,8 @@ static int show_one(
.cpu_usage_nsec = (uint64_t) -1,
.tasks_current = (uint64_t) -1,
.tasks_max = (uint64_t) -1,
.ip_ingress_bytes = (uint64_t) -1,
.ip_egress_bytes = (uint64_t) -1,
};
int r;

View File

@ -103,6 +103,9 @@ _SD_BEGIN_DECLARATIONS;
#define SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR \
SD_ID128_MAKE_STR(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)
#define SD_MESSAGE_UNIT_RESOURCES SD_ID128_MAKE(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
#define SD_MESSAGE_UNIT_RESOURCES_STR SD_ID128_MAKE_STR(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
#define SD_MESSAGE_SPAWN_FAILED SD_ID128_MAKE(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
#define SD_MESSAGE_SPAWN_FAILED_STR SD_ID128_MAKE_STR(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)

View File

@ -277,6 +277,10 @@ tests += [
[],
[]],
[['src/test/test-in-addr-util.c'],
[],
[]],
[['src/test/test-barrier.c'],
[],
[]],
@ -335,6 +339,17 @@ tests += [
[libbasic],
[]],
[['src/test/test-bpf.c',
'src/test/test-helper.c'],
[libcore,
libshared],
[libmount,
threads,
librt,
libseccomp,
libselinux,
libblkid]],
[['src/test/test-hashmap.c',
'src/test/test-hashmap-plain.c',
test_hashmap_ordered_c],

162
src/test/test-bpf.c Normal file
View File

@ -0,0 +1,162 @@
/***
This file is part of systemd.
Copyright 2016 Daniel Mack
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <linux/libbpf.h>
#include <string.h>
#include <unistd.h>
#include "bpf-firewall.h"
#include "bpf-program.h"
#include "load-fragment.h"
#include "manager.h"
#include "rm-rf.h"
#include "service.h"
#include "test-helper.h"
#include "tests.h"
#include "unit.h"
/* Test driver for the BPF firewall code.
 *
 * Phase 1 builds a trivial two-instruction BPF program and, if running as root
 * on a kernel where bpf_firewall_supported() reports support, loads it into
 * the kernel.
 *
 * Phase 2 creates a oneshot service unit with IP accounting plus
 * IPAddressAllow=/IPAddressDeny= lists, compiles and loads the resulting
 * ingress/egress programs, starts the unit and checks that the first ExecStart=
 * command (ping to 127.0.0.2, which is on the allow list) exits successfully
 * while the second one (ping to 127.0.0.3, which is on the deny list) does not.
 *
 * Returns 0 on success, or EXIT_TEST_SKIP when not running as root, when
 * bpf_firewall_supported() returns 0, or when bpf_firewall_compile() fails with
 * -ENOTTY, -ENOSYS or -EPERM.
 *
 * All checks use assert_se() rather than assert(): several of the checked
 * expressions have side effects (most notably unit_start()), and assert() may
 * be compiled out in NDEBUG builds, which would silently skip those calls. */
int main(int argc, char *argv[]) {
        struct bpf_insn exit_insn[] = {
                BPF_MOV64_IMM(BPF_REG_0, 1),
                BPF_EXIT_INSN()
        };

        _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
        CGroupContext *cc = NULL;
        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
        Manager *m = NULL;
        Unit *u;
        /* Start out as an empty string: the buffer is printed with %s below before the
         * load result is checked, and nothing visible here guarantees that
         * bpf_program_load_kernel() writes to it on every path. */
        char log_buf[65535] = "";
        int r;

        log_set_max_level(LOG_DEBUG);
        log_parse_environment();
        log_open();

        enter_cgroup_subroot();
        assert_se(set_unit_path(get_testdata_dir("")) >= 0);
        assert_se(runtime_dir = setup_fake_runtime_dir());

        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
        assert_se(r == 0);

        r = bpf_program_add_instructions(p, exit_insn, ELEMENTSOF(exit_insn));
        assert_se(r == 0);

        if (getuid() != 0) {
                log_notice("Not running as root, skipping kernel related tests.");
                return EXIT_TEST_SKIP;
        }

        r = bpf_firewall_supported();
        if (r == 0) {
                log_notice("BPF firewalling not supported, skipping");
                return EXIT_TEST_SKIP;
        }
        assert_se(r > 0);

        r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf));
        assert_se(r >= 0);

        p = bpf_program_unref(p);

        /* The simple tests succeeded. Now let's try full unit-based use-case. */

        assert_se(manager_new(UNIT_FILE_USER, true, &m) >= 0);
        assert_se(manager_startup(m, NULL, NULL) >= 0);

        assert_se(u = unit_new(m, sizeof(Service)));
        assert_se(unit_add_name(u, "foo.service") == 0);
        assert_se(cc = unit_get_cgroup_context(u));
        u->perpetual = true;

        cc->ip_accounting = true;

        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "10.0.1.0/24", &cc->ip_address_allow, NULL) == 0);
        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "127.0.0.2", &cc->ip_address_allow, NULL) == 0);
        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.3", &cc->ip_address_deny, NULL) == 0);
        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "10.0.3.2/24", &cc->ip_address_deny, NULL) == 0);
        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.1/25", &cc->ip_address_deny, NULL) == 0);
        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.4", &cc->ip_address_deny, NULL) == 0);

        /* Two allow entries were added; expect exactly two list items. */
        assert_se(cc->ip_address_allow);
        assert_se(cc->ip_address_allow->items_next);
        assert_se(!cc->ip_address_allow->items_next->items_next);

        /* The deny list is defined redundantly, let's ensure it got properly reduced */
        assert_se(cc->ip_address_deny);
        assert_se(cc->ip_address_deny->items_next);
        assert_se(!cc->ip_address_deny->items_next->items_next);

        assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/usr/bin/ping -c 1 127.0.0.2 -W 5", SERVICE(u)->exec_command, u) == 0);
        assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/usr/bin/ping -c 1 127.0.0.3 -W 5", SERVICE(u)->exec_command, u) == 0);

        /* Exactly two ExecStart= commands must be queued. */
        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]);
        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next);
        assert_se(!SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->command_next);

        SERVICE(u)->type = SERVICE_ONESHOT;
        u->load_state = UNIT_LOADED;

        unit_dump(u, stdout, NULL);

        r = bpf_firewall_compile(u);
        if (IN_SET(r, -ENOTTY, -ENOSYS, -EPERM)) {
                /* Kernel doesn't support the necessary bpf bits, or masked out via seccomp? */
                manager_free(m);
                return EXIT_TEST_SKIP;
        }
        assert_se(r >= 0);

        assert_se(u->ip_bpf_ingress);
        assert_se(u->ip_bpf_egress);

        r = bpf_program_load_kernel(u->ip_bpf_ingress, log_buf, ELEMENTSOF(log_buf));

        /* Print the log buffer before checking the result, so it is visible on failure too. */
        log_notice("log:");
        log_notice("-------");
        log_notice("%s", log_buf);
        log_notice("-------");

        assert_se(r >= 0);

        r = bpf_program_load_kernel(u->ip_bpf_egress, log_buf, ELEMENTSOF(log_buf));

        log_notice("log:");
        log_notice("-------");
        log_notice("%s", log_buf);
        log_notice("-------");

        assert_se(r >= 0);

        /* Must not be a plain assert(): the call itself is what starts the unit. */
        assert_se(unit_start(u) >= 0);

        /* Pump the event loop until the service reaches a terminal state. */
        while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
                assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);

        /* First command (allowed destination) must have exited cleanly ... */
        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code == CLD_EXITED &&
                  SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status == EXIT_SUCCESS);

        /* ... while the second one (denied destination) must not have. */
        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED ||
                  SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS);

        manager_free(m);

        return 0;
}

View File

@ -609,9 +609,9 @@ static void test_writing_tmpfile(void) {
int fd, r;
struct iovec iov[3];
IOVEC_SET_STRING(iov[0], "abc\n");
IOVEC_SET_STRING(iov[1], ALPHANUMERICAL "\n");
IOVEC_SET_STRING(iov[2], "");
iov[0] = IOVEC_MAKE_STRING("abc\n");
iov[1] = IOVEC_MAKE_STRING(ALPHANUMERICAL "\n");
iov[2] = IOVEC_MAKE_STRING("");
fd = mkostemp_safe(name);
printf("tmpfile: %s", name);

View File

@ -0,0 +1,75 @@
/***
This file is part of systemd.
Copyright 2017 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <netinet/in.h>
#include "in-addr-util.h"
static void test_in_addr_prefix_from_string(const char *p, int family, int ret, const union in_addr_union *u, unsigned char prefixlen) {
union in_addr_union q;
unsigned char l;
int r;
r = in_addr_prefix_from_string(p, family, &q, &l);
assert_se(r == ret);
if (r >= 0) {
int f;
assert_se(in_addr_equal(family, &q, u));
assert_se(l == prefixlen);
r = in_addr_prefix_from_string_auto(p, &f, &q, &l);
assert_se(r >= 0);
assert_se(f == family);
assert_se(in_addr_equal(family, &q, u));
assert_se(l == prefixlen);
}
}
int main(int argc, char *argv[]) {
test_in_addr_prefix_from_string("", AF_INET, -EINVAL, NULL, 0);
test_in_addr_prefix_from_string("/", AF_INET, -EINVAL, NULL, 0);
test_in_addr_prefix_from_string("/8", AF_INET, -EINVAL, NULL, 0);
test_in_addr_prefix_from_string("1.2.3.4", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32);
test_in_addr_prefix_from_string("1.2.3.4/0", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 0);
test_in_addr_prefix_from_string("1.2.3.4/1", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 1);
test_in_addr_prefix_from_string("1.2.3.4/2", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 2);
test_in_addr_prefix_from_string("1.2.3.4/32", AF_INET, 0, &(union in_addr_union) { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } }, 32);
test_in_addr_prefix_from_string("1.2.3.4/33", AF_INET, -ERANGE, NULL, 0);
test_in_addr_prefix_from_string("1.2.3.4/-1", AF_INET, -ERANGE, NULL, 0);
test_in_addr_prefix_from_string("::1", AF_INET, -EINVAL, NULL, 0);
test_in_addr_prefix_from_string("", AF_INET6, -EINVAL, NULL, 0);
test_in_addr_prefix_from_string("/", AF_INET6, -EINVAL, NULL, 0);
test_in_addr_prefix_from_string("/8", AF_INET6, -EINVAL, NULL, 0);
test_in_addr_prefix_from_string("::1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128);
test_in_addr_prefix_from_string("::1/0", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 0);
test_in_addr_prefix_from_string("::1/1", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 1);
test_in_addr_prefix_from_string("::1/2", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 2);
test_in_addr_prefix_from_string("::1/32", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 32);
test_in_addr_prefix_from_string("::1/33", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 33);
test_in_addr_prefix_from_string("::1/64", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 64);
test_in_addr_prefix_from_string("::1/128", AF_INET6, 0, &(union in_addr_union) { .in6 = IN6ADDR_LOOPBACK_INIT }, 128);
test_in_addr_prefix_from_string("::1/129", AF_INET6, -ERANGE, NULL, 0);
test_in_addr_prefix_from_string("::1/-1", AF_INET6, -ERANGE, NULL, 0);
return 0;
}