test-cpu-set-util.c: fix typo in comment (#6916)

2017-09-26 16:07:34 +02:00 · 2017-09-26 16:07:34 +02:00 · 0cde65e263
parent 2ebc688fc1 4005677730
commit 0cde65e263
76 changed files with 4399 additions and 581 deletions
--- a/ENVIRONMENT.md
+++ b/ENVIRONMENT.md
@ -64,3 +64,17 @@ installed systemd tests:

 * `$SYSTEMD_TEST_DATA` — override the location of test data. This is useful if
  a test executable is moved to an arbitrary location.
+
+nss-systemd:
+
+* `$SYSTEMD_NSS_BYPASS_SYNTHETIC=1` — if set, `nss-systemd` won't synthesize
+  user/group records for the `root` and `nobody` users if they are missing from
+  `/etc/passwd`.
+
+* `$SYSTEMD_NSS_DYNAMIC_BYPASS=1` — if set, `nss-systemd` won't return
+  user/group records for dynamically registered service users (i.e. users
+  registered through `DynamicUser=1`).
+
+* `$SYSTEMD_NSS_BYPASS_BUS=1` — if set, `nss-systemd` won't use D-Bus to do
+  dynamic user lookups. This is primarily useful to make `nss-systemd` work
+  safely from within `dbus-daemon`.
--- a/9
+++ b/9
@ -26,6 +26,15 @@ Features:

 * replace all uses of fgets() + LINE_MAX by read_line()

+* fix logging in execute.c: extend log.c to have an optional mode where
+  log_open() is implicitly done before each log line and log_close() right
+  after. This way we don't have open fds around but logs will still
+  work. Because it is slow this mode should be used exclusively in the execute.c
+  case.
+
+* set IPAddressDeny=any on all services that shouldn't do networking (possibly
+  combined with IPAddressAllow=localhost).
+
 * dissect: when we discover squashfs, don't claim we had a "writable" partition
  in systemd-dissect

--- a/man/systemd-system.conf.xml
+++ b/man/systemd-system.conf.xml
@ -319,17 +319,14 @@
        <term><varname>DefaultBlockIOAccounting=</varname></term>
        <term><varname>DefaultMemoryAccounting=</varname></term>
        <term><varname>DefaultTasksAccounting=</varname></term>
+        <term><varname>DefaultIPAccounting=</varname></term>

-        <listitem><para>Configure the default resource accounting
-        settings, as configured per-unit by
-        <varname>CPUAccounting=</varname>,
-        <varname>BlockIOAccounting=</varname>,
-        <varname>MemoryAccounting=</varname> and
-        <varname>TasksAccounting=</varname>. See
+        <listitem><para>Configure the default resource accounting settings, as configured per-unit by
+        <varname>CPUAccounting=</varname>, <varname>BlockIOAccounting=</varname>, <varname>MemoryAccounting=</varname>,
+        <varname>TasksAccounting=</varname> and <varname>IPAccounting=</varname>. See
        <citerefentry><refentrytitle>systemd.resource-control</refentrytitle><manvolnum>5</manvolnum></citerefentry>
-        for details on the per-unit
-        settings. <varname>DefaultTasksAccounting=</varname> defaults
-        to on, the other three settings to off.</para></listitem>
+        for details on the per-unit settings. <varname>DefaultTasksAccounting=</varname> defaults to on, the other
+        four settings to off.</para></listitem>
      </varlistentry>

      <varlistentry>
--- a/man/systemd.resource-control.xml
+++ b/man/systemd.resource-control.xml
@ -480,6 +480,123 @@
        </listitem>
      </varlistentry>

+      <varlistentry>
+        <term><varname>IPAccounting=</varname></term>
+
+        <listitem>
+          <para>Takes a boolean argument. If true, turns on IPv4 and IPv6 network traffic accounting for packets sent
+          or received by the unit. When this option is turned on, all IPv4 and IPv6 sockets created by any process of
+          the unit are accounted for. When this option is used in socket units, it applies to all IPv4 and IPv6 sockets
+          associated with it (including both listening and connection sockets where this applies). Note that for
+          socket-activated services, this configuration setting and the accounting data of the service unit and the
+          socket unit are kept separate, and displayed separately. No propagation of the setting and the collected
+          statistics is done, in either direction. Moreover, any traffic sent or received on any of the socket unit's
+          sockets is accounted to the socket unit — and never to the service unit it might have activated, even if the
+          socket is used by it. Note that IP accounting is currently not supported for slice units, and enabling this
+          option for them has no effect. The system default for this setting may be controlled with
+          <varname>DefaultIPAccounting=</varname> in
+          <citerefentry><refentrytitle>systemd-system.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry>.</para>
+        </listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term><varname>IPAddressAllow=<replaceable>ADDRESS[/PREFIXLENGTH]…</replaceable></varname></term>
+        <term><varname>IPAddressDeny=<replaceable>ADDRESS[/PREFIXLENGTH]…</replaceable></varname></term>
+
+        <listitem>
+          <para>Turn on address range network traffic filtering for packets sent and received over AF_INET and AF_INET6
+          sockets.  Both directives take a space separated list of IPv4 or IPv6 addresses, each optionally suffixed
+          with an address prefix length (separated by a <literal>/</literal> character). If the latter is omitted, the
+          address is considered a host address, i.e. the prefix covers the whole address (32 for IPv4, 128 for IPv6).
+          </para>
+
+          <para>The access lists configured with this option are applied to all sockets created by processes of this
+          unit (or in the case of socket units, associated with it). The lists are implicitly combined with any lists
+          configured for any of the parent slice units this unit might be a member of. By default all access lists are
+          empty. When configured the lists are enforced as follows:</para>
+
+          <itemizedlist>
+            <listitem><para>Access will be granted in case an IP packet's destination/source address matches any
+            entry in the <varname>IPAddressAllow=</varname> setting.</para></listitem>
+
+            <listitem><para>Otherwise, access will be denied in case the packet's destination/source address matches
+            any entry in the <varname>IPAddressDeny=</varname> setting.</para></listitem>
+
+            <listitem><para>Otherwise, access will be granted.</para></listitem>
+          </itemizedlist>
+
+          <para>In order to implement a whitelisting IP firewall, it is recommended to use a
+          <varname>IPAddressDeny=</varname><constant>any</constant> setting on an upper-level slice unit (such as the
+          root slice <filename>-.slice</filename> or the slice containing all system services
+          <filename>system.slice</filename> – see
+          <citerefentry><refentrytitle>systemd.special</refentrytitle><manvolnum>7</manvolnum></citerefentry> for
+          details on these slice units), plus individual per-service <varname>IPAddressAllow=</varname> lines
+          permitting network access to relevant services, and only them.</para>
+
+          <para>Note that for socket-activated services, the IP access list configured on the socket unit applies to
+          all sockets associated with it directly, but not to any sockets created by the ultimately activated services
+          for it. Conversely, the IP access list configured for the service is not applied to any sockets passed into
+          the service via socket activation. Thus, it is usually a good idea to replicate the IP access lists on both
+          the socket and the service unit. Nevertheless, it often makes sense to maintain one list more open and the
+          other one more restricted, depending on the use case.</para>
+
+          <para>If these settings are used multiple times in the same unit the specified lists are combined. If an
+          empty string is assigned to these settings the specific access list is reset and all previous settings undone.</para>
+
+          <para>In place of explicit IPv4 or IPv6 address and prefix length specifications a small set of symbolic
+          names may be used. The following names are defined:</para>
+
+          <table>
+            <title>Special address/network names</title>
+
+            <tgroup cols='3'>
+              <colspec colname='name'/>
+              <colspec colname='definition'/>
+              <colspec colname='meaning'/>
+
+              <thead>
+                <row>
+                  <entry>Symbolic Name</entry>
+                  <entry>Definition</entry>
+                  <entry>Meaning</entry>
+                </row>
+              </thead>
+
+            <tbody>
+              <row>
+                <entry><constant>any</constant></entry>
+                <entry>0.0.0.0/0 ::/0</entry>
+                <entry>Any host</entry>
+              </row>
+
+              <row>
+                <entry><constant>localhost</constant></entry>
+                <entry>127.0.0.0/8 ::1/128</entry>
+                <entry>All addresses on the local loopback</entry>
+              </row>
+
+              <row>
+                <entry><constant>link-local</constant></entry>
+                <entry>169.254.0.0/16 fe80::/64</entry>
+                <entry>All link-local IP addresses</entry>
+              </row>
+
+              <row>
+                <entry><constant>multicast</constant></entry>
+                <entry>224.0.0.0/4 ff00::/8</entry>
+                <entry>All IP multicasting addresses</entry>
+              </row>
+            </tbody>
+            </tgroup>
+          </table>
+
+          <para>Note that these settings might not be supported on some systems (for example if eBPF control group
+          support is not enabled in the underlying kernel or container manager). These settings will have no effect in
+          that case. If compatibility with such systems is desired it is hence recommended to not exclusively rely on
+          them for IP security.</para>
+        </listitem>
+      </varlistentry>
+
      <varlistentry>
        <term><varname>DeviceAllow=</varname></term>

--- a/man/systemd.slice.xml
+++ b/man/systemd.slice.xml
@ -53,22 +53,15 @@
  <refsect1>
    <title>Description</title>

-    <para>A unit configuration file whose name ends in
-    <literal>.slice</literal> encodes information about a slice which
-    is a concept for hierarchically managing resources of a group of
-    processes. This management is performed by creating a node in the
-    Linux Control Group (cgroup) tree. Units that manage processes
-    (primarily scope and service units) may be assigned to a specific
-    slice. For each slice, certain resource limits may be set that
-    apply to all processes of all units contained in that
-    slice. Slices are organized hierarchically in a tree. The name of
-    the slice encodes the location in the tree. The name consists of a
-    dash-separated series of names, which describes the path to the
-    slice from the root slice. The root slice is named,
-    <filename>-.slice</filename>. Example:
-    <filename>foo-bar.slice</filename> is a slice that is located
-    within <filename>foo.slice</filename>, which in turn is located in
-    the root slice <filename>-.slice</filename>.
+    <para>A unit configuration file whose name ends in <literal>.slice</literal> encodes information about a slice
+    unit. A slice unit is a concept for hierarchically managing resources of a group of processes. This management is
+    performed by creating a node in the Linux Control Group (cgroup) tree. Units that manage processes (primarily scope
+    and service units) may be assigned to a specific slice. For each slice, certain resource limits may be set that
+    apply to all processes of all units contained in that slice. Slices are organized hierarchically in a tree. The
+    name of the slice encodes the location in the tree. The name consists of a dash-separated series of names, which
+    describes the path to the slice from the root slice. The root slice is named <filename>-.slice</filename>. Example:
+    <filename>foo-bar.slice</filename> is a slice that is located within <filename>foo.slice</filename>, which in turn
+    is located in the root slice <filename>-.slice</filename>.
    </para>

    <para>Note that slice units cannot be templated, nor is possible to add multiple names to a slice unit by creating
--- a/man/systemd.special.xml
+++ b/man/systemd.special.xml
@ -48,8 +48,7 @@
  </refnamediv>

  <refsynopsisdiv><para>
-    <!-- sort alphabetically, targets first -->
-    <filename>basic.target</filename>,
+    <!-- sort alphabetically, targets first --><filename>basic.target</filename>,
    <filename>bluetooth.target</filename>,
    <filename>cryptsetup-pre.target</filename>,
    <filename>cryptsetup.target</filename>,
@ -107,15 +106,15 @@
    <filename>time-sync.target</filename>,
    <filename>timers.target</filename>,
    <filename>umount.target</filename>,
-    <!-- slices -->
-    <filename>-.slice</filename>,
+    <!-- slices --><filename>-.slice</filename>,
    <filename>system.slice</filename>,
    <filename>user.slice</filename>,
    <filename>machine.slice</filename>,
-    <!-- the rest -->
+    <!-- the rest --><filename>-.mount</filename>,
    <filename>dbus.service</filename>,
    <filename>dbus.socket</filename>,
    <filename>display-manager.service</filename>,
+    <filename>init.scope</filename>,
    <filename>system-update-cleanup.service</filename>
  </para></refsynopsisdiv>

@ -131,6 +130,15 @@
    <title>Special System Units</title>

    <variablelist>
+      <varlistentry>
+        <term><filename>-.mount</filename></term>
+        <listitem>
+          <para>The root mount point, i.e. the mount unit for the <filename>/</filename> path. This unit is
+          unconditionally active, during the entire time the system is up, as this mount point is where the basic
+          userspace is running from.</para>
+        </listitem>
+      </varlistentry>
+
      <varlistentry>
        <term><filename>basic.target</filename></term>
        <listitem>
@ -326,6 +334,13 @@
          directly.</para>
        </listitem>
      </varlistentry>
+      <varlistentry>
+        <term><filename>init.scope</filename></term>
+        <listitem>
+          <para>This scope unit is where the system and service manager (PID 1) itself resides. It is active as long as
+          the system is running.</para>
+        </listitem>
+      </varlistentry>
      <varlistentry>
        <term><filename>initrd-fs.target</filename></term>
        <listitem>
@ -1009,17 +1024,17 @@ PartOf=graphical-session.target
  <refsect1>
    <title>Special Slice Units</title>

-    <para>There are four <literal>.slice</literal> units which form
-    the basis of the hierarchy for assignment of resources for
-    services, users, and virtual machines or containers.</para>
+    <para>There are four <literal>.slice</literal> units which form the basis of the hierarchy for assignment of
+    resources for services, users, and virtual machines or containers. See
+    <citerefentry><refentrytitle>systemd.slice</refentrytitle><manvolnum>5</manvolnum></citerefentry> for details about slice
+    units.</para>

    <variablelist>
      <varlistentry>
        <term><filename>-.slice</filename></term>
        <listitem>
-          <para>The root slice is the root of the hierarchy. It
-          usually does not contain units directly, but may be used to
-          set defaults for the whole tree.</para>
+          <para>The root slice is the root of the slice hierarchy. It usually does not contain units directly, but may
+          be used to set defaults for the whole tree.</para>
        </listitem>
      </varlistentry>

--- a/meson.build
+++ b/meson.build
@ -443,6 +443,8 @@ foreach ident : [
                                 #include <keyutils.h>'''],
        ['copy_file_range',   '''#include <sys/syscall.h>
                                 #include <unistd.h>'''],
+        ['bpf',               '''#include <sys/syscall.h>
+                                 #include <unistd.h>'''],
        ['explicit_bzero' ,   '''#include <string.h>'''],
 ]

--- a/mkosi.build
+++ b/mkosi.build
@ -28,7 +28,7 @@ export LC_CTYPE=C.UTF-8

 [ -f "$BUILDDIR"/build.ninja ] || meson "$BUILDDIR"
 ninja -C "$BUILDDIR" all
-[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test
+[ "$WITH_TESTS" = 0 ] || ninja -C "$BUILDDIR" test || ( RET="$?" ; cat "$BUILDDIR"/meson-logs/testlog.txt ; exit "$RET" )
 ninja -C "$BUILDDIR" install

 mkdir -p "$DESTDIR"/etc
--- a/src/basic/bpf-program.c
+++ b/src/basic/bpf-program.c
@ -0,0 +1,183 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "log.h"
+#include "missing.h"
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret) {
+        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+
+        p = new0(BPFProgram, 1);
+        if (!p)
+                return log_oom();
+
+        p->prog_type = prog_type;
+        p->kernel_fd = -1;
+
+        *ret = p;
+        p = NULL;
+        return 0;
+}
+
+BPFProgram *bpf_program_unref(BPFProgram *p) {
+        if (!p)
+                return NULL;
+
+        safe_close(p->kernel_fd);
+        free(p->instructions);
+
+        return mfree(p);
+}
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *instructions, size_t count) {
+
+        assert(p);
+
+        if (!GREEDY_REALLOC(p->instructions, p->allocated, p->n_instructions + count))
+                return -ENOMEM;
+
+        memcpy(p->instructions + p->n_instructions, instructions, sizeof(struct bpf_insn) * count);
+        p->n_instructions += count;
+
+        return 0;
+}
+
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size) {
+        union bpf_attr attr;
+
+        assert(p);
+
+        if (p->kernel_fd >= 0)
+                return -EBUSY;
+
+        attr = (union bpf_attr) {
+                .prog_type = p->prog_type,
+                .insns = PTR_TO_UINT64(p->instructions),
+                .insn_cnt = p->n_instructions,
+                .license = PTR_TO_UINT64("GPL"),
+                .log_buf = PTR_TO_UINT64(log_buf),
+                .log_level = !!log_buf,
+                .log_size = log_size,
+        };
+
+        p->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+        if (p->kernel_fd < 0)
+                return -errno;
+
+        return 0;
+}
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags) {
+        _cleanup_close_ int fd = -1;
+        union bpf_attr attr;
+
+        assert(p);
+        assert(type >= 0);
+        assert(path);
+
+        fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+        if (fd < 0)
+                return -errno;
+
+        attr = (union bpf_attr) {
+                .attach_type = type,
+                .target_fd = fd,
+                .attach_bpf_fd = p->kernel_fd,
+                .attach_flags = flags,
+        };
+
+        if (bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int bpf_program_cgroup_detach(int type, const char *path) {
+        _cleanup_close_ int fd = -1;
+        union bpf_attr attr;
+
+        assert(path);
+
+        fd = open(path, O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+        if (fd < 0)
+                return -errno;
+
+        attr = (union bpf_attr) {
+                .attach_type = type,
+                .target_fd = fd,
+        };
+
+        if (bpf(BPF_PROG_DETACH, &attr, sizeof(attr)) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags) {
+        union bpf_attr attr = {
+                .map_type = type,
+                .key_size = key_size,
+                .value_size = value_size,
+                .max_entries = max_entries,
+                .map_flags = flags,
+        };
+        int fd;
+
+        fd = bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+        if (fd < 0)
+                return -errno;
+
+        return fd;
+}
+
+int bpf_map_update_element(int fd, const void *key, void *value) {
+
+        union bpf_attr attr = {
+                .map_fd = fd,
+                .key = PTR_TO_UINT64(key),
+                .value = PTR_TO_UINT64(value),
+        };
+
+        if (bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)) < 0)
+                return -errno;
+
+        return 0;
+}
+
+int bpf_map_lookup_element(int fd, const void *key, void *value) {
+
+        union bpf_attr attr = {
+                .map_fd = fd,
+                .key = PTR_TO_UINT64(key),
+                .value = PTR_TO_UINT64(value),
+        };
+
+        if (bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)) < 0)
+                return -errno;
+
+        return 0;
+}
--- a/src/basic/bpf-program.h
+++ b/src/basic/bpf-program.h
@ -0,0 +1,55 @@
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+
+  [Except for the stuff copy/pasted from the kernel sources, see below]
+***/
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <sys/syscall.h>
+
+#include "list.h"
+#include "macro.h"
+
+typedef struct BPFProgram BPFProgram;
+
+struct BPFProgram {
+        int kernel_fd;
+        uint32_t prog_type;
+
+        size_t n_instructions;
+        size_t allocated;
+        struct bpf_insn *instructions;
+};
+
+int bpf_program_new(uint32_t prog_type, BPFProgram **ret);
+BPFProgram *bpf_program_unref(BPFProgram *p);
+
+int bpf_program_add_instructions(BPFProgram *p, const struct bpf_insn *insn, size_t count);
+int bpf_program_load_kernel(BPFProgram *p, char *log_buf, size_t log_size);
+
+int bpf_program_cgroup_attach(BPFProgram *p, int type, const char *path, uint32_t flags);
+int bpf_program_cgroup_detach(int type, const char *path);
+
+int bpf_map_new(enum bpf_map_type type, size_t key_size, size_t value_size, size_t max_entries, uint32_t flags);
+int bpf_map_update_element(int fd, const void *key, void *value);
+int bpf_map_lookup_element(int fd, const void *key, void *value);
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BPFProgram*, bpf_program_unref);
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@ -103,9 +103,12 @@ int cg_read_pid(FILE *f, pid_t *_pid) {
        return 1;
 }

-int cg_read_event(const char *controller, const char *path, const char *event,
-                  char **val)
-{
+int cg_read_event(
+                const char *controller,
+                const char *path,
+                const char *event,
+                char **val) {
+
        _cleanup_free_ char *events = NULL, *content = NULL;
        char *p, *line;
        int r;
--- a/src/basic/in-addr-util.c
+++ b/src/basic/in-addr-util.c
@ -308,22 +308,22 @@ int in_addr_from_string(int family, const char *s, union in_addr_union *ret) {
        return 0;
 }

-int in_addr_from_string_auto(const char *s, int *family, union in_addr_union *ret) {
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret) {
        int r;

        assert(s);

        r = in_addr_from_string(AF_INET, s, ret);
        if (r >= 0) {
-                if (family)
-                        *family = AF_INET;
+                if (ret_family)
+                        *ret_family = AF_INET;
                return 0;
        }

        r = in_addr_from_string(AF_INET6, s, ret);
        if (r >= 0) {
-                if (family)
-                        *family = AF_INET6;
+                if (ret_family)
+                        *ret_family = AF_INET6;
                return 0;
        }

@ -371,13 +371,13 @@ int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_u
        return r;
 }

-unsigned char in_addr_netmask_to_prefixlen(const struct in_addr *addr) {
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr) {
        assert(addr);

        return 32 - u32ctz(be32toh(addr->s_addr));
 }

-struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen) {
        assert(addr);
        assert(prefixlen <= 32);

@ -390,7 +390,7 @@ struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char
        return addr;
 }

-int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen) {
        uint8_t msb_octet = *(uint8_t*) addr;

        /* addr may not be aligned, so make sure we only access it byte-wise */
@ -414,18 +414,18 @@ int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixl
        return 0;
 }

-int in_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask) {
        unsigned char prefixlen;
        int r;

        assert(addr);
        assert(mask);

-        r = in_addr_default_prefixlen(addr, &prefixlen);
+        r = in4_addr_default_prefixlen(addr, &prefixlen);
        if (r < 0)
                return r;

-        in_addr_prefixlen_to_netmask(mask, prefixlen);
+        in4_addr_prefixlen_to_netmask(mask, prefixlen);
        return 0;
 }

@ -435,7 +435,7 @@ int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen)
        if (family == AF_INET) {
                struct in_addr mask;

-                if (!in_addr_prefixlen_to_netmask(&mask, prefixlen))
+                if (!in4_addr_prefixlen_to_netmask(&mask, prefixlen))
                        return -EINVAL;

                addr->in.s_addr &= mask.s_addr;
@ -465,10 +465,57 @@ int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen)
        return -EAFNOSUPPORT;
 }

-int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, uint8_t *ret_prefixlen) {
+int in_addr_prefix_covers(int family,
+                          const union in_addr_union *prefix,
+                          unsigned char prefixlen,
+                          const union in_addr_union *address) {
+
+        union in_addr_union masked_prefix, masked_address;
+        int r;
+
+        assert(prefix);
+        assert(address);
+
+        masked_prefix = *prefix;
+        r = in_addr_mask(family, &masked_prefix, prefixlen);
+        if (r < 0)
+                return r;
+
+        masked_address = *address;
+        r = in_addr_mask(family, &masked_address, prefixlen);
+        if (r < 0)
+                return r;
+
+        return in_addr_equal(family, &masked_prefix, &masked_address);
+}
+
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret) {
+        uint8_t u;
+        int r;
+
+        if (!IN_SET(family, AF_INET, AF_INET6))
+                return -EAFNOSUPPORT;
+
+        r = safe_atou8(p, &u);
+        if (r < 0)
+                return r;
+
+        if (u > FAMILY_ADDRESS_SIZE(family) * 8)
+                return -ERANGE;
+
+        *ret = u;
+        return 0;
+}
+
+int in_addr_prefix_from_string(
+                const char *p,
+                int family,
+                union in_addr_union *ret_prefix,
+                unsigned char *ret_prefixlen) {
+
        union in_addr_union buffer;
        const char *e, *l;
-        uint8_t k;
+        unsigned char k;
        int r;

        assert(p);
@ -486,23 +533,58 @@ int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *r
        if (r < 0)
                return r;

-        k = FAMILY_ADDRESS_SIZE(family) * 8;
-
        if (e) {
-                uint8_t n;
-
-                r = safe_atou8(e + 1, &n);
+                r = in_addr_parse_prefixlen(family, e+1, &k);
                if (r < 0)
                        return r;
+        } else
+                k = FAMILY_ADDRESS_SIZE(family) * 8;

-                if (n > k)
-                        return -ERANGE;
-
-                k = n;
-        }
-
-        *ret_prefix = buffer;
-        *ret_prefixlen = k;
+        if (ret_prefix)
+                *ret_prefix = buffer;
+        if (ret_prefixlen)
+                *ret_prefixlen = k;

        return 0;
 }
+
+int in_addr_prefix_from_string_auto(
+                const char *p,
+                int *ret_family,
+                union in_addr_union *ret_prefix,
+                unsigned char *ret_prefixlen) {
+
+        union in_addr_union buffer;
+        const char *e, *l;
+        unsigned char k;
+        int family, r;
+
+        assert(p);
+
+        e = strchr(p, '/');
+        if (e)
+                l = strndupa(p, e - p);
+        else
+                l = p;
+
+        r = in_addr_from_string_auto(l, &family, &buffer);
+        if (r < 0)
+                return r;
+
+        if (e) {
+                r = in_addr_parse_prefixlen(family, e+1, &k);
+                if (r < 0)
+                        return r;
+        } else
+                k = FAMILY_ADDRESS_SIZE(family) * 8;
+
+        if (ret_family)
+                *ret_family = family;
+        if (ret_prefix)
+                *ret_prefix = buffer;
+        if (ret_prefixlen)
+                *ret_prefixlen = k;
+
+        return 0;
+
+}
--- a/src/basic/in-addr-util.h
+++ b/src/basic/in-addr-util.h
@ -53,14 +53,17 @@ int in_addr_prefix_next(int family, union in_addr_union *u, unsigned prefixlen);
 int in_addr_to_string(int family, const union in_addr_union *u, char **ret);
 int in_addr_ifindex_to_string(int family, const union in_addr_union *u, int ifindex, char **ret);
 int in_addr_from_string(int family, const char *s, union in_addr_union *ret);
-int in_addr_from_string_auto(const char *s, int *family, union in_addr_union *ret);
+int in_addr_from_string_auto(const char *s, int *ret_family, union in_addr_union *ret);
 int in_addr_ifindex_from_string_auto(const char *s, int *family, union in_addr_union *ret, int *ifindex);
-unsigned char in_addr_netmask_to_prefixlen(const struct in_addr *addr);
-struct in_addr* in_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
-int in_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
-int in_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
+unsigned char in4_addr_netmask_to_prefixlen(const struct in_addr *addr);
+struct in_addr* in4_addr_prefixlen_to_netmask(struct in_addr *addr, unsigned char prefixlen);
+int in4_addr_default_prefixlen(const struct in_addr *addr, unsigned char *prefixlen);
+int in4_addr_default_subnet_mask(const struct in_addr *addr, struct in_addr *mask);
 int in_addr_mask(int family, union in_addr_union *addr, unsigned char prefixlen);
-int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, uint8_t *ret_prefixlen);
+int in_addr_prefix_covers(int family, const union in_addr_union *prefix, unsigned char prefixlen, const union in_addr_union *address);
+int in_addr_parse_prefixlen(int family, const char *p, unsigned char *ret);
+int in_addr_prefix_from_string(const char *p, int family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);
+int in_addr_prefix_from_string_auto(const char *p, int *ret_family, union in_addr_union *ret_prefix, unsigned char *ret_prefixlen);

 static inline size_t FAMILY_ADDRESS_SIZE(int family) {
        assert(family == AF_INET || family == AF_INET6);
--- a/src/basic/io-util.h
+++ b/src/basic/io-util.h
@ -40,14 +40,6 @@ int fd_wait_for_event(int fd, int event, usec_t timeout);

 ssize_t sparse_write(int fd, const void *p, size_t sz, size_t run_length);

-#define IOVEC_SET_STRING(i, s)                  \
-        do {                                    \
-                struct iovec *_i = &(i);        \
-                char *_s = (char *)(s);         \
-                _i->iov_base = _s;              \
-                _i->iov_len = strlen(_s);       \
-        } while (false)
-
 static inline size_t IOVEC_TOTAL_SIZE(const struct iovec *i, unsigned n) {
        unsigned j;
        size_t r = 0;
@ -93,3 +85,8 @@ static inline bool FILE_SIZE_VALID_OR_INFINITY(uint64_t l) {
        return FILE_SIZE_VALID(l);

 }
+
+#define IOVEC_INIT(base, len) { .iov_base = (base), .iov_len = (len) } /* brace initializer for a struct iovec */
+#define IOVEC_MAKE(base, len) (struct iovec) IOVEC_INIT(base, len) /* same, as a compound literal */
+#define IOVEC_INIT_STRING(string) IOVEC_INIT((char*) (string), strlen(string)) /* NB: evaluates 'string' twice; the cast covers the whole argument */
+#define IOVEC_MAKE_STRING(string) (struct iovec) IOVEC_INIT_STRING(string) /* same, as a compound literal */
--- a/src/basic/journal-importer.c
+++ b/src/basic/journal-importer.c
@ -20,8 +20,9 @@
 #include <unistd.h>

 #include "alloc-util.h"
-#include "journal-importer.h"
 #include "fd-util.h"
+#include "io-util.h"
+#include "journal-importer.h"
 #include "parse-util.h"
 #include "string-util.h"
 #include "unaligned.h"
@ -38,7 +39,7 @@ static int iovw_put(struct iovec_wrapper *iovw, void* data, size_t len) {
        if (!GREEDY_REALLOC(iovw->iovec, iovw->size_bytes, iovw->count + 1))
                return log_oom();

-        iovw->iovec[iovw->count++] = (struct iovec) {data, len};
+        iovw->iovec[iovw->count++] = IOVEC_MAKE(data, len);
        return 0;
 }

--- a/src/basic/log.c
+++ b/src/basic/log.c
@ -351,22 +351,22 @@ static int write_to_console(

        if (log_target == LOG_TARGET_CONSOLE_PREFIXED) {
                xsprintf(prefix, "<%i>", level);
-                IOVEC_SET_STRING(iovec[n++], prefix);
+                iovec[n++] = IOVEC_MAKE_STRING(prefix);
        }

        highlight = LOG_PRI(level) <= LOG_ERR && show_color;

        if (show_location) {
                snprintf(location, sizeof(location), "(%s:%i) ", file, line);
-                IOVEC_SET_STRING(iovec[n++], location);
+                iovec[n++] = IOVEC_MAKE_STRING(location);
        }

        if (highlight)
-                IOVEC_SET_STRING(iovec[n++], ANSI_HIGHLIGHT_RED);
-        IOVEC_SET_STRING(iovec[n++], buffer);
+                iovec[n++] = IOVEC_MAKE_STRING(ANSI_HIGHLIGHT_RED);
+        iovec[n++] = IOVEC_MAKE_STRING(buffer);
        if (highlight)
-                IOVEC_SET_STRING(iovec[n++], ANSI_NORMAL);
-        IOVEC_SET_STRING(iovec[n++], "\n");
+                iovec[n++] = IOVEC_MAKE_STRING(ANSI_NORMAL);
+        iovec[n++] = IOVEC_MAKE_STRING("\n");

        if (writev(console_fd, iovec, n) < 0) {

@ -425,11 +425,11 @@ static int write_to_syslog(

        xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());

-        IOVEC_SET_STRING(iovec[0], header_priority);
-        IOVEC_SET_STRING(iovec[1], header_time);
-        IOVEC_SET_STRING(iovec[2], program_invocation_short_name);
-        IOVEC_SET_STRING(iovec[3], header_pid);
-        IOVEC_SET_STRING(iovec[4], buffer);
+        iovec[0] = IOVEC_MAKE_STRING(header_priority);
+        iovec[1] = IOVEC_MAKE_STRING(header_time);
+        iovec[2] = IOVEC_MAKE_STRING(program_invocation_short_name);
+        iovec[3] = IOVEC_MAKE_STRING(header_pid);
+        iovec[4] = IOVEC_MAKE_STRING(buffer);

        /* When using syslog via SOCK_STREAM separate the messages by NUL chars */
        if (syslog_is_stream)
@ -470,11 +470,11 @@ static int write_to_kmsg(
        xsprintf(header_priority, "<%i>", level);
        xsprintf(header_pid, "["PID_FMT"]: ", getpid_cached());

-        IOVEC_SET_STRING(iovec[0], header_priority);
-        IOVEC_SET_STRING(iovec[1], program_invocation_short_name);
-        IOVEC_SET_STRING(iovec[2], header_pid);
-        IOVEC_SET_STRING(iovec[3], buffer);
-        IOVEC_SET_STRING(iovec[4], "\n");
+        iovec[0] = IOVEC_MAKE_STRING(header_priority);
+        iovec[1] = IOVEC_MAKE_STRING(program_invocation_short_name);
+        iovec[2] = IOVEC_MAKE_STRING(header_pid);
+        iovec[3] = IOVEC_MAKE_STRING(buffer);
+        iovec[4] = IOVEC_MAKE_STRING("\n");

        if (writev(kmsg_fd, iovec, ELEMENTSOF(iovec)) < 0)
                return -errno;
@ -547,10 +547,10 @@ static int write_to_journal(

        log_do_header(header, sizeof(header), level, error, file, line, func, object_field, object, extra_field, extra);

-        IOVEC_SET_STRING(iovec[0], header);
-        IOVEC_SET_STRING(iovec[1], "MESSAGE=");
-        IOVEC_SET_STRING(iovec[2], buffer);
-        IOVEC_SET_STRING(iovec[3], "\n");
+        iovec[0] = IOVEC_MAKE_STRING(header);
+        iovec[1] = IOVEC_MAKE_STRING("MESSAGE=");
+        iovec[2] = IOVEC_MAKE_STRING(buffer);
+        iovec[3] = IOVEC_MAKE_STRING("\n");

        mh.msg_iov = iovec;
        mh.msg_iovlen = ELEMENTSOF(iovec);
@ -872,7 +872,7 @@ int log_format_iovec(
                 * the next format string */
                VA_FORMAT_ADVANCE(format, ap);

-                IOVEC_SET_STRING(iovec[(*n)++], m);
+                iovec[(*n)++] = IOVEC_MAKE_STRING(m);

                if (newline_separator) {
                        iovec[*n].iov_base = (char*) &nl;
@ -893,9 +893,9 @@ int log_struct_internal(
                const char *func,
                const char *format, ...) {

+        LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
        char buf[LINE_MAX];
        bool found = false;
-        LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
        PROTECT_ERRNO;
        va_list ap;

@ -926,7 +926,7 @@ int log_struct_internal(

                /* If the journal is available do structured logging */
                log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
-                IOVEC_SET_STRING(iovec[n++], header);
+                iovec[n++] = IOVEC_MAKE_STRING(header);

                va_start(ap, format);
                r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, true, error, format, ap);
@ -975,6 +975,73 @@ int log_struct_internal(
        return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, buf + 8);
 }

+int log_struct_iovec_internal(
+                int level, /* priority, optionally with facility bits; realm is extracted via LOG_REALM_REMOVE_LEVEL() */
+                int error, /* errno-style code, either sign; the function always returns it negated */
+                const char *file,
+                int line,
+                const char *func,
+                const struct iovec input_iovec[], /* caller-built fields; a "MESSAGE=" entry is needed for the fallback path */
+                size_t n_input_iovec) {
+
+        LogRealm realm = LOG_REALM_REMOVE_LEVEL(level);
+        PROTECT_ERRNO;
+        size_t i;
+        char *m;
+
+        if (error < 0) /* normalize to a positive value so that "-error" below is always negative or zero */
+                error = -error;
+
+        if (_likely_(LOG_PRI(level) > log_max_level[realm])) /* message is filtered out by the realm's maximum level */
+                return -error;
+
+        if (log_target == LOG_TARGET_NULL)
+                return -error;
+
+        if ((level & LOG_FACMASK) == 0) /* no facility given: apply the configured default */
+                level = log_facility | LOG_PRI(level);
+
+        if (IN_SET(log_target, LOG_TARGET_AUTO,
+                               LOG_TARGET_JOURNAL_OR_KMSG,
+                               LOG_TARGET_JOURNAL) &&
+            journal_fd >= 0) {
+
+                struct iovec iovec[1 + n_input_iovec*2]; /* header, then each input field followed by a "\n" separator */
+                char header[LINE_MAX];
+                struct msghdr mh = {
+                        .msg_iov = iovec,
+                        .msg_iovlen = 1 + n_input_iovec*2,
+                };
+
+                log_do_header(header, sizeof(header), level, error, file, line, func, NULL, NULL, NULL, NULL);
+                iovec[0] = IOVEC_MAKE_STRING(header);
+
+                for (i = 0; i < n_input_iovec; i++) {
+                        iovec[1+i*2] = input_iovec[i];
+                        iovec[1+i*2+1] = IOVEC_MAKE_STRING("\n");
+                }
+
+                if (sendmsg(journal_fd, &mh, MSG_NOSIGNAL) >= 0) /* on failure fall through to the plain-text path below */
+                        return -error;
+        }
+
+        for (i = 0; i < n_input_iovec; i++) { /* fallback: locate the first field that starts with "MESSAGE=" */
+                if (input_iovec[i].iov_len < strlen("MESSAGE="))
+                        continue;
+
+                if (memcmp(input_iovec[i].iov_base, "MESSAGE=", strlen("MESSAGE=")) == 0)
+                        break;
+        }
+
+        if (_unlikely_(i >= n_input_iovec)) /* Couldn't find MESSAGE=? Then there is nothing to log in plain-text form. */
+                return -error;
+
+        m = strndupa(input_iovec[i].iov_base + strlen("MESSAGE="), /* NUL-terminated stack copy of the text after "MESSAGE=" */
+                     input_iovec[i].iov_len - strlen("MESSAGE="));
+
+        return log_dispatch_internal(level, error, file, line, func, NULL, NULL, NULL, NULL, m);
+}
+
 int log_set_target_from_string(const char *e) {
        LogTarget t;

--- a/src/basic/log.h
+++ b/src/basic/log.h
@ -187,6 +187,15 @@ int log_format_iovec(
                const char *format,
                va_list ap) _printf_(6, 0);

+int log_struct_iovec_internal( /* structured logging from a caller-prepared iovec array */
+                int level,
+                int error,
+                const char *file,
+                int line,
+                const char *func,
+                const struct iovec input_iovec[], /* fields to log, one per entry */
+                size_t n_input_iovec); /* number of entries in input_iovec[] */
+
 /* This modifies the buffer passed! */
 int log_dump_internal(
                int level,
@ -270,6 +279,11 @@ void log_assert_failed_return_realm(
                            error, __FILE__, __LINE__, __func__, __VA_ARGS__)
 #define log_struct(level, ...) log_struct_errno(level, 0, __VA_ARGS__)

+#define log_struct_iovec_errno(level, error, iovec, n_iovec)            \
+        log_struct_iovec_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
+                                  error, __FILE__, __LINE__, __func__, iovec, n_iovec) /* fills in realm and call-site location */
+#define log_struct_iovec(level, iovec, n_iovec) log_struct_iovec_errno(level, 0, iovec, n_iovec) /* variant without an error code */
+
 /* This modifies the buffer passed! */
 #define log_dump(level, buffer) \
        log_dump_internal(LOG_REALM_PLUS_LEVEL(LOG_REALM, level), \
--- a/src/basic/meson.build
+++ b/src/basic/meson.build
@ -1,4 +1,6 @@
 basic_sources_plain = files('''
+        MurmurHash2.c
+        MurmurHash2.h
        af-list.c
        af-list.h
        alloc-util.c
@ -16,6 +18,8 @@ basic_sources_plain = files('''
        bitmap.c
        bitmap.h
        blkid-util.h
+        bpf-program.c
+        bpf-program.h
        btrfs-ctree.h
        btrfs-util.c
        btrfs-util.h
@ -24,10 +28,10 @@ basic_sources_plain = files('''
        bus-label.h
        calendarspec.c
        calendarspec.h
-        capability-util.c
-        capability-util.h
        cap-list.c
        cap-list.h
+        capability-util.c
+        capability-util.h
        cgroup-util.c
        cgroup-util.h
        chattr-util.c
@ -61,10 +65,10 @@ basic_sources_plain = files('''
        extract-word.h
        fd-util.c
        fd-util.h
-        fileio.c
-        fileio.h
        fileio-label.c
        fileio-label.h
+        fileio.c
+        fileio.h
        format-util.h
        fs-util.c
        fs-util.h
@ -82,9 +86,9 @@ basic_sources_plain = files('''
        hostname-util.h
        in-addr-util.c
        in-addr-util.h
-        ioprio.h
        io-util.c
        io-util.h
+        ioprio.h
        journal-importer.c
        journal-importer.h
        khash.c
@ -106,13 +110,11 @@ basic_sources_plain = files('''
        mempool.c
        mempool.h
        missing_syscall.h
+        mkdir-label.c
        mkdir.c
        mkdir.h
-        mkdir-label.c
        mount-util.c
        mount-util.h
-        MurmurHash2.c
-        MurmurHash2.h
        nss-util.h
        ordered-set.c
        ordered-set.h
@ -138,9 +140,9 @@ basic_sources_plain = files('''
        rlimit-util.h
        rm-rf.c
        rm-rf.h
-        securebits.h
        securebits-util.c
        securebits-util.h
+        securebits.h
        selinux-util.c
        selinux-util.h
        set.h
--- a/src/basic/missing_syscall.h
+++ b/src/basic/missing_syscall.h
@ -22,6 +22,8 @@

 /* Missing glibc definitions to access certain kernel APIs */

+#include <sys/types.h>
+
 #if !HAVE_DECL_PIVOT_ROOT
 static inline int pivot_root(const char *new_root, const char *put_old) {
        return syscall(SYS_pivot_root, new_root, put_old);
@ -316,3 +318,33 @@ static inline ssize_t copy_file_range(int fd_in, loff_t *off_in,
 #  endif
 }
 #endif
+
+#if !HAVE_DECL_BPF /* fallback wrapper, compiled only when HAVE_DECL_BPF is false */
+#  ifndef __NR_bpf /* supply the syscall number per architecture if no header defined it */
+#    if defined __i386__
+#      define __NR_bpf 357
+#    elif defined __x86_64__
+#      define __NR_bpf 321
+#    elif defined __aarch64__
+#      define __NR_bpf 280
+#    elif defined __sparc__
+#      define __NR_bpf 349
+#    elif defined __s390__
+#      define __NR_bpf 351
+#    else
+#      warning "__NR_bpf not defined for your architecture"
+#    endif
+#  endif /* __NR_bpf */
+
+union bpf_attr; /* forward declaration: only a pointer to it is used here */
+
+static inline int bpf(int cmd, union bpf_attr *attr, size_t size) {
+#ifdef __NR_bpf
+        return (int) syscall(__NR_bpf, cmd, attr, size);
+#else
+        errno = ENOSYS; /* no known syscall number: fail with ENOSYS and return -1 */
+        return -1;
+#endif
+}
+
+#endif /* !HAVE_DECL_BPF */
--- a/src/basic/rlimit-util.c
+++ b/src/basic/rlimit-util.c
@ -42,7 +42,8 @@ int setrlimit_closest(int resource, const struct rlimit *rlim) {

        /* So we failed to set the desired setrlimit, then let's try
         * to get as close as we can */
-        assert_se(getrlimit(resource, &highest) == 0);
+        if (getrlimit(resource, &highest) < 0)
+                return -errno;

        fixed.rlim_cur = MIN(rlim->rlim_cur, highest.rlim_max);
        fixed.rlim_max = MIN(rlim->rlim_max, highest.rlim_max);
--- a/src/basic/socket-label.c
+++ b/src/basic/socket-label.c
@ -83,7 +83,7 @@ int socket_address_listen(
                        return -errno;
        }

-        if (socket_address_family(a) == AF_INET || socket_address_family(a) == AF_INET6) {
+        if (IN_SET(socket_address_family(a), AF_INET, AF_INET6)) {
                if (bind_to_device)
                        if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, bind_to_device, strlen(bind_to_device)+1) < 0)
                                return -errno;
--- a/src/core/bpf-firewall.c
+++ b/src/core/bpf-firewall.c
@ -0,0 +1,680 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/libbpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "bpf-program.h"
+#include "fd-util.h"
+#include "ip-address-access.h"
+#include "unit.h"
+
+enum { /* keys used to look up the counters in an accounting map */
+        MAP_KEY_PACKETS, /* packet counter */
+        MAP_KEY_BYTES, /* byte counter */
+};
+
+enum { /* verdict bits that the generated lookup code ORs into R8 */
+        ACCESS_ALLOWED = 1,
+        ACCESS_DENIED  = 2,
+};
+
+/* Compile instructions for one address map (IPv4 or IPv6), one direction and one specific verdict on matches. */
+
+static int add_lookup_instructions(
+                BPFProgram *p, /* program the instructions are appended to */
+                int map_fd, /* map to look the address up in; must be >= 0 */
+                int protocol, /* ETH_P_IP or ETH_P_IPV6; anything else yields -EAFNOSUPPORT */
+                bool is_ingress, /* true: match the source address, false: match the destination address */
+                int verdict) { /* bit(s) ORed into R8 when the lookup finds an entry */
+
+        int r, addr_offset, addr_size;
+
+        assert(p);
+        assert(map_fd >= 0);
+
+        switch (protocol) {
+
+        case ETH_P_IP:
+                addr_size = sizeof(uint32_t);
+                addr_offset = is_ingress ?
+                        offsetof(struct iphdr, saddr) :
+                        offsetof(struct iphdr, daddr);
+                break;
+
+        case ETH_P_IPV6:
+                addr_size = 4 * sizeof(uint32_t);
+                addr_offset = is_ingress ?
+                        offsetof(struct ip6_hdr, ip6_src.s6_addr) :
+                        offsetof(struct ip6_hdr, ip6_dst.s6_addr);
+                break;
+
+        default:
+                return -EAFNOSUPPORT;
+        }
+
+        do {
+                /* Load the packet's address onto the stack and look it up in the map (addr_size bytes: 4 for IPv4, 16 for IPv6) */
+                struct bpf_insn insn[] = {
+                        /* If the protocol value held in R7 differs from the one handled here, skip this whole block. The offset will be set later. */
+                        BPF_JMP_IMM(BPF_JNE, BPF_REG_7, htobe16(protocol), 0),
+
+                        /*
+                         * Call into BPF_FUNC_skb_load_bytes to load the dst/src IP address
+                         *
+                         * R1: Pointer to the skb
+                         * R2: Data offset
+                         * R3: Destination buffer on the stack (r10 - addr_size)
+                         * R4: Number of bytes to read (addr_size)
+                         */
+
+                        BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+                        BPF_MOV32_IMM(BPF_REG_2, addr_offset),
+
+                        BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -addr_size),
+
+                        BPF_MOV32_IMM(BPF_REG_4, addr_size),
+                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
+
+                        /*
+                         * Call into BPF_FUNC_map_lookup_elem to see if the address matches any entry in the
+                         * LPM trie map. For this to work, the prefixlen field of 'struct bpf_lpm_trie_key'
+                         * has to be set to the maximum possible value.
+                         *
+                         * The result is returned in R0, which is 0 when nothing matched. For this application
+                         * the looked up value itself doesn't matter; we just set the bit in @verdict in R8 if
+                         * we found any matching value.
+                         */
+
+                        BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -addr_size - sizeof(uint32_t)), /* key starts one 32bit word below the address loaded above */
+                        BPF_ST_MEM(BPF_W, BPF_REG_2, 0, addr_size * 8), /* that word is the prefix length: the full address width in bits */
+
+                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), /* no match: skip the OR below */
+                        BPF_ALU32_IMM(BPF_OR, BPF_REG_8, verdict),
+                };
+
+                /* Jump label fixup: let the protocol check in insn[0] jump over all remaining instructions of this block */
+                insn[0].off = ELEMENTSOF(insn) - 1;
+
+                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+                if (r < 0)
+                        return r;
+
+        } while (false); /* executes exactly once; the loop only provides a scope for insn[] */
+
+        return 0;
+}
+
+static int bpf_firewall_compile_bpf(
+                Unit *u, /* unit whose accounting and allow/deny map fds are compiled into the program */
+                bool is_ingress, /* selects the ingress or egress accounting map; also passed to the address lookups */
+                BPFProgram **ret) { /* receives the new program, or NULL if the unit needs none */
+
+        struct bpf_insn pre_insn[] = {
+                /*
+                 * When the eBPF program is entered, R1 contains the address of the skb.
+                 * However, R1-R5 are scratch registers that are not preserved when calling
+                 * into kernel functions, so we need to save anything that's supposed to
+                 * stay around to R6-R9. Save the skb to R6.
+                 */
+                BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+                /*
+                 * Although we cannot access the skb data directly from eBPF programs used in this
+                 * scenario, the kernel has prepared some fields for us to access through struct __sk_buff.
+                 * Load the protocol (IPv4, IPv6) used by the packet in flight once and cache it in R7
+                 * for later use.
+                 */
+                BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct __sk_buff, protocol)),
+
+                /*
+                 * R8 is used to keep track of whether any address check has explicitly allowed or denied the packet
+                 * through ACCESS_DENIED or ACCESS_ALLOWED bits. Reset them both to 0 in the beginning.
+                 */
+                BPF_MOV32_IMM(BPF_REG_8, 0),
+        };
+
+        /*
+         * The access checkers compiled for the configured allowance and denial lists
+         * write to R8 at runtime. The following code prepares for an early exit that
+         * skips the accounting if the packet is denied.
+         *
+         * R0 = 1
+         * if (R8 == ACCESS_DENIED)
+         *     R0 = 0
+         *
+         * This means that if both ACCESS_DENIED and ACCESS_ALLOWED are set, the packet
+         * is allowed to pass.
+         */
+        struct bpf_insn post_insn[] = {
+                BPF_MOV64_IMM(BPF_REG_0, 1),
+                BPF_JMP_IMM(BPF_JNE, BPF_REG_8, ACCESS_DENIED, 1),
+                BPF_MOV64_IMM(BPF_REG_0, 0),
+        };
+
+        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+        int accounting_map_fd, r;
+        bool access_enabled;
+
+        assert(u);
+        assert(ret);
+
+        accounting_map_fd = is_ingress ?
+                u->ip_accounting_ingress_map_fd :
+                u->ip_accounting_egress_map_fd;
+
+        access_enabled =
+                u->ipv4_allow_map_fd >= 0 ||
+                u->ipv6_allow_map_fd >= 0 ||
+                u->ipv4_deny_map_fd >= 0 ||
+                u->ipv6_deny_map_fd >= 0;
+
+        if (accounting_map_fd < 0 && !access_enabled) { /* neither accounting nor filtering requested: no program needed */
+                *ret = NULL;
+                return 0;
+        }
+
+        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
+        if (r < 0)
+                return r;
+
+        r = bpf_program_add_instructions(p, pre_insn, ELEMENTSOF(pre_insn));
+        if (r < 0)
+                return r;
+
+        if (access_enabled) {
+                /*
+                 * The simple rule this function translates into eBPF instructions is:
+                 *
+                 * - Access will be granted when an address matches an entry in one of the allow maps
+                 * - Otherwise, access will be denied when an address matches an entry in one of the deny maps
+                 * - Otherwise, access will be granted
+                 */
+
+                if (u->ipv4_deny_map_fd >= 0) {
+                        r = add_lookup_instructions(p, u->ipv4_deny_map_fd, ETH_P_IP, is_ingress, ACCESS_DENIED);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (u->ipv6_deny_map_fd >= 0) {
+                        r = add_lookup_instructions(p, u->ipv6_deny_map_fd, ETH_P_IPV6, is_ingress, ACCESS_DENIED);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (u->ipv4_allow_map_fd >= 0) {
+                        r = add_lookup_instructions(p, u->ipv4_allow_map_fd, ETH_P_IP, is_ingress, ACCESS_ALLOWED);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (u->ipv6_allow_map_fd >= 0) {
+                        r = add_lookup_instructions(p, u->ipv6_allow_map_fd, ETH_P_IPV6, is_ingress, ACCESS_ALLOWED);
+                        if (r < 0)
+                                return r;
+                }
+        }
+
+        r = bpf_program_add_instructions(p, post_insn, ELEMENTSOF(post_insn)); /* turns the R8 bits into the verdict in R0 */
+        if (r < 0)
+                return r;
+
+        if (accounting_map_fd >= 0) {
+                struct bpf_insn insn[] = {
+                        /*
+                         * If R0 == 0, the packet will be denied; skip the accounting instructions in this case.
+                         * The jump label will be fixed up later.
+                         */
+                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
+
+                        /* Count packets */
+                        BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
+                        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+                        BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd), /* load map fd to r1 */
+                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* lookup failed: skip the increment */
+                        BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+                        BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+                        /* Count bytes */
+                        BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
+                        BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+                        BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                        BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+                        BPF_LD_MAP_FD(BPF_REG_1, accounting_map_fd),
+                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+                        BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* lookup failed: skip the increment */
+                        BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
+                        BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+                        /* Allow the packet to pass (R0 was clobbered by the lookups above, so restore the verdict) */
+                        BPF_MOV64_IMM(BPF_REG_0, 1),
+                };
+
+                /* Jump label fixup: a denied packet jumps over the whole block, so R0 stays 0 for it */
+                insn[0].off = ELEMENTSOF(insn) - 1;
+
+                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+                if (r < 0)
+                        return r;
+        }
+
+        do {
+                /*
+                 * Exit from the eBPF program, R0 contains the verdict.
+                 * 0 means the packet is denied, 1 means the packet may pass.
+                 */
+                struct bpf_insn insn[] = {
+                        BPF_EXIT_INSN()
+                };
+
+                r = bpf_program_add_instructions(p, insn, ELEMENTSOF(insn));
+                if (r < 0)
+                        return r;
+        } while (false);
+
+        *ret = p; /* hand the program to the caller ... */
+        p = NULL; /* ... and clear the local so the cleanup handler leaves it alone */
+
+        return 0;
+}
+
+static int bpf_firewall_count_access_items(IPAddressAccessItem *list, size_t *n_ipv4, size_t *n_ipv6) { /* adds the per-family item counts of 'list' to the two counters */
+        IPAddressAccessItem *a;
+
+        assert(n_ipv4);
+        assert(n_ipv6);
+
+        LIST_FOREACH(items, a, list) {
+                switch (a->family) {
+
+                case AF_INET:
+                        (*n_ipv4)++; /* counters are incremented, never reset here: callers accumulate across several lists */
+                        break;
+
+                case AF_INET6:
+                        (*n_ipv6)++;
+                        break;
+
+                default:
+                        return -EAFNOSUPPORT; /* items counted before this one stay counted */
+                }
+        }
+
+        return 0;
+}
+
+static int bpf_firewall_add_access_items(
+                IPAddressAccessItem *list, /* items to insert */
+                int ipv4_map_fd, /* map receiving the AF_INET items */
+                int ipv6_map_fd, /* map receiving the AF_INET6 items */
+                int verdict) { /* stored as the 64bit value of every inserted entry */
+
+        struct bpf_lpm_trie_key *key_ipv4, *key_ipv6;
+        uint64_t value = verdict;
+        IPAddressAccessItem *a;
+        int r;
+
+        key_ipv4 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)); /* key header plus 4 address bytes, on the stack */
+        key_ipv6 = alloca0(offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t) * 4); /* key header plus 16 address bytes, on the stack */
+
+        LIST_FOREACH(items, a, list) {
+                switch (a->family) {
+
+                case AF_INET:
+                        key_ipv4->prefixlen = a->prefixlen;
+                        memcpy(key_ipv4->data, &a->address, sizeof(uint32_t));
+
+                        r = bpf_map_update_element(ipv4_map_fd, key_ipv4, &value);
+                        if (r < 0)
+                                return r; /* entries inserted so far remain in the map */
+
+                        break;
+
+                case AF_INET6:
+                        key_ipv6->prefixlen = a->prefixlen;
+                        memcpy(key_ipv6->data, &a->address, 4 * sizeof(uint32_t));
+
+                        r = bpf_map_update_element(ipv6_map_fd, key_ipv6, &value);
+                        if (r < 0)
+                                return r;
+
+                        break;
+
+                default:
+                        return -EAFNOSUPPORT;
+                }
+        }
+
+        return 0;
+}
+
+static int bpf_firewall_prepare_access_maps(
+                Unit *u, /* unit to start from; its chain of parent slices is walked too */
+                int verdict, /* ACCESS_ALLOWED selects the allow lists, anything else the deny lists */
+                int *ret_ipv4_map_fd, /* receives the IPv4 map fd, or -1 if there are no IPv4 items */
+                int *ret_ipv6_map_fd) { /* receives the IPv6 map fd, or -1 if there are no IPv6 items */
+
+        _cleanup_close_ int ipv4_map_fd = -1, ipv6_map_fd = -1;
+        size_t n_ipv4 = 0, n_ipv6 = 0;
+        Unit *p;
+        int r;
+
+        assert(ret_ipv4_map_fd);
+        assert(ret_ipv6_map_fd);
+
+        for (p = u; p; p = UNIT_DEREF(p->slice)) { /* pass 1: count the items so that the maps can be sized */
+                CGroupContext *cc = unit_get_cgroup_context(p);
+
+                if (!cc)
+                        continue;
+
+                r = bpf_firewall_count_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny, &n_ipv4, &n_ipv6);
+                if (r < 0) /* unsupported address family: fail before any map is created */
+                        return r;
+        }
+
+        if (n_ipv4 > 0) {
+                ipv4_map_fd = bpf_map_new(
+                                BPF_MAP_TYPE_LPM_TRIE,
+                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t),
+                                sizeof(uint64_t),
+                                n_ipv4,
+                                BPF_F_NO_PREALLOC);
+                if (ipv4_map_fd < 0)
+                        return ipv4_map_fd;
+        }
+
+        if (n_ipv6 > 0) {
+                ipv6_map_fd = bpf_map_new(
+                                BPF_MAP_TYPE_LPM_TRIE,
+                                offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint32_t)*4,
+                                sizeof(uint64_t),
+                                n_ipv6,
+                                BPF_F_NO_PREALLOC);
+                if (ipv6_map_fd < 0)
+                        return ipv6_map_fd;
+        }
+
+        for (p = u; p; p = UNIT_DEREF(p->slice)) { /* pass 2: fill the maps */
+                CGroupContext *cc = unit_get_cgroup_context(p);
+
+                if (!cc)
+                        continue;
+
+                r = bpf_firewall_add_access_items(verdict == ACCESS_ALLOWED ? cc->ip_address_allow : cc->ip_address_deny,
+                                                  ipv4_map_fd, ipv6_map_fd, verdict);
+                if (r < 0)
+                        return r; /* the _cleanup_close_ handler closes whatever was created */
+        }
+
+        *ret_ipv4_map_fd = ipv4_map_fd;
+        *ret_ipv6_map_fd = ipv6_map_fd;
+
+        ipv4_map_fd = ipv6_map_fd = -1; /* ownership passed to the caller: disarm the cleanup handler */
+        return 0;
+}
+
+/* Brings the two accounting map fds in line with the accounting setting.
+ *
+ * If accounting is enabled, a two-slot array map (int key, uint64_t value) is created for each direction
+ * whose fd is still negative; maps that already exist are kept as they are. If accounting is disabled, both
+ * fds are passed to safe_close() and replaced by its return value.
+ *
+ * Returns 0 on success or the negative error code of a failed map creation. */
+static int bpf_firewall_prepare_accounting_maps(bool enabled, int *fd_ingress, int *fd_egress) {
+        int fd;
+
+        assert(fd_ingress);
+        assert(fd_egress);
+
+        if (!enabled) {
+                *fd_ingress = safe_close(*fd_ingress);
+                *fd_egress = safe_close(*fd_egress);
+                return 0;
+        }
+
+        if (*fd_ingress < 0) {
+                fd = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+                if (fd < 0)
+                        return fd;
+
+                *fd_ingress = fd;
+        }
+
+        if (*fd_egress < 0) {
+                fd = bpf_map_new(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(uint64_t), 2, 0);
+                if (fd < 0)
+                        return fd;
+
+                *fd_egress = fd;
+        }
+
+        return 0;
+}
+
+/* Rebuilds the firewall state kept on the unit: the allow/deny address maps and the ingress and egress BPF
+ * programs. Nothing is loaded into the kernel here.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if bpf_firewall_supported() reports no support, -EINVAL if the unit has
+ * no cgroup context, or the negative error code of whichever preparation step failed. */
+int bpf_firewall_compile(Unit *u) {
+        CGroupContext *ctx;
+        int r;
+
+        assert(u);
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
+                return -EOPNOTSUPP;
+        }
+
+        /* A recompile always starts from scratch for the programs and the access maps, so that the firewall
+         * reflects the current configuration. The accounting maps are deliberately left alone here and get
+         * reused below, so that the counters are not thrown away without need. */
+
+        u->ip_bpf_egress = bpf_program_unref(u->ip_bpf_egress);
+        u->ip_bpf_ingress = bpf_program_unref(u->ip_bpf_ingress);
+
+        u->ipv4_allow_map_fd = safe_close(u->ipv4_allow_map_fd);
+        u->ipv6_allow_map_fd = safe_close(u->ipv6_allow_map_fd);
+
+        u->ipv4_deny_map_fd = safe_close(u->ipv4_deny_map_fd);
+        u->ipv6_deny_map_fd = safe_close(u->ipv6_deny_map_fd);
+
+        /* Note that the old state has already been dropped when we bail out here. */
+        ctx = unit_get_cgroup_context(u);
+        if (!ctx)
+                return -EINVAL;
+
+        r = bpf_firewall_prepare_access_maps(u, ACCESS_ALLOWED, &u->ipv4_allow_map_fd, &u->ipv6_allow_map_fd);
+        if (r < 0)
+                return log_error_errno(r, "Preparation of eBPF allow maps failed: %m");
+
+        r = bpf_firewall_prepare_access_maps(u, ACCESS_DENIED, &u->ipv4_deny_map_fd, &u->ipv6_deny_map_fd);
+        if (r < 0)
+                return log_error_errno(r, "Preparation of eBPF deny maps failed: %m");
+
+        r = bpf_firewall_prepare_accounting_maps(
+                        ctx->ip_accounting,
+                        &u->ip_accounting_ingress_map_fd,
+                        &u->ip_accounting_egress_map_fd);
+        if (r < 0)
+                return log_error_errno(r, "Preparation of eBPF accounting maps failed: %m");
+
+        /* The maps are in place, now generate one program per direction. */
+        r = bpf_firewall_compile_bpf(u, true, &u->ip_bpf_ingress);
+        if (r < 0)
+                return log_error_errno(r, "Compilation for ingress BPF program failed: %m");
+
+        r = bpf_firewall_compile_bpf(u, false, &u->ip_bpf_egress);
+        if (r < 0)
+                return log_error_errno(r, "Compilation for egress BPF program failed: %m");
+
+        return 0;
+}
+
+/* Makes the kernel state of the unit's cgroup match the programs currently stored on the unit. For each
+ * direction (egress first, then ingress): if a program is set it is loaded into the kernel and attached to
+ * the cgroup, otherwise the program attached for that direction is detached.
+ *
+ * Returns 0 on success, -EINVAL if the unit has no cgroup path or no cgroup context, -EOPNOTSUPP if
+ * bpf_firewall_supported() reports no support, or the negative error code of the step that failed. A failed
+ * detach is logged at debug level for -ENOENT and at error level otherwise, but is returned either way. */
+int bpf_firewall_install(Unit *u) {
+        _cleanup_free_ char *cgroup_fs_path = NULL;
+        unsigned attach_flags;
+        CGroupContext *ctx;
+        int r;
+
+        assert(u);
+
+        if (!u->cgroup_path)
+                return -EINVAL;
+
+        ctx = unit_get_cgroup_context(u);
+        if (!ctx)
+                return -EINVAL;
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r == 0) {
+                log_debug("BPF firewalling not supported on this systemd, proceeding without.");
+                return -EOPNOTSUPP;
+        }
+
+        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, NULL, &cgroup_fs_path);
+        if (r < 0)
+                return log_error_errno(r, "Failed to determine cgroup path: %m");
+
+        /* Units with delegation turned on get their programs attached with BPF_F_ALLOW_OVERRIDE. */
+        attach_flags = ctx->delegate ? BPF_F_ALLOW_OVERRIDE : 0;
+
+        if (!u->ip_bpf_egress) {
+                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_EGRESS, cgroup_fs_path);
+                if (r < 0)
+                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
+                                              "Detaching egress BPF program from cgroup failed: %m");
+        } else {
+                r = bpf_program_load_kernel(u->ip_bpf_egress, NULL, 0);
+                if (r < 0)
+                        return log_error_errno(r, "Kernel upload of egress BPF program failed: %m");
+
+                r = bpf_program_cgroup_attach(u->ip_bpf_egress, BPF_CGROUP_INET_EGRESS, cgroup_fs_path, attach_flags);
+                if (r < 0)
+                        return log_error_errno(r, "Attaching egress BPF program to cgroup %s failed: %m", cgroup_fs_path);
+        }
+
+        if (!u->ip_bpf_ingress) {
+                r = bpf_program_cgroup_detach(BPF_CGROUP_INET_INGRESS, cgroup_fs_path);
+                if (r < 0)
+                        return log_full_errno(r == -ENOENT ? LOG_DEBUG : LOG_ERR, r,
+                                              "Detaching ingress BPF program from cgroup failed: %m");
+        } else {
+                r = bpf_program_load_kernel(u->ip_bpf_ingress, NULL, 0);
+                if (r < 0)
+                        return log_error_errno(r, "Kernel upload of ingress BPF program failed: %m");
+
+                r = bpf_program_cgroup_attach(u->ip_bpf_ingress, BPF_CGROUP_INET_INGRESS, cgroup_fs_path, attach_flags);
+                if (r < 0)
+                        return log_error_errno(r, "Attaching ingress BPF program to cgroup %s failed: %m", cgroup_fs_path);
+        }
+
+        return 0;
+}
+
+/* Reads the counters stored in an accounting map.
+ *
+ * map_fd      fd of the accounting map; a negative value yields -EBADF
+ * ret_bytes   where to store the byte counter, or NULL if not wanted
+ * ret_packets where to store the packet counter, or NULL if not wanted
+ *
+ * Returns 0 on success or the negative error code of the failed lookup. *ret_packets is only written once
+ * every requested lookup has succeeded; *ret_bytes is handed to the lookup directly. */
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets) {
+        uint64_t packets;
+        int key, r;
+
+        /* The key must be exactly as wide as the key size the map was created with. The accounting maps in
+         * this file are created with sizeof(int) keys; a wider key variable only works by accident on
+         * little-endian machines, since only its first sizeof(int) bytes are looked at.
+         * NOTE(review): assumes map_fd always refers to one of those accounting maps. */
+
+        if (map_fd < 0)
+                return -EBADF;
+
+        if (ret_packets) {
+                key = MAP_KEY_PACKETS;
+                r = bpf_map_lookup_element(map_fd, &key, &packets);
+                if (r < 0)
+                        return r;
+        }
+
+        if (ret_bytes) {
+                key = MAP_KEY_BYTES;
+                r = bpf_map_lookup_element(map_fd, &key, ret_bytes);
+                if (r < 0)
+                        return r;
+        }
+
+        if (ret_packets)
+                *ret_packets = packets;
+
+        return 0;
+}
+
+/* Sets both counters (packets and bytes) of an accounting map back to zero.
+ *
+ * Returns -EBADF for a negative map_fd, otherwise the result of the map updates: the negative error code of
+ * the first update that failed, or the return value of the second one. */
+int bpf_firewall_reset_accounting(int map_fd) {
+        uint64_t value = 0;
+        int key, r;
+
+        /* The key must be exactly as wide as the key size the map was created with. The accounting maps in
+         * this file are created with sizeof(int) keys; with a wider key variable only its first sizeof(int)
+         * bytes would be looked at, which addresses the wrong slot on big-endian machines.
+         * NOTE(review): assumes map_fd always refers to one of those accounting maps. */
+
+        if (map_fd < 0)
+                return -EBADF;
+
+        key = MAP_KEY_PACKETS;
+        r = bpf_map_update_element(map_fd, &key, &value);
+        if (r < 0)
+                return r;
+
+        key = MAP_KEY_BYTES;
+        return bpf_map_update_element(map_fd, &key, &value);
+}
+
+
+/* Tells whether BPF firewalling can be used.
+ *
+ * Returns true if it is supported, false if it is not, and a negative error code if the cgroup hierarchy
+ * check itself failed. Definite answers are cached in a static variable and returned right away on later
+ * calls; the error case is not cached, so the check is repeated on the next call. */
+int bpf_firewall_supported(void) {
+        static int supported = -1;
+        int fd, r;
+
+        /* Checks whether BPF firewalling is supported. For this, we check three things:
+         *
+         * a) whether we are privileged
+         * b) whether the unified hierarchy is being used
+         * c) the BPF implementation in the kernel supports BPF LPM TRIE maps, which we require
+         *
+         */
+
+        if (supported >= 0)
+                return supported;
+
+        if (geteuid() != 0)
+                return supported = false;
+
+        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+        if (r < 0)
+                return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
+        if (r == 0)
+                return supported = false;
+
+        /* Probe for LPM trie support by creating a throw-away single-entry map. */
+        fd = bpf_map_new(BPF_MAP_TYPE_LPM_TRIE,
+                         offsetof(struct bpf_lpm_trie_key, data) + sizeof(uint64_t),
+                         sizeof(uint64_t),
+                         1,
+                         BPF_F_NO_PREALLOC);
+        if (fd < 0) {
+                /* The error of the failed map creation is in fd. r still holds the positive result of the
+                 * hierarchy check above and must not be used for %m here. */
+                log_debug_errno(fd, "Can't allocate BPF LPM TRIE map, BPF firewalling is not supported: %m");
+                return supported = false;
+        }
+
+        safe_close(fd);
+
+        return supported = true;
+}
--- a/src/core/bpf-firewall.h
+++ b/src/core/bpf-firewall.h
@ -0,0 +1,32 @@
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+
+#include "unit.h"
+/* Returns true if BPF firewalling can be used, false if not, or a negative error code if the cgroup
+ * hierarchy check failed. Definite answers are cached after the first call. */
+int bpf_firewall_supported(void);
+/* bpf_firewall_compile() rebuilds the unit's access maps and its ingress/egress BPF programs, keeping
+ * existing accounting maps. bpf_firewall_install() loads those programs into the kernel and attaches them
+ * to the unit's cgroup, detaching a direction that has no program. Both return 0 or a negative error code. */
+int bpf_firewall_compile(Unit *u);
+int bpf_firewall_install(Unit *u);
+/* Read or zero the counters of an accounting map. Both return -EBADF for a negative map_fd. For reading,
+ * either output pointer may be NULL if that counter is not wanted. */
+int bpf_firewall_read_accounting(int map_fd, uint64_t *ret_bytes, uint64_t *ret_packets);
+int bpf_firewall_reset_accounting(int map_fd);
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@ -21,6 +21,7 @@
 #include <fnmatch.h>

 #include "alloc-util.h"
+#include "bpf-firewall.h"
 #include "cgroup-util.h"
 #include "cgroup.h"
 #include "fd-util.h"
@ -30,9 +31,9 @@
 #include "path-util.h"
 #include "process-util.h"
 #include "special.h"
+#include "stdio-util.h"
 #include "string-table.h"
 #include "string-util.h"
-#include "stdio-util.h"

 #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)

@ -141,6 +142,9 @@ void cgroup_context_done(CGroupContext *c) {

        while (c->device_allow)
                cgroup_context_free_device_allow(c, c->device_allow);
+
+        c->ip_address_allow = ip_address_access_free_all(c->ip_address_allow);
+        c->ip_address_deny = ip_address_access_free_all(c->ip_address_deny);
 }

 void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
@ -149,6 +153,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
        CGroupBlockIODeviceBandwidth *b;
        CGroupBlockIODeviceWeight *w;
        CGroupDeviceAllow *a;
+        IPAddressAccessItem *iaai;
        char u[FORMAT_TIMESPAN_MAX];

        assert(c);
@ -162,6 +167,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                "%sBlockIOAccounting=%s\n"
                "%sMemoryAccounting=%s\n"
                "%sTasksAccounting=%s\n"
+                "%sIPAccounting=%s\n"
                "%sCPUWeight=%" PRIu64 "\n"
                "%sStartupCPUWeight=%" PRIu64 "\n"
                "%sCPUShares=%" PRIu64 "\n"
@ -184,6 +190,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                prefix, yes_no(c->blockio_accounting),
                prefix, yes_no(c->memory_accounting),
                prefix, yes_no(c->tasks_accounting),
+                prefix, yes_no(c->ip_accounting),
                prefix, c->cpu_weight,
                prefix, c->startup_cpu_weight,
                prefix, c->cpu_shares,
@ -253,6 +260,20 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                                b->path,
                                format_bytes(buf, sizeof(buf), b->wbps));
        }
+
+        LIST_FOREACH(items, iaai, c->ip_address_allow) {
+                _cleanup_free_ char *k = NULL;
+
+                (void) in_addr_to_string(iaai->family, &iaai->address, &k);
+                fprintf(f, "%sIPAddressAllow=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
+        }
+
+        LIST_FOREACH(items, iaai, c->ip_address_deny) {
+                _cleanup_free_ char *k = NULL;
+
+                (void) in_addr_to_string(iaai->family, &iaai->address, &k);
+                fprintf(f, "%sIPAddressDeny=%s/%u\n", prefix, strnull(k), iaai->prefixlen);
+        }
 }

 static int lookup_block_device(const char *p, dev_t *dev) {
@ -645,7 +666,27 @@ static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_
                              "Failed to set %s: %m", file);
 }

-static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
+/* Compiles the BPF firewall for the unit and, if that worked, installs it. This is best-effort: nothing is
+ * reported back to the caller. The context argument is not used by the body. */
+static void cgroup_apply_firewall(Unit *u, CGroupContext *c) {
+
+        /* Slice units are inner cgroup nodes, and since bpf/cgroup is not recursive we never touch the bpf on
+         * them. */
+        if (u->type == UNIT_SLICE)
+                return;
+
+        /* Installing only makes sense when compilation succeeded; an install failure is ignored on purpose. */
+        if (bpf_firewall_compile(u) >= 0)
+                (void) bpf_firewall_install(u);
+}
+
+static void cgroup_context_apply(
+                Unit *u,
+                CGroupMask apply_mask,
+                bool apply_bpf,
+                ManagerState state) {
+
        const char *path;
        CGroupContext *c;
        bool is_root;
@ -659,7 +700,8 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
        assert(c);
        assert(path);

-        if (mask == 0)
+        /* Nothing to do? Exit early! */
+        if (apply_mask == 0 && !apply_bpf)
                return;

        /* Some cgroup attributes are not supported on the root cgroup,
@ -673,9 +715,11 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
         * cgroup trees (assuming we are running in a container then),
         * and missing cgroups, i.e. EROFS and ENOENT. */

-        if ((mask & CGROUP_MASK_CPU) && !is_root) {
-                bool has_weight = cgroup_context_has_cpu_weight(c);
-                bool has_shares = cgroup_context_has_cpu_shares(c);
+        if ((apply_mask & CGROUP_MASK_CPU) && !is_root) {
+                bool has_weight, has_shares;
+
+                has_weight = cgroup_context_has_cpu_weight(c);
+                has_shares = cgroup_context_has_cpu_shares(c);

                if (cg_all_unified() > 0) {
                        uint64_t weight;
@ -712,7 +756,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                }
        }

-        if (mask & CGROUP_MASK_IO) {
+        if (apply_mask & CGROUP_MASK_IO) {
                bool has_io = cgroup_context_has_io_config(c);
                bool has_blockio = cgroup_context_has_blockio_config(c);

@ -789,7 +833,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                }
        }

-        if (mask & CGROUP_MASK_BLKIO) {
+        if (apply_mask & CGROUP_MASK_BLKIO) {
                bool has_io = cgroup_context_has_io_config(c);
                bool has_blockio = cgroup_context_has_blockio_config(c);

@ -856,7 +900,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                }
        }

-        if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
+        if ((apply_mask & CGROUP_MASK_MEMORY) && !is_root) {
                if (cg_all_unified() > 0) {
                        uint64_t max, swap_max = CGROUP_LIMIT_MAX;

@ -896,7 +940,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                }
        }

-        if ((mask & CGROUP_MASK_DEVICES) && !is_root) {
+        if ((apply_mask & CGROUP_MASK_DEVICES) && !is_root) {
                CGroupDeviceAllow *a;

                /* Changing the devices list of a populated cgroup
@ -960,7 +1004,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                }
        }

-        if ((mask & CGROUP_MASK_PIDS) && !is_root) {
+        if ((apply_mask & CGROUP_MASK_PIDS) && !is_root) {

                if (c->tasks_max != CGROUP_LIMIT_MAX) {
                        char buf[DECIMAL_STR_MAX(uint64_t) + 2];
@ -974,6 +1018,9 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                        log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
                                      "Failed to set pids.max: %m");
        }
+
+        if (apply_bpf)
+                cgroup_apply_firewall(u, c);
 }

 CGroupMask cgroup_context_get_mask(CGroupContext *c) {
@ -1120,6 +1167,39 @@ CGroupMask unit_get_enable_mask(Unit *u) {
        return mask;
 }

+/* Tells whether a BPF program has to be attached to the unit's cgroup. That is the case if the unit itself
+ * has IP accounting turned on or has an IP allow/deny list, or if any slice above it has an IP allow/deny
+ * list. Slice units themselves never need one, and neither do units without a cgroup context. */
+bool unit_get_needs_bpf(Unit *u) {
+        CGroupContext *ctx;
+        Unit *slice;
+
+        assert(u);
+
+        /* We never attach BPF to slice units, as they are inner cgroup nodes and cgroup/BPF is not recursive
+         * at the moment. */
+        if (u->type == UNIT_SLICE)
+                return false;
+
+        ctx = unit_get_cgroup_context(u);
+        if (!ctx)
+                return false;
+
+        if (ctx->ip_accounting || ctx->ip_address_allow || ctx->ip_address_deny)
+                return true;
+
+        /* Access lists configured on a parent slice apply to this unit as well, so walk up the slice chain.
+         * Accounting on a parent is not considered. The walk ends with "no" at the first parent that has
+         * no cgroup context. */
+        slice = UNIT_DEREF(u->slice);
+        while (slice) {
+                ctx = unit_get_cgroup_context(slice);
+                if (!ctx)
+                        return false;
+
+                if (ctx->ip_address_allow || ctx->ip_address_deny)
+                        return true;
+
+                slice = UNIT_DEREF(slice->slice);
+        }
+
+        return false;
+}
+
 /* Recurse from a unit up through its containing slices, propagating
 * mask bits upward. A unit is also member of itself. */
 void unit_update_cgroup_members_masks(Unit *u) {
@ -1295,7 +1375,8 @@ int unit_watch_cgroup(Unit *u) {
 static int unit_create_cgroup(
                Unit *u,
                CGroupMask target_mask,
-                CGroupMask enable_mask) {
+                CGroupMask enable_mask,
+                bool needs_bpf) {

        CGroupContext *c;
        int r;
@ -1337,6 +1418,7 @@ static int unit_create_cgroup(
        u->cgroup_realized = true;
        u->cgroup_realized_mask = target_mask;
        u->cgroup_enabled_mask = enable_mask;
+        u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;

        if (u->type != UNIT_SLICE && !c->delegate) {

@ -1386,10 +1468,19 @@ static void cgroup_xattr_apply(Unit *u) {
                log_unit_warning_errno(u, r, "Failed to set invocation ID on control group %s, ignoring: %m", u->cgroup_path);
 }

-static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask enable_mask) {
+static bool unit_has_mask_realized(
+                Unit *u,
+                CGroupMask target_mask,
+                CGroupMask enable_mask,
+                bool needs_bpf) {
+        /* Returns true only if the unit's cgroup is realized, its realized and enabled masks equal the
+         * requested ones, and its recorded BPF state matches needs_bpf (ON when needed, OFF when not). Any
+         * other recorded BPF state never counts as realized. */
         assert(u);
 
-        return u->cgroup_realized && u->cgroup_realized_mask == target_mask && u->cgroup_enabled_mask == enable_mask;
+        return u->cgroup_realized &&
+                u->cgroup_realized_mask == target_mask &&
+                u->cgroup_enabled_mask == enable_mask &&
+                ((needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_ON) ||
+                 (!needs_bpf && u->cgroup_bpf_state == UNIT_CGROUP_BPF_OFF));
 }

 /* Check if necessary controllers and attributes for a unit are in place.
@ -1400,6 +1491,7 @@ static bool unit_has_mask_realized(Unit *u, CGroupMask target_mask, CGroupMask e
 * Returns 0 on success and < 0 on failure. */
 static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
        CGroupMask target_mask, enable_mask;
+        bool needs_bpf, apply_bpf;
        int r;

        assert(u);
@ -1411,10 +1503,16 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {

        target_mask = unit_get_target_mask(u);
        enable_mask = unit_get_enable_mask(u);
+        needs_bpf = unit_get_needs_bpf(u);

-        if (unit_has_mask_realized(u, target_mask, enable_mask))
+        if (unit_has_mask_realized(u, target_mask, enable_mask, needs_bpf))
                return 0;

+        /* Make sure we apply the BPF filters either when one is configured, or if none is configured but previously
+         * the state was anything but off. This way, if a unit with a BPF filter applied is reconfigured to lose it
+         * this will trickle down properly to cgroupfs. */
+        apply_bpf = needs_bpf || u->cgroup_bpf_state != UNIT_CGROUP_BPF_OFF;
+
        /* First, realize parents */
        if (UNIT_ISSET(u->slice)) {
                r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
@ -1423,18 +1521,19 @@ static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
        }

        /* And then do the real work */
-        r = unit_create_cgroup(u, target_mask, enable_mask);
+        r = unit_create_cgroup(u, target_mask, enable_mask, needs_bpf);
        if (r < 0)
                return r;

        /* Finally, apply the necessary attributes. */
-        cgroup_context_apply(u, target_mask, state);
+        cgroup_context_apply(u, target_mask, apply_bpf, state);
        cgroup_xattr_apply(u);

        return 0;
 }

 static void unit_add_to_cgroup_queue(Unit *u) {
+        assert(u);

        if (u->in_cgroup_queue)
                return;
@ -1492,7 +1591,10 @@ static void unit_queue_siblings(Unit *u) {
                        /* If the unit doesn't need any new controllers
                         * and has current ones realized, it doesn't need
                         * any changes. */
-                        if (unit_has_mask_realized(m, unit_get_target_mask(m), unit_get_enable_mask(m)))
+                        if (unit_has_mask_realized(m,
+                                                   unit_get_target_mask(m),
+                                                   unit_get_enable_mask(m),
+                                                   unit_get_needs_bpf(m)))
                                continue;

                        unit_add_to_cgroup_queue(m);
@ -1756,6 +1858,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents,

 int manager_setup_cgroup(Manager *m) {
        _cleanup_free_ char *path = NULL;
+        const char *scope_path;
        CGroupController c;
        int r, all_unified;
        char *e;
@ -1813,74 +1916,67 @@ int manager_setup_cgroup(Manager *m) {
                        log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER_LEGACY ". File system hierarchy is at %s.", path);
        }

-        if (!m->test_run_flags) {
-                const char *scope_path;
+        /* 3. Install agent */
+        if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {

-                /* 3. Install agent */
-                if (cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER) > 0) {
+                /* In the unified hierarchy we can get
+                 * cgroup empty notifications via inotify. */

-                        /* In the unified hierarchy we can get
-                         * cgroup empty notifications via inotify. */
+                m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
+                safe_close(m->cgroup_inotify_fd);

-                        m->cgroup_inotify_event_source = sd_event_source_unref(m->cgroup_inotify_event_source);
-                        safe_close(m->cgroup_inotify_fd);
+                m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
+                if (m->cgroup_inotify_fd < 0)
+                        return log_error_errno(errno, "Failed to create control group inotify object: %m");

-                        m->cgroup_inotify_fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
-                        if (m->cgroup_inotify_fd < 0)
-                                return log_error_errno(errno, "Failed to create control group inotify object: %m");
-
-                        r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
-                        if (r < 0)
-                                return log_error_errno(r, "Failed to watch control group inotify object: %m");
-
-                        /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
-                         * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
-                        r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
-                        if (r < 0)
-                                return log_error_errno(r, "Failed to set priority of inotify event source: %m");
-
-                        (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");
-
-                } else if (MANAGER_IS_SYSTEM(m)) {
-
-                        /* On the legacy hierarchy we only get
-                         * notifications via cgroup agents. (Which
-                         * isn't really reliable, since it does not
-                         * generate events when control groups with
-                         * children run empty. */
-
-                        r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
-                        if (r < 0)
-                                log_warning_errno(r, "Failed to install release agent, ignoring: %m");
-                        else if (r > 0)
-                                log_debug("Installed release agent.");
-                        else if (r == 0)
-                                log_debug("Release agent already installed.");
-                }
-
-                /* 4. Make sure we are in the special "init.scope" unit in the root slice. */
-                scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
-                r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+                r = sd_event_add_io(m->event, &m->cgroup_inotify_event_source, m->cgroup_inotify_fd, EPOLLIN, on_cgroup_inotify_event, m);
                if (r < 0)
-                        return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
+                        return log_error_errno(r, "Failed to watch control group inotify object: %m");

-                /* also, move all other userspace processes remaining
-                 * in the root cgroup into that scope. */
-                r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+                /* Process cgroup empty notifications early, but after service notifications and SIGCHLD. Also
+                 * see handling of cgroup agent notifications, for the classic cgroup hierarchy support. */
+                r = sd_event_source_set_priority(m->cgroup_inotify_event_source, SD_EVENT_PRIORITY_NORMAL-5);
                if (r < 0)
-                        log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
+                        return log_error_errno(r, "Failed to set priority of inotify event source: %m");

-                /* 5. And pin it, so that it cannot be unmounted */
-                safe_close(m->pin_cgroupfs_fd);
-                m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
-                if (m->pin_cgroupfs_fd < 0)
-                        return log_error_errno(errno, "Failed to open pin file: %m");
+                (void) sd_event_source_set_description(m->cgroup_inotify_event_source, "cgroup-inotify");

-                /* 6.  Always enable hierarchical support if it exists... */
-                if (!all_unified)
-                        (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
+        } else if (MANAGER_IS_SYSTEM(m) && m->test_run_flags == 0) {
+
+                /* On the legacy hierarchy we only get notifications via cgroup agents. (Which isn't really reliable,
+                 * since it does not generate events when control groups with children run empty.) */
+
+                r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
+                if (r < 0)
+                        log_warning_errno(r, "Failed to install release agent, ignoring: %m");
+                else if (r > 0)
+                        log_debug("Installed release agent.");
+                else if (r == 0)
+                        log_debug("Release agent already installed.");
        }

+        /* 4. Make sure we are in the special "init.scope" unit in the root slice. */
+        scope_path = strjoina(m->cgroup_root, "/" SPECIAL_INIT_SCOPE);
+        r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+        if (r < 0)
+                return log_error_errno(r, "Failed to create %s control group: %m", scope_path);
+
+        /* also, move all other userspace processes remaining
+         * in the root cgroup into that scope. */
+        r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
+        if (r < 0)
+                log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
+
+        /* 5. And pin it, so that it cannot be unmounted */
+        safe_close(m->pin_cgroupfs_fd);
+        m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
+        if (m->pin_cgroupfs_fd < 0)
+                return log_error_errno(errno, "Failed to open pin file: %m");
+
+        /* 6.  Always enable hierarchical support if it exists... */
+        if (!all_unified && m->test_run_flags == 0)
+                (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
+
        /* 7. Figure out which controllers are supported */
        r = cg_mask_supported(&m->cgroup_supported);
        if (r < 0)
@ -1992,11 +2088,18 @@ int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {

 int unit_get_memory_current(Unit *u, uint64_t *ret) {
        _cleanup_free_ char *v = NULL;
+        CGroupContext *cc;
        int r;

        assert(u);
        assert(ret);

+        cc = unit_get_cgroup_context(u);
+        if (!cc)
+                return -ENODATA;
+        if (!cc->memory_accounting)
+                return -ENODATA;
+
        if (!u->cgroup_path)
                return -ENODATA;

@ -2020,11 +2123,18 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) {

 int unit_get_tasks_current(Unit *u, uint64_t *ret) {
        _cleanup_free_ char *v = NULL;
+        CGroupContext *cc;
        int r;

        assert(u);
        assert(ret);

+        cc = unit_get_cgroup_context(u);
+        if (!cc)
+                return -ENODATA;
+        if (!cc->tasks_accounting)
+                return -ENODATA;
+
        if (!u->cgroup_path)
                return -ENODATA;

@ -2091,6 +2201,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
 }

 int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
+        CGroupContext *cc;
        nsec_t ns;
        int r;

@ -2100,6 +2211,12 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
         * started. If the cgroup has been removed already, returns the last cached value. To cache the value, simply
         * call this function with a NULL return value. */

+        cc = unit_get_cgroup_context(u);
+        if (!cc)
+                return -ENODATA;
+        if (!cc->cpu_accounting)
+                return -ENODATA;
+
        r = unit_get_cpu_usage_raw(u, &ns);
        if (r == -ENODATA && u->cpu_usage_last != NSEC_INFINITY) {
                /* If we can't get the CPU usage anymore (because the cgroup was already removed, for example), use our
@ -2124,7 +2241,57 @@ int unit_get_cpu_usage(Unit *u, nsec_t *ret) {
        return 0;
 }

-int unit_reset_cpu_usage(Unit *u) {
+/* Reads a single IP accounting counter of a unit and stores it in *ret.
+ *
+ * Returns -ENODATA if the unit is a slice, has no cgroup context, has IP accounting turned off, or has no accounting
+ * map open for the requested direction. A negative return value of bpf_firewall_read_accounting() is passed through
+ * unchanged; otherwise its (non-negative) return value is returned. */
+int unit_get_ip_accounting(
+                Unit *u,
+                CGroupIPAccountingMetric metric,
+                uint64_t *ret) {
+
+        bool want_ingress, want_bytes;
+        CGroupContext *cc;
+        uint64_t counter;
+        int map_fd, r;
+
+        assert(u);
+        assert(metric >= 0);
+        assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX);
+        assert(ret);
+
+        /* IP accounting is currently not recursive, and hence we refuse to return any data for slice nodes. Slices are
+         * inner cgroup nodes and hence have no processes directly attached, hence their counters would be zero
+         * anyway. And if we block this now we can later open this up, if the kernel learns recursive BPF cgroup
+         * filters. */
+        if (u->type == UNIT_SLICE)
+                return -ENODATA;
+
+        cc = unit_get_cgroup_context(u);
+        if (!cc || !cc->ip_accounting)
+                return -ENODATA;
+
+        /* Split the metric into its two dimensions: traffic direction and unit of measure. */
+        want_ingress = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_INGRESS_PACKETS);
+        want_bytes = IN_SET(metric, CGROUP_IP_INGRESS_BYTES, CGROUP_IP_EGRESS_BYTES);
+
+        map_fd = want_ingress ? u->ip_accounting_ingress_map_fd : u->ip_accounting_egress_map_fd;
+        if (map_fd < 0)
+                return -ENODATA;
+
+        /* Only the output pointer for the requested unit of measure is passed, the other one is NULL. */
+        r = bpf_firewall_read_accounting(map_fd,
+                                         want_bytes ? &counter : NULL,
+                                         want_bytes ? NULL : &counter);
+        if (r < 0)
+                return r;
+
+        /* Add in additional metrics from a previous runtime. Note that when reexecing/reloading the daemon we compile
+         * all BPF programs and maps anew, but serialize the old counters. When deserializing we store them in the
+         * ip_accounting_extra[] field, and add them in here transparently. */
+        *ret = counter + u->ip_accounting_extra[metric];
+
+        return r;
+}
+
+int unit_reset_cpu_accounting(Unit *u) {
        nsec_t ns;
        int r;

@ -2142,6 +2309,22 @@ int unit_reset_cpu_usage(Unit *u) {
        return 0;
 }

+/* Resets the IP accounting counters of a unit: both accounting maps (where open) and the counters carried over in
+ * ip_accounting_extra[]. Both maps are always attempted; if resetting the ingress map failed its error is returned,
+ * otherwise the result of resetting the egress map (0 if that map is not open). */
+int unit_reset_ip_accounting(Unit *u) {
+        int ingress_result = 0, egress_result = 0;
+
+        assert(u);
+
+        if (u->ip_accounting_ingress_map_fd >= 0)
+                ingress_result = bpf_firewall_reset_accounting(u->ip_accounting_ingress_map_fd);
+
+        if (u->ip_accounting_egress_map_fd >= 0)
+                egress_result = bpf_firewall_reset_accounting(u->ip_accounting_egress_map_fd);
+
+        zero(u->ip_accounting_extra);
+
+        if (ingress_result < 0)
+                return ingress_result;
+
+        return egress_result;
+}
+
 bool unit_cgroup_delegate(Unit *u) {
        CGroupContext *c;

@ -2167,6 +2350,9 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
        if (m & (CGROUP_MASK_IO | CGROUP_MASK_BLKIO))
                m |= CGROUP_MASK_IO | CGROUP_MASK_BLKIO;

+        if (m & (CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT))
+                m |= CGROUP_MASK_CPU | CGROUP_MASK_CPUACCT;
+
        if ((u->cgroup_realized_mask & m) == 0)
                return;

@ -2174,6 +2360,36 @@ void unit_invalidate_cgroup(Unit *u, CGroupMask m) {
        unit_add_to_cgroup_queue(u);
 }

+/* Marks the BPF state of a unit's cgroup as invalidated and queues the unit on the cgroup queue. Does nothing for
+ * units without a cgroup context or whose BPF state is already marked invalidated. For slice units the invalidation
+ * is propagated recursively to the units that are members of the slice. */
+void unit_invalidate_cgroup_bpf(Unit *u) {
+        Unit *child;
+        Iterator it;
+
+        assert(u);
+
+        if (!UNIT_HAS_CGROUP_CONTEXT(u))
+                return;
+
+        if (u->cgroup_bpf_state == UNIT_CGROUP_BPF_INVALIDATED)
+                return;
+
+        u->cgroup_bpf_state = UNIT_CGROUP_BPF_INVALIDATED;
+        unit_add_to_cgroup_queue(u);
+
+        /* Only slices have members to propagate to. */
+        if (u->type != UNIT_SLICE)
+                return;
+
+        /* If we are a slice unit, we also need to compile a new BPF program for all our children, as the IP access
+         * list of our children includes our own. */
+        SET_FOREACH(child, u->dependencies[UNIT_BEFORE], it) {
+
+                /* The UNIT_BEFORE set may contain units that are not members of this slice; skip those (and us). */
+                if (child == u || UNIT_DEREF(child->slice) != u)
+                        continue;
+
+                unit_invalidate_cgroup_bpf(child);
+        }
+}
+
 void manager_invalidate_startup_units(Manager *m) {
        Iterator i;
        Unit *u;
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@ -21,9 +21,10 @@

 #include <stdbool.h>

+#include "cgroup-util.h"
+#include "ip-address-access.h"
 #include "list.h"
 #include "time-util.h"
-#include "cgroup-util.h"

 typedef struct CGroupContext CGroupContext;
 typedef struct CGroupDeviceAllow CGroupDeviceAllow;
@ -87,6 +88,7 @@ struct CGroupContext {
        bool blockio_accounting;
        bool memory_accounting;
        bool tasks_accounting;
+        bool ip_accounting;

        /* For unified hierarchy */
        uint64_t cpu_weight;
@ -103,6 +105,9 @@ struct CGroupContext {
        uint64_t memory_max;
        uint64_t memory_swap_max;

+        LIST_HEAD(IPAddressAccessItem, ip_address_allow);
+        LIST_HEAD(IPAddressAccessItem, ip_address_deny);
+
        /* For legacy hierarchies */
        uint64_t cpu_shares;
        uint64_t startup_cpu_shares;
@ -123,6 +128,16 @@ struct CGroupContext {
        bool delegate;
 };

+/* Used when querying IP accounting data. Each regular value selects one counter, i.e. a traffic direction (ingress or
+ * egress) combined with a unit of measure (bytes or packets). The values are also used as array indexes (see
+ * ip_accounting_extra[] in unit_get_ip_accounting()), hence they must stay dense and start at zero. */
+typedef enum CGroupIPAccountingMetric {
+        CGROUP_IP_INGRESS_BYTES,
+        CGROUP_IP_INGRESS_PACKETS,
+        CGROUP_IP_EGRESS_BYTES,
+        CGROUP_IP_EGRESS_PACKETS,
+        _CGROUP_IP_ACCOUNTING_METRIC_MAX,            /* number of metrics above, not a metric itself */
+        _CGROUP_IP_ACCOUNTING_METRIC_INVALID = -1,   /* sentinel outside of the valid range */
+} CGroupIPAccountingMetric;
+
 #include "unit.h"

 void cgroup_context_init(CGroupContext *c);
@ -145,6 +160,8 @@ CGroupMask unit_get_subtree_mask(Unit *u);
 CGroupMask unit_get_target_mask(Unit *u);
 CGroupMask unit_get_enable_mask(Unit *u);

+bool unit_get_needs_bpf(Unit *u);
+
 void unit_update_cgroup_members_masks(Unit *u);

 char *unit_default_cgroup_path(Unit *u);
@ -172,7 +189,10 @@ int unit_watch_all_pids(Unit *u);
 int unit_get_memory_current(Unit *u, uint64_t *ret);
 int unit_get_tasks_current(Unit *u, uint64_t *ret);
 int unit_get_cpu_usage(Unit *u, nsec_t *ret);
-int unit_reset_cpu_usage(Unit *u);
+int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
+
+int unit_reset_cpu_accounting(Unit *u);
+int unit_reset_ip_accounting(Unit *u);

 bool unit_cgroup_delegate(Unit *u);

@ -180,6 +200,7 @@ int unit_notify_cgroup_empty(Unit *u);
 int manager_notify_cgroup_empty(Manager *m, const char *group);

 void unit_invalidate_cgroup(Unit *u, CGroupMask m);
+void unit_invalidate_cgroup_bpf(Unit *u);

 void manager_invalidate_startup_units(Manager *m);

--- a/src/core/dbus-cgroup.c
+++ b/src/core/dbus-cgroup.c
@ -17,7 +17,11 @@
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/

+#include <arpa/inet.h>
+
+#include "af-list.h"
 #include "alloc-util.h"
+#include "bpf-firewall.h"
 #include "bus-util.h"
 #include "cgroup-util.h"
 #include "cgroup.h"
@ -206,6 +210,48 @@ static int property_get_device_allow(
        return sd_bus_message_close_container(reply);
 }

+/* D-Bus property getter that serializes an IP address access list as "a(iayu)": for each list entry one struct
+ * consisting of the address family, the raw address bytes (as many as the family requires) and the prefix length.
+ * userdata points to the list head pointer. Returns the first negative sd-bus error encountered, otherwise the result
+ * of closing the outer array container. */
+static int property_get_ip_address_access(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        IPAddressAccessItem **head = userdata;
+        IPAddressAccessItem *entry;
+        int r;
+
+        r = sd_bus_message_open_container(reply, 'a', "(iayu)");
+        if (r < 0)
+                return r;
+
+        LIST_FOREACH(items, entry, *head) {
+
+                /* One struct per entry: family, address bytes, prefix length */
+                r = sd_bus_message_open_container(reply, 'r', "iayu");
+                if (r < 0)
+                        return r;
+
+                r = sd_bus_message_append(reply, "i", entry->family);
+                if (r < 0)
+                        return r;
+
+                r = sd_bus_message_append_array(reply, 'y', &entry->address, FAMILY_ADDRESS_SIZE(entry->family));
+                if (r < 0)
+                        return r;
+
+                /* prefixlen is stored as unsigned char, but goes over the wire as 32bit unsigned */
+                r = sd_bus_message_append(reply, "u", (uint32_t) entry->prefixlen);
+                if (r < 0)
+                        return r;
+
+                r = sd_bus_message_close_container(reply);
+                if (r < 0)
+                        return r;
+        }
+
+        return sd_bus_message_close_container(reply);
+}
+
 const sd_bus_vtable bus_cgroup_vtable[] = {
        SD_BUS_VTABLE_START(0),
        SD_BUS_PROPERTY("Delegate", "b", bus_property_get_bool, offsetof(CGroupContext, delegate), 0),
@ -239,6 +285,9 @@ const sd_bus_vtable bus_cgroup_vtable[] = {
        SD_BUS_PROPERTY("DeviceAllow", "a(ss)", property_get_device_allow, 0, 0),
        SD_BUS_PROPERTY("TasksAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, tasks_accounting), 0),
        SD_BUS_PROPERTY("TasksMax", "t", NULL, offsetof(CGroupContext, tasks_max), 0),
+        SD_BUS_PROPERTY("IPAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, ip_accounting), 0),
+        SD_BUS_PROPERTY("IPAddressAllow", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_allow), 0),
+        SD_BUS_PROPERTY("IPAddressDeny", "a(iayu)", property_get_ip_address_access, offsetof(CGroupContext, ip_address_deny), 0),
        SD_BUS_VTABLE_END
 };

@ -1133,6 +1182,7 @@ int bus_cgroup_set_property(
                }

                return 1;
+
        } else if (streq(name, "TasksMaxScale")) {
                uint64_t limit;
                uint32_t raw;
@ -1152,6 +1202,137 @@ int bus_cgroup_set_property(
                                                          (uint32_t) (DIV_ROUND_UP((uint64_t) raw * 100U, (uint64_t) UINT32_MAX)));
                }

+                return 1;
+
+        } else if (streq(name, "IPAccounting")) {
+                int b;
+
+                r = sd_bus_message_read(message, "b", &b);
+                if (r < 0)
+                        return r;
+
+                if (mode != UNIT_CHECK) {
+                        c->ip_accounting = b;
+
+                        unit_invalidate_cgroup_bpf(u);
+                        unit_write_drop_in_private(u, mode, name, b ? "IPAccounting=yes" : "IPAccounting=no");
+                }
+
+                return 1;
+
+        } else if (STR_IN_SET(name, "IPAddressAllow", "IPAddressDeny")) {
+                IPAddressAccessItem **list;
+                size_t n = 0;
+
+                list = streq(name, "IPAddressAllow") ? &c->ip_address_allow : &c->ip_address_deny;
+
+                r = sd_bus_message_enter_container(message, 'a', "(iayu)");
+                if (r < 0)
+                        return r;
+
+                for (;;) {
+                        const void *ap;
+                        int32_t family;
+                        uint32_t prefixlen;
+                        size_t an;
+
+                        r = sd_bus_message_enter_container(message, 'r', "iayu");
+                        if (r < 0)
+                                return r;
+                        if (r == 0)
+                                break;
+
+                        r = sd_bus_message_read(message, "i", &family);
+                        if (r < 0)
+                                return r;
+
+                        if (!IN_SET(family, AF_INET, AF_INET6))
+                                return sd_bus_error_set_errnof(error, EINVAL, "IPAddressAllow= expects IPv4 or IPv6 addresses only.");
+
+                        r = sd_bus_message_read_array(message, 'y', &ap, &an);
+                        if (r < 0)
+                                return r;
+
+                        if (an != FAMILY_ADDRESS_SIZE(family))
+                                return sd_bus_error_set_errnof(error, EINVAL, "IP address has wrong size for family (%s, expected %zu, got %zu)",
+                                                               af_to_name(family), FAMILY_ADDRESS_SIZE(family), an);
+
+                        r = sd_bus_message_read(message, "u", &prefixlen);
+                        if (r < 0)
+                                return r;
+
+                        if (prefixlen > FAMILY_ADDRESS_SIZE(family)*8)
+                                return sd_bus_error_set_errnof(error, EINVAL, "Prefix length too large for family.");
+
+                        if (mode != UNIT_CHECK) {
+                                IPAddressAccessItem *item;
+
+                                item = new0(IPAddressAccessItem, 1);
+                                if (!item)
+                                        return -ENOMEM;
+
+                                item->family = family;
+                                item->prefixlen = prefixlen;
+                                memcpy(&item->address, ap, an);
+
+                                LIST_PREPEND(items, *list, item);
+                        }
+
+                        r = sd_bus_message_exit_container(message);
+                        if (r < 0)
+                                return r;
+
+                        n++;
+                }
+
+                r = sd_bus_message_exit_container(message);
+                if (r < 0)
+                        return r;
+
+                *list = ip_address_access_reduce(*list);
+
+                if (mode != UNIT_CHECK) {
+                        _cleanup_free_ char *buf = NULL;
+                        _cleanup_fclose_ FILE *f = NULL;
+                        IPAddressAccessItem *item;
+                        size_t size = 0;
+
+                        if (n == 0)
+                                *list = ip_address_access_free_all(*list);
+
+                        unit_invalidate_cgroup_bpf(u);
+                        f = open_memstream(&buf, &size);
+                        if (!f)
+                                return -ENOMEM;
+
+                        fputs_unlocked(name, f);
+                        fputs_unlocked("=\n", f);
+
+                        LIST_FOREACH(items, item, *list) {
+                                char buffer[CONST_MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
+
+                                errno = 0;
+                                if (!inet_ntop(item->family, &item->address, buffer, sizeof(buffer)))
+                                        return errno > 0 ? -errno : -EINVAL;
+
+                                fprintf(f, "%s=%s/%u\n", name, buffer, item->prefixlen);
+                        }
+
+                        r = fflush_and_check(f);
+                        if (r < 0)
+                                return r;
+                        unit_write_drop_in_private(u, mode, name, buf);
+
+                        if (*list) {
+                                r = bpf_firewall_supported();
+                                if (r < 0)
+                                        return r;
+                                if (r == 0)
+                                        log_warning("Transient unit %s configures an IP firewall, but the local system does not support BPF/cgroup firewalling.\n"
+                                                    "Proceeding WITHOUT firewalling in effect!", u->id);
+                        }
+                }
+
                return 1;
        }

--- a/src/core/dbus-unit.c
+++ b/src/core/dbus-unit.c
@ -20,6 +20,7 @@
 #include "sd-bus.h"

 #include "alloc-util.h"
+#include "bpf-firewall.h"
 #include "bus-common-errors.h"
 #include "cgroup-util.h"
 #include "dbus-job.h"
@ -1051,6 +1052,39 @@ int bus_unit_method_get_processes(sd_bus_message *message, void *userdata, sd_bu
        return sd_bus_send(NULL, reply, NULL);
 }

+/* D-Bus property getter shared by the four IP accounting properties. The property name selects which counter is
+ * queried. If the counter cannot be read the error is ignored and the all-ones value (UINT64_MAX) is sent instead. */
+static int property_get_ip_counter(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        Unit *u = userdata;
+        uint64_t counter = UINT64_MAX;
+        CGroupIPAccountingMetric which;
+
+        assert(bus);
+        assert(reply);
+        assert(property);
+        assert(u);
+
+        /* Map the property name to the metric. */
+        if (streq(property, "IPIngressBytes"))
+                which = CGROUP_IP_INGRESS_BYTES;
+        else if (streq(property, "IPIngressPackets"))
+                which = CGROUP_IP_INGRESS_PACKETS;
+        else if (streq(property, "IPEgressBytes"))
+                which = CGROUP_IP_EGRESS_BYTES;
+        else {
+                /* Any other name than the remaining one is a programming error. */
+                assert(streq(property, "IPEgressPackets"));
+                which = CGROUP_IP_EGRESS_PACKETS;
+        }
+
+        /* Failure is deliberately ignored: counter then keeps its initial all-ones value. */
+        (void) unit_get_ip_accounting(u, which, &counter);
+
+        return sd_bus_message_append(reply, "t", counter);
+}
+
 const sd_bus_vtable bus_unit_cgroup_vtable[] = {
        SD_BUS_VTABLE_START(0),
        SD_BUS_PROPERTY("Slice", "s", property_get_slice, 0, 0),
@ -1058,6 +1092,10 @@ const sd_bus_vtable bus_unit_cgroup_vtable[] = {
        SD_BUS_PROPERTY("MemoryCurrent", "t", property_get_current_memory, 0, 0),
        SD_BUS_PROPERTY("CPUUsageNSec", "t", property_get_cpu_usage, 0, 0),
        SD_BUS_PROPERTY("TasksCurrent", "t", property_get_current_tasks, 0, 0),
+        SD_BUS_PROPERTY("IPIngressBytes", "t", property_get_ip_counter, 0, 0),
+        SD_BUS_PROPERTY("IPIngressPackets", "t", property_get_ip_counter, 0, 0),
+        SD_BUS_PROPERTY("IPEgressBytes", "t", property_get_ip_counter, 0, 0),
+        SD_BUS_PROPERTY("IPEgressPackets", "t", property_get_ip_counter, 0, 0),
        SD_BUS_METHOD("GetProcesses", NULL, "a(sus)", bus_unit_method_get_processes, SD_BUS_VTABLE_UNPRIVILEGED),
        SD_BUS_VTABLE_END
 };
--- a/src/core/dynamic-user.c
+++ b/src/core/dynamic-user.c
@ -23,13 +23,14 @@

 #include "dynamic-user.h"
 #include "fd-util.h"
+#include "fileio.h"
 #include "fs-util.h"
+#include "io-util.h"
 #include "parse-util.h"
 #include "random-util.h"
 #include "stdio-util.h"
 #include "string-util.h"
 #include "user-util.h"
-#include "fileio.h"

 /* Takes a value generated randomly or by hashing and turns it into a UID in the right range */
 #define UID_CLAMP_INTO_RANGE(rnd) (((uid_t) (rnd) % (DYNAMIC_UID_MAX - DYNAMIC_UID_MIN + 1)) + DYNAMIC_UID_MIN)
@ -245,8 +246,8 @@ static int pick_uid(const char *name, uid_t *ret_uid) {
                /* Let's store the user name in the lock file, so that we can use it for looking up the username for a UID */
                l = pwritev(lock_fd,
                            (struct iovec[2]) {
-                                    { .iov_base = (char*) name, .iov_len = strlen(name) },
-                                    { .iov_base = (char[1]) { '\n' }, .iov_len = 1 }
+                                    IOVEC_INIT_STRING(name),
+                                    IOVEC_INIT((char[1]) { '\n' }, 1),
                            }, 2, 0);
                if (l < 0) {
                        (void) unlink(lock_path);
@ -271,10 +272,7 @@ static int pick_uid(const char *name, uid_t *ret_uid) {

 static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
        uid_t uid = UID_INVALID;
-        struct iovec iov = {
-                .iov_base = &uid,
-                .iov_len = sizeof(uid),
-        };
+        struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
        union {
                struct cmsghdr cmsghdr;
                uint8_t buf[CMSG_SPACE(sizeof(int))];
@ -314,10 +312,7 @@ static int dynamic_user_pop(DynamicUser *d, uid_t *ret_uid, int *ret_lock_fd) {
 }

 static int dynamic_user_push(DynamicUser *d, uid_t uid, int lock_fd) {
-        struct iovec iov = {
-                .iov_base = &uid,
-                .iov_len = sizeof(uid),
-        };
+        struct iovec iov = IOVEC_INIT(&uid, sizeof(uid));
        union {
                struct cmsghdr cmsghdr;
                uint8_t buf[CMSG_SPACE(sizeof(int))];
--- a/src/core/execute.c
+++ b/src/core/execute.c
@ -2351,9 +2351,9 @@ static int send_user_lookup(

        if (writev(user_lookup_fd,
               (struct iovec[]) {
-                           { .iov_base = &uid, .iov_len = sizeof(uid) },
-                           { .iov_base = &gid, .iov_len = sizeof(gid) },
-                           { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
+                           IOVEC_INIT(&uid, sizeof(uid)),
+                           IOVEC_INIT(&gid, sizeof(gid)),
+                           IOVEC_INIT_STRING(unit->id) }, 3) < 0)
                return -errno;

        return 0;
@ -3150,6 +3150,7 @@ static int exec_child(
                                   "EXECUTABLE=%s", command->path,
                                   LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
                                   LOG_UNIT_ID(unit),
+                                   LOG_UNIT_INVOCATION_ID(unit),
                                   NULL);
                        log_close();
                }
@ -3223,6 +3224,7 @@ int exec_spawn(Unit *unit,
                   LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
                   "EXECUTABLE=%s", command->path,
                   LOG_UNIT_ID(unit),
+                   LOG_UNIT_INVOCATION_ID(unit),
                   NULL);
        pid = fork();
        if (pid < 0)
@ -3254,6 +3256,7 @@ int exec_spawn(Unit *unit,
                                log_struct_errno(LOG_ERR, r,
                                                 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
                                                 LOG_UNIT_ID(unit),
+                                                 LOG_UNIT_INVOCATION_ID(unit),
                                                 LOG_UNIT_MESSAGE(unit, "%s: %m",
                                                                  error_message),
                                                 "EXECUTABLE=%s", command->path,
@ -3262,6 +3265,7 @@ int exec_spawn(Unit *unit,
                                log_struct_errno(LOG_INFO, r,
                                                 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
                                                 LOG_UNIT_ID(unit),
+                                                 LOG_UNIT_INVOCATION_ID(unit),
                                                 LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
                                                                  command->path),
                                                 "EXECUTABLE=%s", command->path,
@ -3270,6 +3274,7 @@ int exec_spawn(Unit *unit,
                                log_struct_errno(LOG_ERR, r,
                                                 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
                                                 LOG_UNIT_ID(unit),
+                                                 LOG_UNIT_INVOCATION_ID(unit),
                                                 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
                                                                  exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
                                                                  command->path),
--- a/src/core/ip-address-access.c
+++ b/src/core/ip-address-access.c
@ -0,0 +1,217 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "alloc-util.h"
+#include "bpf-firewall.h"
+#include "extract-word.h"
+#include "hostname-util.h"
+#include "ip-address-access.h"
+#include "parse-util.h"
+#include "string-util.h"
+
+/* Configuration parser callback for a whitespace separated list of IP address prefixes. data points to the head pointer
+ * of the IPAddressAccessItem list that is extended.
+ *
+ * An empty rvalue frees the whole list. Otherwise each word is either one of the shortcuts "any", "link-local",
+ * "multicast", a name accepted by is_localhost(), or something in_addr_prefix_from_string_auto() can parse. Each
+ * shortcut appends two entries, one for IPv4 and one for IPv6.
+ *
+ * Returns 0 on success and also when an invalid word is logged and ignored, the result of log_oom() on allocation
+ * failure, and a negative error if bpf_firewall_supported() fails. Note that when an invalid address prefix is hit the
+ * function returns right away: entries appended for earlier words of the same line stay in the list, and the list is
+ * not reduced. */
+int config_parse_ip_address_access(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        IPAddressAccessItem **list = data;
+        const char *p;
+        int r;
+
+        assert(list);
+
+        /* An empty assignment resets the list */
+        if (isempty(rvalue)) {
+                *list = ip_address_access_free_all(*list);
+                return 0;
+        }
+
+        p = rvalue;
+
+        for (;;) {
+                /* "a" is owned by us (and freed automatically on return) until it is appended to the list; after
+                 * appending it is either overwritten with a fresh allocation or reset to NULL. */
+                _cleanup_free_ IPAddressAccessItem *a = NULL;
+                _cleanup_free_ char *word = NULL;
+
+                r = extract_first_word(&p, &word, NULL, 0);
+                if (r == 0)
+                        break;
+                if (r == -ENOMEM)
+                        return log_oom();
+                if (r < 0) {
+                        /* Stop parsing, but keep (and reduce) what we collected so far */
+                        log_syntax(unit, LOG_WARNING, filename, line, r, "Invalid syntax, ignoring: %s", rvalue);
+                        break;
+                }
+
+                a = new0(IPAddressAccessItem, 1);
+                if (!a)
+                        return log_oom();
+
+                if (streq(word, "any")) {
+                        /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+                        /* Address and prefix length stay zero-initialized from new0() */
+                        a->family = AF_INET;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+
+                } else if (is_localhost(word)) {
+                        /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+                        a->family = AF_INET;
+                        a->address.in.s_addr = htobe32(0x7f000000);
+                        a->prefixlen = 8;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+                        a->address.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+                        a->prefixlen = 128;
+
+                } else if (streq(word, "link-local")) {
+
+                        /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+                        a->family = AF_INET;
+                        a->address.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+                        a->prefixlen = 16;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+                        /* NOTE(review): __in6_u/__u6_addr32 look like libc-internal member names of struct in6_addr
+                         * rather than a public interface; confirm this builds on all supported C libraries. */
+                        a->address.in6 = (struct in6_addr) {
+                                .__in6_u.__u6_addr32[0] = htobe32(0xfe800000)
+                        };
+                        a->prefixlen = 64;
+
+                } else if (streq(word, "multicast")) {
+
+                        /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+                        a->family = AF_INET;
+                        a->address.in.s_addr = htobe32((UINT32_C(224) << 24));
+                        a->prefixlen = 4;
+                        LIST_APPEND(items, *list, a);
+
+                        a = new0(IPAddressAccessItem, 1);
+                        if (!a)
+                                return log_oom();
+
+                        a->family = AF_INET6;
+                        /* NOTE(review): same libc-internal member names as in the "link-local" case above. */
+                        a->address.in6 = (struct in6_addr) {
+                                .__in6_u.__u6_addr32[0] = htobe32(0xff000000)
+                        };
+                        a->prefixlen = 8;
+
+                } else {
+                        /* Not a shortcut, parse the word as an address prefix */
+                        r = in_addr_prefix_from_string_auto(word, &a->family, &a->address, &a->prefixlen);
+                        if (r < 0) {
+                                log_syntax(unit, LOG_WARNING, filename, line, r, "Address prefix is invalid, ignoring assignment: %s", word);
+                                return 0;
+                        }
+                }
+
+                /* Hand ownership of the (last) entry over to the list, and make sure it is not freed on scope exit */
+                LIST_APPEND(items, *list, a);
+                a = NULL;
+        }
+
+        /* Drop entries that are fully covered by other entries */
+        *list = ip_address_access_reduce(*list);
+
+        if (*list) {
+                /* A non-empty list was configured, warn if it will have no effect */
+                r = bpf_firewall_supported();
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        log_warning("File %s:%u configures an IP firewall (%s=%s), but the local system does not support BPF/cgroup based firewalling.\n"
+                                    "Proceeding WITHOUT firewalling in effect!", filename, line, lvalue, rvalue);
+        }
+
+        return 0;
+}
+
+/* Frees every entry of the list starting at "first". Accepts NULL (empty list). Always returns NULL, so that callers
+ * can reset their list head in the same statement. */
+IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first) {
+
+        while (first) {
+                IPAddressAccessItem *doomed = first;
+
+                /* Advance before freeing, so that we never read from released memory */
+                first = first->items_next;
+                free(doomed);
+        }
+
+        return NULL;
+}
+
+/* Minimizes an IP address access list: masks off the address bits beyond the prefix length of each entry, and removes
+ * (and frees) each entry that is fully covered by another entry of the same family with an equal or shorter prefix.
+ * Of several identical entries only one is kept. Returns the new list head, which differs from "first" if the former
+ * head entry was dropped. */
+IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first) {
+        IPAddressAccessItem *a, *b, *tmp;
+        int r;
+
+        /* Drops all entries from the list that are covered by another entry in full, thus removing all redundant
+         * entries. */
+
+        LIST_FOREACH_SAFE(items, a, tmp, first) {
+
+                /* Drop irrelevant bits */
+                (void) in_addr_mask(a->family, &a->address, a->prefixlen);
+
+                LIST_FOREACH(items, b, first) {
+
+                        if (a == b)
+                                continue;
+
+                        if (a->family != b->family)
+                                continue;
+
+                        /* A longer (more specific) prefix can never cover a shorter one */
+                        if (b->prefixlen > a->prefixlen)
+                                continue;
+
+                        r = in_addr_prefix_covers(b->family,
+                                                  &b->address,
+                                                  b->prefixlen,
+                                                  &a->address);
+                        if (r <= 0)
+                                continue;
+
+                        /* b covers a fully, then let's drop a */
+
+                        LIST_REMOVE(items, first, a);
+                        free(a);
+
+                        /* a is gone now, hence stop comparing it against the remaining entries: continuing the inner
+                         * loop would read from the freed entry, and could unlink and free it a second time if yet
+                         * another entry covers it too. The outer loop is safe, it saved a's successor beforehand. */
+                        break;
+                }
+        }
+
+        return first;
+}
--- a/src/core/ip-address-access.h
+++ b/src/core/ip-address-access.h
@ -0,0 +1,38 @@
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "in-addr-util.h"
+#include "list.h"
+
+typedef struct IPAddressAccessItem IPAddressAccessItem;
+
+/* One entry of an IP address access list: a network described by address family, address and prefix length. Entries
+ * are chained into a list through the "items" linkage. */
+struct IPAddressAccessItem {
+        int family;                     /* address family of 'address' (presumably AF_INET or AF_INET6 — confirm against the parser) */
+        unsigned char prefixlen;        /* prefix length of the network; passed to in_addr_mask() together with 'address' */
+        union in_addr_union address;    /* network address; ip_address_access_reduce() masks it according to 'prefixlen' */
+        LIST_FIELDS(IPAddressAccessItem, items); /* list linkage; items_next is what the list walkers follow */
+};
+
+int config_parse_ip_address_access(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+
+IPAddressAccessItem* ip_address_access_free_all(IPAddressAccessItem *first);
+
+IPAddressAccessItem* ip_address_access_reduce(IPAddressAccessItem *first);
--- a/src/core/job.c
+++ b/src/core/job.c
@ -806,21 +806,26 @@ static void job_log_status_message(Unit *u, JobType t, JobResult result) {
        default:
                log_struct(job_result_log_level[result],
                           LOG_MESSAGE("%s", buf),
-                           "RESULT=%s", job_result_to_string(result),
+                           "JOB_TYPE=%s", job_type_to_string(t),
+                           "JOB_RESULT=%s", job_result_to_string(result),
                           LOG_UNIT_ID(u),
+                           LOG_UNIT_INVOCATION_ID(u),
                           NULL);
                return;
        }

        log_struct(job_result_log_level[result],
                   LOG_MESSAGE("%s", buf),
-                   "RESULT=%s", job_result_to_string(result),
+                   "JOB_TYPE=%s", job_type_to_string(t),
+                   "JOB_RESULT=%s", job_result_to_string(result),
                   LOG_UNIT_ID(u),
+                   LOG_UNIT_INVOCATION_ID(u),
                   mid,
                   NULL);
 }

 static void job_emit_status_message(Unit *u, JobType t, JobResult result) {
+        assert(u);

        /* No message if the job did not actually do anything due to failed condition. */
        if (t == JOB_START && result == JOB_DONE && !u->condition_result)
@ -903,7 +908,7 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
         * the unit itself. We don't treat JOB_CANCELED as failure in
         * this context. And JOB_FAILURE is already handled by the
         * unit itself. */
-        if (result == JOB_TIMEOUT || result == JOB_DEPENDENCY) {
+        if (IN_SET(result, JOB_TIMEOUT, JOB_DEPENDENCY)) {
                log_struct(LOG_NOTICE,
                           "JOB_TYPE=%s", job_type_to_string(t),
                           "JOB_RESULT=%s", job_result_to_string(result),
--- a/src/core/load-fragment-gperf.gperf.m4
+++ b/src/core/load-fragment-gperf.gperf.m4
@ -174,6 +174,9 @@ $1.BlockIOWriteBandwidth,        config_parse_blockio_bandwidth,     0,
 $1.TasksAccounting,              config_parse_bool,                  0,                             offsetof($1, cgroup_context.tasks_accounting)
 $1.TasksMax,                     config_parse_tasks_max,             0,                             offsetof($1, cgroup_context.tasks_max)
 $1.Delegate,                     config_parse_bool,                  0,                             offsetof($1, cgroup_context.delegate)
+$1.IPAccounting,                 config_parse_bool,                  0,                             offsetof($1, cgroup_context.ip_accounting)
+$1.IPAddressAllow,               config_parse_ip_address_access,     0,                             offsetof($1, cgroup_context.ip_address_allow)
+$1.IPAddressDeny,                config_parse_ip_address_access,     0,                             offsetof($1, cgroup_context.ip_address_deny)
 $1.NetClass,                     config_parse_warn_compat,           DISABLED_LEGACY,               0'
 )m4_dnl
 Unit.Description,                config_parse_unit_string_printf,    0,                             offsetof(Unit, description)
--- a/src/core/main.c
+++ b/src/core/main.c
@ -128,6 +128,7 @@ static Set* arg_syscall_archs = NULL;
 static FILE* arg_serialization = NULL;
 static bool arg_default_cpu_accounting = false;
 static bool arg_default_io_accounting = false;
+static bool arg_default_ip_accounting = false;
 static bool arg_default_blockio_accounting = false;
 static bool arg_default_memory_accounting = false;
 static bool arg_default_tasks_accounting = true;
@ -748,6 +749,7 @@ static int parse_config_file(void) {
                { "Manager", "DefaultLimitRTTIME",        config_parse_limit,            RLIMIT_RTTIME, arg_default_rlimit         },
                { "Manager", "DefaultCPUAccounting",      config_parse_bool,             0, &arg_default_cpu_accounting            },
                { "Manager", "DefaultIOAccounting",       config_parse_bool,             0, &arg_default_io_accounting             },
+                { "Manager", "DefaultIPAccounting",       config_parse_bool,             0, &arg_default_ip_accounting             },
                { "Manager", "DefaultBlockIOAccounting",  config_parse_bool,             0, &arg_default_blockio_accounting        },
                { "Manager", "DefaultMemoryAccounting",   config_parse_bool,             0, &arg_default_memory_accounting         },
                { "Manager", "DefaultTasksAccounting",    config_parse_bool,             0, &arg_default_tasks_accounting          },
@ -792,6 +794,7 @@ static void manager_set_defaults(Manager *m) {
        m->default_start_limit_burst = arg_default_start_limit_burst;
        m->default_cpu_accounting = arg_default_cpu_accounting;
        m->default_io_accounting = arg_default_io_accounting;
+        m->default_ip_accounting = arg_default_ip_accounting;
        m->default_blockio_accounting = arg_default_blockio_accounting;
        m->default_memory_accounting = arg_default_memory_accounting;
        m->default_tasks_accounting = arg_default_tasks_accounting;
@ -1202,6 +1205,26 @@ static int bump_rlimit_nofile(struct rlimit *saved_rlimit) {
        return 0;
 }

+/* Makes sure RLIMIT_MEMLOCK of the calling process is at least 16M.
+ *
+ * The limits in effect before the call are stored in *saved_rlimit, so that the caller can restore them later. An
+ * already sufficient limit is left untouched. Returns 0 on success or if nothing had to be changed; if reading or
+ * setting the limit fails a warning is logged and the result of log_warning_errno() is returned (presumably the
+ * negative errno-style error). Must be called as root. */
+static int bump_rlimit_memlock(struct rlimit *saved_rlimit) {
+        const rlim_t wanted = 1024ULL*1024ULL*16ULL;
+        struct rlimit new_rlimit;
+        int r;
+
+        assert(saved_rlimit);
+        assert(getuid() == 0);
+
+        /* BPF_MAP_TYPE_LPM_TRIE bpf maps are charged against RLIMIT_MEMLOCK, even though we have CAP_IPC_LOCK which
+         * should normally disable such checks. We need them to implement IPAddressAllow= and IPAddressDeny=, hence
+         * let's bump the value high enough for the root user. */
+
+        if (getrlimit(RLIMIT_MEMLOCK, saved_rlimit) < 0)
+                return log_warning_errno(errno, "Reading RLIMIT_MEMLOCK failed, ignoring: %m");
+
+        /* Only ever raise the limit, never lower it. On Linux RLIM_INFINITY is the largest rlim_t value, hence plain
+         * comparisons treat an unlimited setting as "high enough". The soft limit cannot exceed the hard limit, so
+         * checking the soft limit is sufficient here. */
+        if (saved_rlimit->rlim_cur >= wanted)
+                return 0;
+
+        new_rlimit.rlim_cur = wanted;
+        new_rlimit.rlim_max = saved_rlimit->rlim_max > wanted ? saved_rlimit->rlim_max : wanted;
+
+        r = setrlimit_closest(RLIMIT_MEMLOCK, &new_rlimit);
+        if (r < 0)
+                return log_warning_errno(r, "Setting RLIMIT_MEMLOCK failed, ignoring: %m");
+
+        return 0;
+}
+
 static void test_usr(void) {

        /* Check that /usr is not a separate fs */
@ -1385,7 +1408,7 @@ int main(int argc, char *argv[]) {
        bool queue_default_job = false;
        bool empty_etc = false;
        char *switch_root_dir = NULL, *switch_root_init = NULL;
-        struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0);
+        struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), saved_rlimit_memlock = RLIMIT_MAKE_CONST((rlim_t) -1);
        const char *error_message = NULL;

 #ifdef HAVE_SYSV_COMPAT
@ -1812,9 +1835,11 @@ int main(int argc, char *argv[]) {
                        if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0)
                                log_warning_errno(errno, "Failed to make us a subreaper: %m");

-                if (arg_system)
+                if (arg_system) {
                        /* Bump up RLIMIT_NOFILE for systemd itself */
                        (void) bump_rlimit_nofile(&saved_rlimit_nofile);
+                        (void) bump_rlimit_memlock(&saved_rlimit_memlock);
+                }
        }

        r = manager_new(arg_system ? UNIT_FILE_SYSTEM : UNIT_FILE_USER,
@ -2048,6 +2073,8 @@ finish:
                 * its child processes */
                if (saved_rlimit_nofile.rlim_cur > 0)
                        (void) setrlimit(RLIMIT_NOFILE, &saved_rlimit_nofile);
+                if (saved_rlimit_memlock.rlim_cur != (rlim_t) -1)
+                        (void) setrlimit(RLIMIT_MEMLOCK, &saved_rlimit_memlock);

                if (switch_root_dir) {
                        /* Kill all remaining processes from the
--- a/src/core/manager.c
+++ b/src/core/manager.c
@ -616,6 +616,9 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
        m->default_timer_accuracy_usec = USEC_PER_MINUTE;
        m->default_tasks_accounting = true;
        m->default_tasks_max = UINT64_MAX;
+        m->default_timeout_start_usec = DEFAULT_TIMEOUT_USEC;
+        m->default_timeout_stop_usec = DEFAULT_TIMEOUT_USEC;
+        m->default_restart_usec = DEFAULT_RESTART_USEC;

 #ifdef ENABLE_EFI
        if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0)
@ -628,13 +631,13 @@ int manager_new(UnitFileScope scope, unsigned test_run_flags, Manager **_m) {
                m->unit_log_format_string = "UNIT=%s";

                m->invocation_log_field = "INVOCATION_ID=";
-                m->invocation_log_format_string = "INVOCATION_ID=" SD_ID128_FORMAT_STR;
+                m->invocation_log_format_string = "INVOCATION_ID=%s";
        } else {
                m->unit_log_field = "USER_UNIT=";
                m->unit_log_format_string = "USER_UNIT=%s";

                m->invocation_log_field = "USER_INVOCATION_ID=";
-                m->invocation_log_format_string = "USER_INVOCATION_ID=" SD_ID128_FORMAT_STR;
+                m->invocation_log_format_string = "USER_INVOCATION_ID=%s";
        }

        m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1;
--- a/src/core/manager.h
+++ b/src/core/manager.h
@ -29,6 +29,7 @@
 #include "cgroup-util.h"
 #include "fdset.h"
 #include "hashmap.h"
+#include "ip-address-access.h"
 #include "list.h"
 #include "ratelimit.h"

@ -271,6 +272,7 @@ struct Manager {
        bool default_io_accounting;
        bool default_blockio_accounting;
        bool default_tasks_accounting;
+        bool default_ip_accounting;

        uint64_t default_tasks_max;
        usec_t default_timer_accuracy_usec;
--- a/src/core/meson.build
+++ b/src/core/meson.build
@ -1,114 +1,118 @@
 libcore_la_sources = '''
-        unit.c
-        unit.h
-        unit-printf.c
-        unit-printf.h
-        job.c
-        job.h
-        manager.c
-        manager.h
-        transaction.c
-        transaction.h
-        load-fragment.c
-        load-fragment.h
-        service.c
-        service.h
-        socket.c
-        socket.h
-        target.c
-        target.h
-        device.c
-        device.h
-        mount.c
-        mount.h
+        audit-fd.c
+        audit-fd.h
        automount.c
        automount.h
-        swap.c
-        swap.h
-        timer.c
-        timer.h
-        path.c
-        path.h
-        slice.c
-        slice.h
-        scope.c
-        scope.h
-        load-dropin.c
-        load-dropin.h
-        execute.c
-        execute.h
-        dynamic-user.c
-        dynamic-user.h
-        kill.c
-        kill.h
-        dbus.c
-        dbus.h
-        dbus-manager.c
-        dbus-manager.h
-        dbus-unit.c
-        dbus-unit.h
-        dbus-job.c
-        dbus-job.h
-        dbus-service.c
-        dbus-service.h
-        dbus-socket.c
-        dbus-socket.h
-        dbus-target.c
-        dbus-target.h
-        dbus-device.c
-        dbus-device.h
-        dbus-mount.c
-        dbus-mount.h
-        dbus-automount.c
-        dbus-automount.h
-        dbus-swap.c
-        dbus-swap.h
-        dbus-timer.c
-        dbus-timer.h
-        dbus-path.c
-        dbus-path.h
-        dbus-slice.c
-        dbus-slice.h
-        dbus-scope.c
-        dbus-scope.h
-        dbus-execute.c
-        dbus-execute.h
-        dbus-kill.c
-        dbus-kill.h
-        dbus-cgroup.c
-        dbus-cgroup.h
+        bpf-firewall.c
+        bpf-firewall.h
        cgroup.c
        cgroup.h
+        dbus-automount.c
+        dbus-automount.h
+        dbus-cgroup.c
+        dbus-cgroup.h
+        dbus-device.c
+        dbus-device.h
+        dbus-execute.c
+        dbus-execute.h
+        dbus-job.c
+        dbus-job.h
+        dbus-kill.c
+        dbus-kill.h
+        dbus-manager.c
+        dbus-manager.h
+        dbus-mount.c
+        dbus-mount.h
+        dbus-path.c
+        dbus-path.h
+        dbus-scope.c
+        dbus-scope.h
+        dbus-service.c
+        dbus-service.h
+        dbus-slice.c
+        dbus-slice.h
+        dbus-socket.c
+        dbus-socket.h
+        dbus-swap.c
+        dbus-swap.h
+        dbus-target.c
+        dbus-target.h
+        dbus-timer.c
+        dbus-timer.h
+        dbus-unit.c
+        dbus-unit.h
+        dbus.c
+        dbus.h
+        device.c
+        device.h
+        dynamic-user.c
+        dynamic-user.h
+        emergency-action.c
+        emergency-action.h
+        execute.c
+        execute.h
+        hostname-setup.c
+        hostname-setup.h
+        ima-setup.c
+        ima-setup.h
+        ip-address-access.c
+        ip-address-access.h
+        job.c
+        job.h
+        kill.c
+        kill.h
+        killall.c
+        killall.h
+        kmod-setup.c
+        kmod-setup.h
+        load-dropin.c
+        load-dropin.h
+        load-fragment.c
+        load-fragment.h
+        locale-setup.c
+        locale-setup.h
+        loopback-setup.c
+        loopback-setup.h
+        machine-id-setup.c
+        machine-id-setup.h
+        manager.c
+        manager.h
+        mount-setup.c
+        mount-setup.h
+        mount.c
+        mount.h
+        namespace.c
+        namespace.h
+        path.c
+        path.h
+        scope.c
+        scope.h
        selinux-access.c
        selinux-access.h
        selinux-setup.c
        selinux-setup.h
-        smack-setup.c
-        smack-setup.h
-        ima-setup.c
-        ima-setup.h
-        locale-setup.h
-        locale-setup.c
-        hostname-setup.c
-        hostname-setup.h
-        machine-id-setup.c
-        machine-id-setup.h
-        mount-setup.c
-        mount-setup.h
-        kmod-setup.c
-        kmod-setup.h
-        loopback-setup.h
-        loopback-setup.c
-        namespace.c
-        namespace.h
-        killall.h
-        killall.c
-        audit-fd.c
-        audit-fd.h
+        service.c
+        service.h
        show-status.c
        show-status.h
-        emergency-action.c
-        emergency-action.h
+        slice.c
+        slice.h
+        smack-setup.c
+        smack-setup.h
+        socket.c
+        socket.h
+        swap.c
+        swap.h
+        target.c
+        target.h
+        timer.c
+        timer.h
+        transaction.c
+        transaction.h
+        unit-printf.c
+        unit-printf.h
+        unit.c
+        unit.h
 '''.split()

 load_fragment_gperf_gperf = custom_target(
--- a/src/core/mount.c
+++ b/src/core/mount.c
@ -736,6 +736,7 @@ static void mount_dump(Unit *u, FILE *f, const char *prefix) {

        exec_context_dump(&m->exec_context, f, prefix);
        kill_context_dump(&m->kill_context, f, prefix);
+        cgroup_context_dump(&m->cgroup_context, f, prefix);
 }

 static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
@ -753,9 +754,10 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) {
        assert(_pid);

        (void) unit_realize_cgroup(UNIT(m));
-        if (m->reset_cpu_usage) {
-                (void) unit_reset_cpu_usage(UNIT(m));
-                m->reset_cpu_usage = false;
+        if (m->reset_accounting) {
+                (void) unit_reset_cpu_accounting(UNIT(m));
+                (void) unit_reset_ip_accounting(UNIT(m));
+                m->reset_accounting = false;
        }

        r = unit_setup_exec_runtime(UNIT(m));
@ -1043,7 +1045,7 @@ static int mount_start(Unit *u) {

        m->result = MOUNT_SUCCESS;
        m->reload_result = MOUNT_SUCCESS;
-        m->reset_cpu_usage = true;
+        m->reset_accounting = true;

        mount_enter_mounting(m);
        return 1;
--- a/src/core/mount.h
+++ b/src/core/mount.h
@ -67,7 +67,7 @@ struct Mount {
        bool just_mounted:1;
        bool just_changed:1;

-        bool reset_cpu_usage:1;
+        bool reset_accounting:1;

        bool sloppy_options;

--- a/src/core/scope.c
+++ b/src/core/scope.c
@ -333,7 +333,8 @@ static int scope_start(Unit *u) {
                return r;

        (void) unit_realize_cgroup(u);
-        (void) unit_reset_cpu_usage(u);
+        (void) unit_reset_cpu_accounting(u);
+        (void) unit_reset_ip_accounting(u);

        r = unit_attach_pids_to_cgroup(u);
        if (r < 0) {
--- a/src/core/service.c
+++ b/src/core/service.c
@ -803,6 +803,8 @@ static void service_dump(Unit *u, FILE *f, const char *prefix) {
                        "%sFile Descriptor Store Current: %u\n",
                        prefix, s->n_fd_store_max,
                        prefix, s->n_fd_store);
+
+        cgroup_context_dump(&s->cgroup_context, f, prefix);
 }

 static int service_load_pid_file(Service *s, bool may_warn) {
@ -1242,9 +1244,10 @@ static int service_spawn(
        }

        (void) unit_realize_cgroup(UNIT(s));
-        if (s->reset_cpu_usage) {
-                (void) unit_reset_cpu_usage(UNIT(s));
-                s->reset_cpu_usage = false;
+        if (s->reset_accounting) {
+                (void) unit_reset_cpu_accounting(UNIT(s));
+                (void) unit_reset_ip_accounting(UNIT(s));
+                s->reset_accounting = false;
        }

        r = unit_setup_exec_runtime(UNIT(s));
@ -1953,6 +1956,7 @@ static void service_enter_restart(Service *s) {
        log_struct(LOG_INFO,
                   "MESSAGE_ID=" SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR,
                   LOG_UNIT_ID(UNIT(s)),
+                   LOG_UNIT_INVOCATION_ID(UNIT(s)),
                   LOG_UNIT_MESSAGE(UNIT(s), "Scheduled restart job, restart counter is at %u.", s->n_restarts),
                   "N_RESTARTS=%u", s->n_restarts,
                   NULL);
@ -2136,7 +2140,7 @@ static int service_start(Unit *u) {
        s->main_pid_known = false;
        s->main_pid_alien = false;
        s->forbid_restart = false;
-        s->reset_cpu_usage = true;
+        s->reset_accounting = true;

        s->status_text = mfree(s->status_text);
        s->status_errno = 0;
@ -2948,6 +2952,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                           "EXIT_CODE=%s", sigchld_code_to_string(code),
                           "EXIT_STATUS=%i", status,
                           LOG_UNIT_ID(u),
+                           LOG_UNIT_INVOCATION_ID(u),
                           NULL);

                if (s->result == SERVICE_SUCCESS)
--- a/src/core/service.h
+++ b/src/core/service.h
@ -165,7 +165,7 @@ struct Service {
        bool forbid_restart:1;
        bool start_timeout_defined:1;

-        bool reset_cpu_usage:1;
+        bool reset_accounting:1;

        char *bus_name;
        char *bus_name_owner; /* unique name of the current owner */
--- a/src/core/show-status.c
+++ b/src/core/show-status.c
@ -93,21 +93,21 @@ int status_vprintf(const char *status, bool ellipse, bool ephemeral, const char
        }

        if (prev_ephemeral)
-                IOVEC_SET_STRING(iovec[n++], "\r" ANSI_ERASE_TO_END_OF_LINE);
+                iovec[n++] = IOVEC_MAKE_STRING("\r" ANSI_ERASE_TO_END_OF_LINE);
        prev_ephemeral = ephemeral;

        if (status) {
                if (!isempty(status)) {
-                        IOVEC_SET_STRING(iovec[n++], "[");
-                        IOVEC_SET_STRING(iovec[n++], status);
-                        IOVEC_SET_STRING(iovec[n++], "] ");
+                        iovec[n++] = IOVEC_MAKE_STRING("[");
+                        iovec[n++] = IOVEC_MAKE_STRING(status);
+                        iovec[n++] = IOVEC_MAKE_STRING("] ");
                } else
-                        IOVEC_SET_STRING(iovec[n++], status_indent);
+                        iovec[n++] = IOVEC_MAKE_STRING(status_indent);
        }

-        IOVEC_SET_STRING(iovec[n++], s);
+        iovec[n++] = IOVEC_MAKE_STRING(s);
        if (!ephemeral)
-                IOVEC_SET_STRING(iovec[n++], "\n");
+                iovec[n++] = IOVEC_MAKE_STRING("\n");

        if (writev(fd, iovec, n) < 0)
                return -errno;
--- a/src/core/slice.c
+++ b/src/core/slice.c
@ -222,7 +222,8 @@ static int slice_start(Unit *u) {
                return r;

        (void) unit_realize_cgroup(u);
-        (void) unit_reset_cpu_usage(u);
+        (void) unit_reset_cpu_accounting(u);
+        (void) unit_reset_ip_accounting(u);

        slice_set_state(t, SLICE_ACTIVE);
        return 1;
--- a/src/core/socket.c
+++ b/src/core/socket.c
@ -29,6 +29,7 @@
 #include <linux/sctp.h>

 #include "alloc-util.h"
+#include "bpf-firewall.h"
 #include "bus-error.h"
 #include "bus-util.h"
 #include "copy.h"
@ -37,6 +38,7 @@
 #include "exit-status.h"
 #include "fd-util.h"
 #include "format-util.h"
+#include "in-addr-util.h"
 #include "io-util.h"
 #include "label.h"
 #include "log.h"
@ -56,7 +58,6 @@
 #include "unit-name.h"
 #include "unit.h"
 #include "user-util.h"
-#include "in-addr-util.h"

 struct SocketPeer {
        unsigned n_ref;
@ -852,6 +853,8 @@ static void socket_dump(Unit *u, FILE *f, const char *prefix) {

                exec_command_dump_list(s->exec_command[c], f, prefix2);
        }
+
+        cgroup_context_dump(&s->cgroup_context, f, prefix);
 }

 static int instance_from_socket(int fd, unsigned nr, char **instance) {
@ -1435,6 +1438,102 @@ no_label:
        return 0;
 }

+/* Creates the listening socket for 'address' in the calling process, applying the listen settings stored in the
+ * socket unit 's' and the given security label. The result of socket_address_listen() is handed back unmodified. */
+static int socket_address_listen_do(
+                Socket *s,
+                const SocketAddress *address,
+                const char *label) {
+
+        const int socket_flags = SOCK_CLOEXEC|SOCK_NONBLOCK;
+        int fd;
+
+        assert(s);
+        assert(address);
+
+        fd = socket_address_listen(address,
+                                   socket_flags,
+                                   s->backlog,
+                                   s->bind_ipv6_only,
+                                   s->bind_to_device,
+                                   s->reuse_port,
+                                   s->free_bind,
+                                   s->transparent,
+                                   s->directory_mode,
+                                   s->socket_mode,
+                                   label);
+
+        return fd;
+}
+
+/* Creates the listening socket for 'address' on behalf of the socket unit 's'.
+ *
+ * For IPv4/IPv6 addresses on systems where bpf_firewall_supported() reports support, the socket is created in a
+ * forked-off helper process and passed back to us over a socketpair. In all other cases the socket is created
+ * directly in the calling process.
+ *
+ * Returns the listening file descriptor on success, or a negative error value on failure. */
+static int socket_address_listen_in_cgroup(
+                Socket *s,
+                const SocketAddress *address,
+                const char *label) {
+
+        _cleanup_close_pair_ int pair[2] = { -1, -1 };
+        int fd, r;
+        pid_t pid;
+
+        assert(s);
+        assert(address);
+
+        /* This is a wrapper around socket_address_listen(), that forks off a helper process inside the socket's cgroup
+         * in which the socket is actually created. This way we ensure the socket is actually properly attached to the
+         * unit's cgroup for the purpose of BPF filtering and such. */
+
+        if (!IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6))
+                goto shortcut; /* BPF filtering only applies to IPv4 + IPv6, shortcut things for other protocols */
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r == 0) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
+                goto shortcut;
+
+        if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+                return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
+
+        r = unit_fork_helper_process(UNIT(s), &pid);
+        if (r < 0)
+                return log_unit_error_errno(UNIT(s), r, "Failed to fork off listener stub process: %m");
+        if (r == 0) {
+                /* Child */
+
+                /* The child only sends, hence drop the receiving end */
+                pair[0] = safe_close(pair[0]);
+
+                fd = socket_address_listen_do(s, address, label);
+                if (fd < 0) {
+                        log_unit_error_errno(UNIT(s), fd, "Failed to create listening socket: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                r = send_one_fd(pair[1], fd, 0);
+                if (r < 0) {
+                        log_unit_error_errno(UNIT(s), r, "Failed to send listening socket to parent: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        /* Parent: we only receive, hence drop the sending end before waiting for the fd */
+        pair[1] = safe_close(pair[1]);
+        fd = receive_one_fd(pair[0], 0);
+
+        /* We synchronously wait for the helper, as it shouldn't be slow */
+        r = wait_for_terminate_and_warn("listen-cgroup-helper", pid, false);
+        if (r < 0) {
+                safe_close(fd);
+                return r;
+        }
+
+        if (fd < 0)
+                return log_unit_error_errno(UNIT(s), fd, "Failed to receive listening socket: %m");
+
+        return fd;
+
+shortcut:
+        fd = socket_address_listen_do(s, address, label);
+        if (fd < 0) /* Log with unit context, like every other failure path in this function */
+                return log_unit_error_errno(UNIT(s), fd, "Failed to create listening socket: %m");
+
+        return fd;
+}
+
 static int socket_open_fds(Socket *s) {
        _cleanup_(mac_selinux_freep) char *label = NULL;
        bool know_label = false;
@ -1478,18 +1577,7 @@ static int socket_open_fds(Socket *s) {
                                break;
                        }

-                        r = socket_address_listen(
-                                        &p->address,
-                                        SOCK_CLOEXEC|SOCK_NONBLOCK,
-                                        s->backlog,
-                                        s->bind_ipv6_only,
-                                        s->bind_to_device,
-                                        s->reuse_port,
-                                        s->free_bind,
-                                        s->transparent,
-                                        s->directory_mode,
-                                        s->socket_mode,
-                                        label);
+                        r = socket_address_listen_in_cgroup(s, &p->address, label);
                        if (r < 0)
                                goto rollback;

@ -1773,9 +1861,10 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) {
        assert(_pid);

        (void) unit_realize_cgroup(UNIT(s));
-        if (s->reset_cpu_usage) {
-                (void) unit_reset_cpu_usage(UNIT(s));
-                s->reset_cpu_usage = false;
+        if (s->reset_accounting) {
+                (void) unit_reset_cpu_accounting(UNIT(s));
+                (void) unit_reset_ip_accounting(UNIT(s));
+                s->reset_accounting = false;
        }

        r = unit_setup_exec_runtime(UNIT(s));
@ -1826,27 +1915,23 @@ static int socket_chown(Socket *s, pid_t *_pid) {
        /* We have to resolve the user names out-of-process, hence
         * let's fork here. It's messy, but well, what can we do? */

-        pid = fork();
-        if (pid < 0)
-                return -errno;
-
-        if (pid == 0) {
-                SocketPort *p;
+        r = unit_fork_helper_process(UNIT(s), &pid);
+        if (r < 0)
+                return r;
+        if (r == 0) {
                uid_t uid = UID_INVALID;
                gid_t gid = GID_INVALID;
-                int ret;
+                SocketPort *p;

-                (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
-                (void) ignore_signals(SIGPIPE, -1);
-                log_forget_fds();
+                /* Child */

                if (!isempty(s->user)) {
                        const char *user = s->user;

                        r = get_user_creds(&user, &uid, &gid, NULL, NULL);
                        if (r < 0) {
-                                ret = EXIT_USER;
-                                goto fail_child;
+                                log_unit_error_errno(UNIT(s), r, "Failed to resolve user %s: %m", user);
+                                _exit(EXIT_USER);
                        }
                }

@ -1855,8 +1940,8 @@ static int socket_chown(Socket *s, pid_t *_pid) {

                        r = get_group_creds(&group, &gid);
                        if (r < 0) {
-                                ret = EXIT_GROUP;
-                                goto fail_child;
+                                log_unit_error_errno(UNIT(s), r, "Failed to resolve group %s: %m", group);
+                                _exit(EXIT_GROUP);
                        }
                }

@ -1872,19 +1957,12 @@ static int socket_chown(Socket *s, pid_t *_pid) {
                                continue;

                        if (chown(path, uid, gid) < 0) {
-                                r = -errno;
-                                ret = EXIT_CHOWN;
-                                goto fail_child;
+                                log_unit_error_errno(UNIT(s), errno, "Failed to chown(): %m");
+                                _exit(EXIT_CHOWN);
                        }
                }

-                _exit(0);
-
-        fail_child:
-                log_open();
-                log_error_errno(r, "Failed to chown socket at step %s: %m", exit_status_to_string(ret, EXIT_STATUS_SYSTEMD));
-
-                _exit(ret);
+                _exit(EXIT_SUCCESS);
        }

        r = unit_watch_pid(UNIT(s), pid);
@ -2371,7 +2449,7 @@ static int socket_start(Unit *u) {
                return r;

        s->result = SOCKET_SUCCESS;
-        s->reset_cpu_usage = true;
+        s->reset_accounting = true;

        socket_enter_start_pre(s);
        return 1;
@ -2696,6 +2774,97 @@ _pure_ static bool socket_check_gc(Unit *u) {
        return s->n_connections > 0;
 }

+/* Accepts one connection from the listening socket 'fd', transparently retrying when the call is interrupted by a
+ * signal. Returns the new (non-blocking) connection fd, or -errno if accept4() fails for any other reason. */
+static int socket_accept_do(Socket *s, int fd) {
+        int connection_fd;
+
+        assert(s);
+        assert(fd >= 0);
+
+        /* EINTR is not a real failure, simply try again */
+        do
+                connection_fd = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
+        while (connection_fd < 0 && errno == EINTR);
+
+        if (connection_fd < 0)
+                return -errno;
+
+        return connection_fd;
+}
+
+/* Accepts one connection on the listening socket 'fd' that belongs to port 'p' of the socket unit 's'.
+ *
+ * For IPv4/IPv6 ports on systems where bpf_firewall_supported() reports support, the accept() is done in a forked-off
+ * helper process and the connection fd is passed back over a socketpair. Otherwise the connection is accepted
+ * directly in the calling process.
+ *
+ * Returns the connection file descriptor on success, or a negative error value on failure. */
+static int socket_accept_in_cgroup(Socket *s, SocketPort *p, int fd) {
+        _cleanup_close_pair_ int pair[2] = { -1, -1 };
+        int cfd, r;
+        pid_t pid;
+
+        assert(s);
+        assert(p);
+        assert(fd >= 0);
+
+        /* Similar to socket_address_listen_in_cgroup(), but for accept() rather than socket(): make sure that any
+         * connection socket is also properly associated with the cgroup. */
+
+        /* Only IPv4 + IPv6 sockets take the helper path, everything else is accepted directly */
+        if (!IN_SET(p->address.sockaddr.sa.sa_family, AF_INET, AF_INET6))
+                goto shortcut;
+
+        r = bpf_firewall_supported();
+        if (r < 0)
+                return r;
+        if (r == 0) /* No BPF firewalling available, hence no need for the helper process */
+                goto shortcut;
+
+        if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
+                return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
+
+        r = unit_fork_helper_process(UNIT(s), &pid);
+        if (r < 0)
+                return log_unit_error_errno(UNIT(s), r, "Failed to fork off accept stub process: %m");
+        if (r == 0) {
+                /* Child */
+
+                /* The child only sends, hence drop the receiving end */
+                pair[0] = safe_close(pair[0]);
+
+                cfd = socket_accept_do(s, fd);
+                if (cfd < 0) {
+                        log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                r = send_one_fd(pair[1], cfd, 0);
+                if (r < 0) {
+                        log_unit_error_errno(UNIT(s), r, "Failed to send connection socket to parent: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        /* Parent: we only receive, hence drop the sending end before waiting for the fd */
+        pair[1] = safe_close(pair[1]);
+        cfd = receive_one_fd(pair[0], 0);
+
+        /* We synchronously wait for the helper, as it shouldn't be slow */
+        r = wait_for_terminate_and_warn("accept-cgroup-helper", pid, false);
+        if (r < 0) {
+                /* Don't leak a connection fd we might have received before waiting failed */
+                safe_close(cfd);
+                return r;
+        }
+
+        if (cfd < 0)
+                return log_unit_error_errno(UNIT(s), cfd, "Failed to receive connection socket: %m");
+
+        return cfd;
+
+shortcut:
+        cfd = socket_accept_do(s, fd);
+        if (cfd < 0)
+                return log_unit_error_errno(UNIT(s), cfd, "Failed to accept connection socket: %m");
+
+        return cfd;
+}
+
 static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
        SocketPort *p = userdata;
        int cfd = -1;
@ -2721,20 +2890,9 @@ static int socket_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
            p->type == SOCKET_SOCKET &&
            socket_address_can_accept(&p->address)) {

-                for (;;) {
-
-                        cfd = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
-                        if (cfd < 0) {
-
-                                if (errno == EINTR)
-                                        continue;
-
-                                log_unit_error_errno(UNIT(p->socket), errno, "Failed to accept socket: %m");
-                                goto fail;
-                        }
-
-                        break;
-                }
+                cfd = socket_accept_in_cgroup(p->socket, p, fd);
+                if (cfd < 0)
+                        goto fail;

                socket_apply_socket_options(p->socket, cfd);
        }
--- a/src/core/socket.h
+++ b/src/core/socket.h
@ -161,7 +161,7 @@ struct Socket {

        char *user, *group;

-        bool reset_cpu_usage:1;
+        bool reset_accounting:1;

        char *fdname;

--- a/src/core/swap.c
+++ b/src/core/swap.c
@ -602,6 +602,7 @@ static void swap_dump(Unit *u, FILE *f, const char *prefix) {

        exec_context_dump(&s->exec_context, f, prefix);
        kill_context_dump(&s->kill_context, f, prefix);
+        cgroup_context_dump(&s->cgroup_context, f, prefix);
 }

 static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
@ -619,9 +620,10 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) {
        assert(_pid);

        (void) unit_realize_cgroup(UNIT(s));
-        if (s->reset_cpu_usage) {
-                (void) unit_reset_cpu_usage(UNIT(s));
-                s->reset_cpu_usage = false;
+        if (s->reset_accounting) {
+                (void) unit_reset_cpu_accounting(UNIT(s));
+                (void) unit_reset_ip_accounting(UNIT(s));
+                s->reset_accounting = false;
        }

        r = unit_setup_exec_runtime(UNIT(s));
@ -860,7 +862,7 @@ static int swap_start(Unit *u) {
                return r;

        s->result = SWAP_SUCCESS;
-        s->reset_cpu_usage = true;
+        s->reset_accounting = true;

        swap_enter_activating(s);
        return 1;
--- a/src/core/swap.h
+++ b/src/core/swap.h
@ -70,7 +70,7 @@ struct Swap {
        bool is_active:1;
        bool just_activated:1;

-        bool reset_cpu_usage:1;
+        bool reset_accounting:1;

        SwapResult result;

--- a/src/core/system.conf
+++ b/src/core/system.conf
@ -40,6 +40,7 @@
 #DefaultEnvironment=
 #DefaultCPUAccounting=no
 #DefaultIOAccounting=no
+#DefaultIPAccounting=no
 #DefaultBlockIOAccounting=no
 #DefaultMemoryAccounting=no
 #DefaultTasksAccounting=yes
@ -60,3 +61,5 @@
 #DefaultLimitNICE=
 #DefaultLimitRTPRIO=
 #DefaultLimitRTTIME=
+#IPAddressAllow=
+#IPAddressDeny=
--- a/src/core/unit.c
+++ b/src/core/unit.c
@ -35,9 +35,11 @@
 #include "dropin.h"
 #include "escape.h"
 #include "execute.h"
+#include "fd-util.h"
 #include "fileio-label.h"
 #include "format-util.h"
 #include "id128-util.h"
+#include "io-util.h"
 #include "load-dropin.h"
 #include "load-fragment.h"
 #include "log.h"
@ -103,6 +105,13 @@ Unit *unit_new(Manager *m, size_t size) {
        u->ref_gid = GID_INVALID;
        u->cpu_usage_last = NSEC_INFINITY;

+        u->ip_accounting_ingress_map_fd = -1;
+        u->ip_accounting_egress_map_fd = -1;
+        u->ipv4_allow_map_fd = -1;
+        u->ipv6_allow_map_fd = -1;
+        u->ipv4_deny_map_fd = -1;
+        u->ipv6_deny_map_fd = -1;
+
        RATELIMIT_INIT(u->start_limit, m->default_start_limit_interval, m->default_start_limit_burst);
        RATELIMIT_INIT(u->auto_stop_ratelimit, 10 * USEC_PER_SEC, 16);

@ -153,9 +162,11 @@ static void unit_init(Unit *u) {

                cc->cpu_accounting = u->manager->default_cpu_accounting;
                cc->io_accounting = u->manager->default_io_accounting;
+                cc->ip_accounting = u->manager->default_ip_accounting;
                cc->blockio_accounting = u->manager->default_blockio_accounting;
                cc->memory_accounting = u->manager->default_memory_accounting;
                cc->tasks_accounting = u->manager->default_tasks_accounting;
+                cc->ip_accounting = u->manager->default_ip_accounting;

                if (u->type != UNIT_SLICE)
                        cc->tasks_max = u->manager->default_tasks_max;
@ -610,6 +621,17 @@ void unit_free(Unit *u) {
        while (u->refs)
                unit_ref_unset(u->refs);

+        safe_close(u->ip_accounting_ingress_map_fd);
+        safe_close(u->ip_accounting_egress_map_fd);
+
+        safe_close(u->ipv4_allow_map_fd);
+        safe_close(u->ipv6_allow_map_fd);
+        safe_close(u->ipv4_deny_map_fd);
+        safe_close(u->ipv6_deny_map_fd);
+
+        bpf_program_unref(u->ip_bpf_ingress);
+        bpf_program_unref(u->ip_bpf_egress);
+
        free(u);
 }

@ -1523,6 +1545,7 @@ static void unit_status_log_starting_stopping_reloading(Unit *u, JobType t) {
        log_struct(LOG_INFO,
                   LOG_MESSAGE("%s", buf),
                   LOG_UNIT_ID(u),
+                   LOG_UNIT_INVOCATION_ID(u),
                   mid,
                   NULL);
 }
@ -1979,6 +2002,134 @@ void unit_trigger_notify(Unit *u) {
                        UNIT_VTABLE(other)->trigger_notify(other, u);
 }

+/* Logs the resources a unit consumed, if resource accounting data is available for it. Invoked whenever a unit
+ * enters failed or dead state. The data is logged in two ways: a friendly human readable string with reduced
+ * information (CPU time plus the IP ingress/egress byte counters) and the complete data in structured fields.
+ *
+ * Returns 0 if the record was submitted or if there was no accounting data to report (in which case nothing is
+ * logged). If a memory allocation fails the return value of log_oom() is propagated. */
+static int unit_log_resources(Unit *u) {
+        /* One slot for the CPU field, one per IP accounting metric, plus the four trailing fields added below */
+        struct iovec iovec[1 + _CGROUP_IP_ACCOUNTING_METRIC_MAX + 4];
+        size_t n_message_parts = 0, n_iovec = 0;
+        /* At most three human readable parts (CPU time, bytes received, bytes sent), plus the NULL terminator
+         * strv_join() needs */
+        char *message_parts[3 + 1], *t;
+        nsec_t nsec = NSEC_INFINITY;
+        CGroupIPAccountingMetric m;
+        size_t i;
+        int r;
+        const char* const ip_fields[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+                [CGROUP_IP_INGRESS_BYTES]   = "IP_METRIC_INGRESS_BYTES",
+                [CGROUP_IP_INGRESS_PACKETS] = "IP_METRIC_INGRESS_PACKETS",
+                [CGROUP_IP_EGRESS_BYTES]    = "IP_METRIC_EGRESS_BYTES",
+                [CGROUP_IP_EGRESS_PACKETS]  = "IP_METRIC_EGRESS_PACKETS",
+        };
+
+        assert(u);
+
+        /* nsec is left at NSEC_INFINITY by the initializer above; we only report CPU time if the call replaced it */
+        (void) unit_get_cpu_usage(u, &nsec);
+        if (nsec != NSEC_INFINITY) {
+                char buf[FORMAT_TIMESPAN_MAX] = "";
+
+                /* Format the CPU time for inclusion in the structured log message */
+                if (asprintf(&t, "CPU_USAGE_NSEC=%" PRIu64, nsec) < 0) {
+                        r = log_oom();
+                        goto finish;
+                }
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+                /* Format the CPU time for inclusion in the human language message string */
+                format_timespan(buf, sizeof(buf), nsec / NSEC_PER_USEC, USEC_PER_MSEC);
+                t = strjoin(n_message_parts > 0 ? "consumed " : "Consumed ", buf, " CPU time");
+                if (!t) {
+                        r = log_oom();
+                        goto finish;
+                }
+
+                message_parts[n_message_parts++] = t;
+        }
+
+        for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+                char buf[FORMAT_BYTES_MAX] = "";
+                uint64_t value = UINT64_MAX;
+
+                assert(ip_fields[m]);
+
+                /* Same pattern as above: a value still at UINT64_MAX means there is nothing to report */
+                (void) unit_get_ip_accounting(u, m, &value);
+                if (value == UINT64_MAX)
+                        continue;
+
+                /* Format IP accounting data for inclusion in the structured log message */
+                if (asprintf(&t, "%s=%" PRIu64, ip_fields[m], value) < 0) {
+                        r = log_oom();
+                        goto finish;
+                }
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(t);
+
+                /* Format the IP accounting data for inclusion in the human language message string, but only for the
+                 * bytes counters (and not for the packets counters) */
+                if (m == CGROUP_IP_INGRESS_BYTES)
+                        t = strjoin(n_message_parts > 0 ? "received " : "Received ",
+                                    format_bytes(buf, sizeof(buf), value),
+                                    " IP traffic");
+                else if (m == CGROUP_IP_EGRESS_BYTES)
+                        t = strjoin(n_message_parts > 0 ? "sent " : "Sent ",
+                                    format_bytes(buf, sizeof(buf), value),
+                                    " IP traffic");
+                else
+                        continue;
+                if (!t) {
+                        r = log_oom();
+                        goto finish;
+                }
+
+                message_parts[n_message_parts++] = t;
+        }
+
+        /* Is there any accounting data available at all? */
+        if (n_iovec == 0) {
+                r = 0;
+                goto finish;
+        }
+
+        if (n_message_parts == 0)
+                t = strjoina("MESSAGE=", u->id, ": Completed");
+        else {
+                /* Always initialize cleanup-attribute variables, so that the cleanup handler never sees garbage */
+                _cleanup_free_ char *joined = NULL;
+
+                message_parts[n_message_parts] = NULL;
+
+                joined = strv_join(message_parts, ", ");
+                if (!joined) {
+                        r = log_oom();
+                        goto finish;
+                }
+
+                t = strjoina("MESSAGE=", u->id, ": ", joined);
+        }
+
+        /* The following four fields we allocate on the stack or are static strings, we hence don't want to free them,
+         * and hence don't increase n_iovec for them */
+        iovec[n_iovec] = IOVEC_MAKE_STRING(t);
+        iovec[n_iovec + 1] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_UNIT_RESOURCES_STR);
+
+        t = strjoina(u->manager->unit_log_field, u->id);
+        iovec[n_iovec + 2] = IOVEC_MAKE_STRING(t);
+
+        t = strjoina(u->manager->invocation_log_field, u->invocation_id_string);
+        iovec[n_iovec + 3] = IOVEC_MAKE_STRING(t);
+
+        log_struct_iovec(LOG_INFO, iovec, n_iovec + 4);
+        r = 0;
+
+finish:
+        /* Only the heap allocated entries are counted by n_message_parts and n_iovec, hence free exactly those */
+        for (i = 0; i < n_message_parts; i++)
+                free(message_parts[i]);
+
+        for (i = 0; i < n_iovec; i++)
+                free(iovec[i].iov_base);
+
+        return r;
+}
+
 void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_success) {
        Manager *m;
        bool unexpected;
@ -2150,28 +2301,33 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
                        manager_send_unit_plymouth(m, u);

        } else {
+                /* We don't care about D-Bus going down here, since we'll get an asynchronous notification for it
+                 * anyway. */

-                /* We don't care about D-Bus here, since we'll get an
-                 * asynchronous notification for it anyway. */
+                if (UNIT_IS_INACTIVE_OR_FAILED(ns) &&
+                    !UNIT_IS_INACTIVE_OR_FAILED(os)
+                    && !MANAGER_IS_RELOADING(m)) {

-                if (u->type == UNIT_SERVICE &&
-                    UNIT_IS_INACTIVE_OR_FAILED(ns) &&
-                    !UNIT_IS_INACTIVE_OR_FAILED(os) &&
-                    !MANAGER_IS_RELOADING(m)) {
+                        /* This unit just stopped/failed. */
+                        if (u->type == UNIT_SERVICE) {

-                        /* Hmm, if there was no start record written
-                         * write it now, so that we always have a nice
-                         * pair */
-                        if (!u->in_audit) {
-                                manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);
+                                /* Hmm, if there was no start record written
+                                 * write it now, so that we always have a nice
+                                 * pair */
+                                if (!u->in_audit) {
+                                        manager_send_unit_audit(m, u, AUDIT_SERVICE_START, ns == UNIT_INACTIVE);

-                                if (ns == UNIT_INACTIVE)
-                                        manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
-                        } else
-                                /* Write audit record if we have just finished shutting down */
-                                manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);
+                                        if (ns == UNIT_INACTIVE)
+                                                manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, true);
+                                } else
+                                        /* Write audit record if we have just finished shutting down */
+                                        manager_send_unit_audit(m, u, AUDIT_SERVICE_STOP, ns == UNIT_INACTIVE);

-                        u->in_audit = false;
+                                u->in_audit = false;
+                        }
+
+                        /* Write a log message about consumed resources */
+                        unit_log_resources(u);
                }
        }

@ -2749,7 +2905,15 @@ static int unit_serialize_cgroup_mask(FILE *f, const char *key, CGroupMask mask)
        return r;
 }

+/* Serialization keys for the per-unit IP accounting counters, indexed by CGroupIPAccountingMetric. unit_serialize()
+ * writes the counters under these keys and unit_deserialize() matches incoming keys against them. Both the strings
+ * and the table slots are constant. */
+static const char* const ip_accounting_metric_field[_CGROUP_IP_ACCOUNTING_METRIC_MAX] = {
+        [CGROUP_IP_INGRESS_BYTES]   = "ip-accounting-ingress-bytes",
+        [CGROUP_IP_INGRESS_PACKETS] = "ip-accounting-ingress-packets",
+        [CGROUP_IP_EGRESS_BYTES]    = "ip-accounting-egress-bytes",
+        [CGROUP_IP_EGRESS_PACKETS]  = "ip-accounting-egress-packets",
+};
+
 int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
+        CGroupIPAccountingMetric m;
        int r;

        assert(u);
@ -2798,6 +2962,7 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {
        unit_serialize_item(u, f, "cgroup-realized", yes_no(u->cgroup_realized));
        (void) unit_serialize_cgroup_mask(f, "cgroup-realized-mask", u->cgroup_realized_mask);
        (void) unit_serialize_cgroup_mask(f, "cgroup-enabled-mask", u->cgroup_enabled_mask);
+        unit_serialize_item_format(u, f, "cgroup-bpf-realized", "%i", u->cgroup_bpf_state);

        if (uid_is_valid(u->ref_uid))
                unit_serialize_item_format(u, f, "ref-uid", UID_FMT, u->ref_uid);
@ -2809,6 +2974,14 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds, bool serialize_jobs) {

        bus_track_serialize(u->bus_track, f, "ref");

+        for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++) {
+                uint64_t v;
+
+                r = unit_get_ip_accounting(u, m, &v);
+                if (r >= 0)
+                        unit_serialize_item_format(u, f, ip_accounting_metric_field[m], "%" PRIu64, v);
+        }
+
        if (serialize_jobs) {
                if (u->job) {
                        fprintf(f, "job\n");
@ -2915,6 +3088,7 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {

        for (;;) {
                char line[LINE_MAX], *l, *v;
+                CGroupIPAccountingMetric m;
                size_t k;

                if (!fgets(line, sizeof(line), f)) {
@ -3069,6 +3243,20 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                                log_unit_debug(u, "Failed to parse cgroup-enabled-mask %s, ignoring.", v);
                        continue;

+                } else if (streq(l, "cgroup-bpf-realized")) {
+                        int i;
+
+                        r = safe_atoi(v, &i);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to parse cgroup BPF state %s, ignoring.", v);
+                        else
+                                u->cgroup_bpf_state =
+                                        i < 0 ? UNIT_CGROUP_BPF_INVALIDATED :
+                                        i > 0 ? UNIT_CGROUP_BPF_ON :
+                                        UNIT_CGROUP_BPF_OFF;
+
+                        continue;
+
                } else if (streq(l, "ref-uid")) {
                        uid_t uid;

@ -3111,6 +3299,21 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
                        continue;
                }

+                /* Check if this is an IP accounting metric serialization field */
+                for (m = 0; m < _CGROUP_IP_ACCOUNTING_METRIC_MAX; m++)
+                        if (streq(l, ip_accounting_metric_field[m]))
+                                break;
+                if (m < _CGROUP_IP_ACCOUNTING_METRIC_MAX) {
+                        uint64_t c;
+
+                        r = safe_atou64(v, &c);
+                        if (r < 0)
+                                log_unit_debug(u, "Failed to parse IP accounting value %s, ignoring.", v);
+                        else
+                                u->ip_accounting_extra[m] = c;
+                        continue;
+                }
+
                if (unit_can_serialize(u)) {
                        if (rt) {
                                r = exec_runtime_deserialize_item(u, rt, l, v, fds);
@ -3137,6 +3340,11 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) {
        if (!dual_timestamp_is_set(&u->state_change_timestamp))
                dual_timestamp_get(&u->state_change_timestamp);

+        /* Let's make sure that everything that is deserialized also gets any potential new cgroup settings applied
+         * after we are done. For that we invalidate anything already realized, so that we can realize it again. */
+        unit_invalidate_cgroup(u, _CGROUP_MASK_ALL);
+        unit_invalidate_cgroup_bpf(u);
+
        return 0;
 }

@ -4169,6 +4377,7 @@ void unit_warn_if_dir_nonempty(Unit *u, const char* where) {
        log_struct(LOG_NOTICE,
                   "MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
                   LOG_UNIT_ID(u),
+                   LOG_UNIT_INVOCATION_ID(u),
                   LOG_UNIT_MESSAGE(u, "Directory %s to mount over is not empty, mounting anyway.", where),
                   "WHERE=%s", where,
                   NULL);
@ -4191,6 +4400,7 @@ int unit_fail_if_symlink(Unit *u, const char* where) {
        log_struct(LOG_ERR,
                   "MESSAGE_ID=" SD_MESSAGE_OVERMOUNTING_STR,
                   LOG_UNIT_ID(u),
+                   LOG_UNIT_INVOCATION_ID(u),
                   LOG_UNIT_MESSAGE(u, "Mount on symlink %s not allowed.", where),
                   "WHERE=%s", where,
                   NULL);
@ -4436,3 +4646,43 @@ void unit_set_exec_params(Unit *u, ExecParameters *p) {
        p->cgroup_path = u->cgroup_path;
        SET_FLAG(p->flags, EXEC_CGROUP_DELEGATE, unit_cgroup_delegate(u));
 }
+
+int unit_fork_helper_process(Unit *u, pid_t *ret) {
+        pid_t child;
+        int q;
+
+        /* Forks off a helper process and, if the unit has a cgroup path, attaches the helper to the unit's cgroup.
+         *
+         * Returns 0 in the child and 1 in the parent; in both cases *ret is set to the child's PID. If fork() itself
+         * fails, -errno is returned and *ret is not written. If the child cannot be attached to the cgroup it logs
+         * the error and exits with EXIT_CGROUP, i.e. it never returns to the caller in that case. */
+
+        assert(u);
+        assert(ret);
+
+        /* Best effort: a failure to realize the cgroup is deliberately ignored here */
+        (void) unit_realize_cgroup(u);
+
+        child = fork();
+        if (child < 0)
+                return -errno;
+
+        if (child > 0) {
+                /* Parent */
+                *ret = child;
+                return 1;
+        }
+
+        /* Child */
+
+        (void) default_signals(SIGNALS_CRASH_HANDLER, SIGNALS_IGNORE, -1);
+        (void) ignore_signals(SIGPIPE, -1);
+
+        /* Close and reopen the logging setup in the child */
+        log_close();
+        log_open();
+
+        if (u->cgroup_path) {
+                q = cg_attach_everywhere(u->manager->cgroup_supported, u->cgroup_path, 0, NULL, NULL);
+                if (q < 0) {
+                        log_unit_error_errno(u, q, "Failed to join unit cgroup %s: %m", u->cgroup_path);
+                        _exit(EXIT_CGROUP);
+                }
+        }
+
+        *ret = getpid_cached();
+        return 0;
+}
--- a/src/core/unit.h
+++ b/src/core/unit.h
@ -28,11 +28,13 @@ typedef struct UnitVTable UnitVTable;
 typedef struct UnitRef UnitRef;
 typedef struct UnitStatusMessageFormats UnitStatusMessageFormats;

+#include "bpf-program.h"
 #include "condition.h"
 #include "emergency-action.h"
 #include "install.h"
 #include "list.h"
 #include "unit-name.h"
+#include "cgroup.h"

 typedef enum KillOperation {
        KILL_TERMINATE,
@ -70,6 +72,12 @@ struct UnitRef {
        LIST_FIELDS(UnitRef, refs);
 };

+/* State of a unit's cgroup BPF setup. unit_serialize() writes the value as a plain integer under the
+ * "cgroup-bpf-realized" key, and unit_deserialize() maps negative integers to INVALIDATED, positive ones to ON and
+ * zero to OFF, so the signs of these values are part of the serialization format. The value is stored in the
+ * two-bit cgroup_bpf_state bit-field of Unit.
+ * NOTE(review): whether a bit-field of enum type can represent -1 depends on the compiler treating it as signed;
+ * confirm for the toolchains in use. */
+typedef enum UnitCGroupBPFState {
+        UNIT_CGROUP_BPF_INVALIDATED = -1,
+        UNIT_CGROUP_BPF_OFF         = 0,
+        UNIT_CGROUP_BPF_ON          = 1,
+} UnitCGroupBPFState;
+
 struct Unit {
        Manager *manager;

@ -205,6 +213,20 @@ struct Unit {
        CGroupMask cgroup_members_mask;
        int cgroup_inotify_wd;

+        /* IP BPF Firewalling/accounting */
+        int ip_accounting_ingress_map_fd;
+        int ip_accounting_egress_map_fd;
+
+        int ipv4_allow_map_fd;
+        int ipv6_allow_map_fd;
+        int ipv4_deny_map_fd;
+        int ipv6_deny_map_fd;
+
+        BPFProgram *ip_bpf_ingress;
+        BPFProgram *ip_bpf_egress;
+
+        uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
+
        /* How to start OnFailure units */
        JobMode on_failure_job_mode;

@ -254,6 +276,8 @@ struct Unit {
        bool cgroup_members_mask_valid:1;
        bool cgroup_subtree_mask_valid:1;

+        UnitCGroupBPFState cgroup_bpf_state:2;
+
        bool start_limit_hit:1;

        /* Did we already invoke unit_coldplug() for this unit? */
@ -661,6 +685,8 @@ bool unit_shall_confirm_spawn(Unit *u);

 void unit_set_exec_params(Unit *s, ExecParameters *p);

+int unit_fork_helper_process(Unit *u, pid_t *ret);
+
 /* Macros which append UNIT= or USER_UNIT= to the message */

 #define log_unit_full(unit, level, error, ...)                          \
@ -684,3 +710,4 @@ void unit_set_exec_params(Unit *s, ExecParameters *p);

 #define LOG_UNIT_MESSAGE(unit, fmt, ...) "MESSAGE=%s: " fmt, (unit)->id, ##__VA_ARGS__
 #define LOG_UNIT_ID(unit) (unit)->manager->unit_log_format_string, (unit)->id
+/* Expands to two arguments for log_struct(): the manager's invocation ID format string and the unit's invocation ID string */
+#define LOG_UNIT_INVOCATION_ID(unit) (unit)->manager->invocation_log_format_string, (unit)->invocation_id_string
--- a/src/coredump/coredump.c
+++ b/src/coredump/coredump.c
@ -749,7 +749,7 @@ static int submit_coredump(
                const char *coredump_filename;

                coredump_filename = strjoina("COREDUMP_FILENAME=", filename);
-                IOVEC_SET_STRING(iovec[n_iovec++], coredump_filename);
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(coredump_filename);
        } else if (arg_storage == COREDUMP_STORAGE_EXTERNAL)
                log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
                         coredump_size, arg_external_size_max);
@ -804,10 +804,10 @@ log:
                return 0;
        }

-        IOVEC_SET_STRING(iovec[n_iovec++], core_message);
+        iovec[n_iovec++] = IOVEC_MAKE_STRING(core_message);

        if (truncated)
-                IOVEC_SET_STRING(iovec[n_iovec++], "COREDUMP_TRUNCATED=1");
+                iovec[n_iovec++] = IOVEC_MAKE_STRING("COREDUMP_TRUNCATED=1");

        /* Optionally store the entire coredump in the journal */
        if (arg_storage == COREDUMP_STORAGE_JOURNAL) {
@ -817,11 +817,9 @@ log:
                        /* Store the coredump itself in the journal */

                        r = allocate_journal_field(coredump_fd, (size_t) coredump_size, &coredump_data, &sz);
-                        if (r >= 0) {
-                                iovec[n_iovec].iov_base = coredump_data;
-                                iovec[n_iovec].iov_len = sz;
-                                n_iovec++;
-                        } else
+                        if (r >= 0)
+                                iovec[n_iovec++] = IOVEC_MAKE(coredump_data, sz);
+                        else
                                log_warning_errno(r, "Failed to attach the core to the journal entry: %m");
                } else
                        log_info("The core will not be stored: size %"PRIu64" is greater than %"PRIu64" (the configured maximum)",
@ -1070,7 +1068,7 @@ static char* set_iovec_field(struct iovec iovec[27], size_t *n_iovec, const char

        x = strappend(field, value);
        if (x)
-                IOVEC_SET_STRING(iovec[(*n_iovec)++], x);
+                iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(x);
        return x;
 }

@ -1162,7 +1160,7 @@ static int gather_pid_metadata(
        if (sd_pid_get_owner_uid(pid, &owner_uid) >= 0) {
                r = asprintf(&t, "COREDUMP_OWNER_UID=" UID_FMT, owner_uid);
                if (r > 0)
-                        IOVEC_SET_STRING(iovec[(*n_iovec)++], t);
+                        iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);
        }

        if (sd_pid_get_slice(pid, &t) >= 0)
@ -1218,7 +1216,7 @@ static int gather_pid_metadata(

        t = strjoin("COREDUMP_TIMESTAMP=", context[CONTEXT_TIMESTAMP], "000000", NULL);
        if (t)
-                IOVEC_SET_STRING(iovec[(*n_iovec)++], t);
+                iovec[(*n_iovec)++] = IOVEC_MAKE_STRING(t);

        if (safe_atoi(context[CONTEXT_SIGNAL], &signo) >= 0 && SIGNAL_VALID(signo))
                set_iovec_field(iovec, n_iovec, "COREDUMP_SIGNAL_NAME=SIG", signal_to_string(signo));
@ -1253,10 +1251,10 @@ static int process_kernel(int argc, char* argv[]) {

        n_iovec = n_to_free;

-        IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);
+        iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_COREDUMP_STR);

        assert_cc(2 == LOG_CRIT);
-        IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2");
+        iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");

        assert(n_iovec <= ELEMENTSOF(iovec));

@ -1344,15 +1342,15 @@ static int process_backtrace(int argc, char *argv[]) {
                        r = log_oom();
                        goto finish;
                }
-                IOVEC_SET_STRING(iovec[n_iovec++], message);
+                iovec[n_iovec++] = IOVEC_MAKE_STRING(message);
        } else {
                for (i = 0; i < importer.iovw.count; i++)
                        iovec[n_iovec++] = importer.iovw.iovec[i];
        }

-        IOVEC_SET_STRING(iovec[n_iovec++], "MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
+        iovec[n_iovec++] = IOVEC_MAKE_STRING("MESSAGE_ID=" SD_MESSAGE_BACKTRACE_STR);
        assert_cc(2 == LOG_CRIT);
-        IOVEC_SET_STRING(iovec[n_iovec++], "PRIORITY=2");
+        iovec[n_iovec++] = IOVEC_MAKE_STRING("PRIORITY=2");

        assert(n_iovec <= n_allocated);

--- a/src/journal/journal-send.c
+++ b/src/journal/journal-send.c
@ -114,9 +114,8 @@ _public_ int sd_journal_printv(int priority, const char *format, va_list ap) {
        if (isempty(buffer+8))
                return 0;

-        zero(iov);
-        IOVEC_SET_STRING(iov[0], buffer);
-        IOVEC_SET_STRING(iov[1], p);
+        iov[0] = IOVEC_MAKE_STRING(buffer);
+        iov[1] = IOVEC_MAKE_STRING(p);

        return sd_journal_sendv(iov, 2);
 }
@ -167,7 +166,7 @@ _printf_(1, 0) static int fill_iovec_sprintf(const char *format, va_list ap, int

                (void) strstrip(buffer); /* strip trailing whitespace, keep prefixing whitespace */

-                IOVEC_SET_STRING(iov[i++], buffer);
+                iov[i++] = IOVEC_MAKE_STRING(buffer);

                format = va_arg(ap, char *);
        }
@ -259,27 +258,19 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
                         * newline, then the size (64bit LE), followed
                         * by the data and a final newline */

-                        w[j].iov_base = iov[i].iov_base;
-                        w[j].iov_len = c - (char*) iov[i].iov_base;
-                        j++;
-
-                        IOVEC_SET_STRING(w[j++], "\n");
+                        w[j++] = IOVEC_MAKE(iov[i].iov_base, c - (char*) iov[i].iov_base);
+                        w[j++] = IOVEC_MAKE_STRING("\n");

                        l[i] = htole64(iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
-                        w[j].iov_base = &l[i];
-                        w[j].iov_len = sizeof(uint64_t);
-                        j++;
-
-                        w[j].iov_base = c + 1;
-                        w[j].iov_len = iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1;
-                        j++;
+                        w[j++] = IOVEC_MAKE(&l[i], sizeof(uint64_t));

+                        w[j++] = IOVEC_MAKE(c + 1, iov[i].iov_len - (c - (char*) iov[i].iov_base) - 1);
                } else
                        /* Nothing special? Then just add the line and
                         * append a newline */
                        w[j++] = iov[i];

-                IOVEC_SET_STRING(w[j++], "\n");
+                w[j++] = IOVEC_MAKE_STRING("\n");
        }

        if (!have_syslog_identifier &&
@ -291,9 +282,9 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) {
                 * since everything else is much nicer to retrieve
                 * from the outside. */

-                IOVEC_SET_STRING(w[j++], "SYSLOG_IDENTIFIER=");
-                IOVEC_SET_STRING(w[j++], program_invocation_short_name);
-                IOVEC_SET_STRING(w[j++], "\n");
+                w[j++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=");
+                w[j++] = IOVEC_MAKE_STRING(program_invocation_short_name);
+                w[j++] = IOVEC_MAKE_STRING("\n");
        }

        fd = journal_fd();
@ -380,9 +371,9 @@ static int fill_iovec_perror_and_send(const char *message, int skip, struct iove
                        xsprintf(error, "ERRNO=%i", _saved_errno_);

                        assert_cc(3 == LOG_ERR);
-                        IOVEC_SET_STRING(iov[skip+0], "PRIORITY=3");
-                        IOVEC_SET_STRING(iov[skip+1], buffer);
-                        IOVEC_SET_STRING(iov[skip+2], error);
+                        iov[skip+0] = IOVEC_MAKE_STRING("PRIORITY=3");
+                        iov[skip+1] = IOVEC_MAKE_STRING(buffer);
+                        iov[skip+2] = IOVEC_MAKE_STRING(error);

                        return sd_journal_sendv(iov, skip + 3);
                }
@ -492,20 +483,19 @@ _public_ int sd_journal_printv_with_location(int priority, const char *file, con
         * CODE_FUNC=, hence let's do it manually here. */
        ALLOCA_CODE_FUNC(f, func);

-        zero(iov);
-        IOVEC_SET_STRING(iov[0], buffer);
-        IOVEC_SET_STRING(iov[1], p);
-        IOVEC_SET_STRING(iov[2], file);
-        IOVEC_SET_STRING(iov[3], line);
-        IOVEC_SET_STRING(iov[4], f);
+        iov[0] = IOVEC_MAKE_STRING(buffer);
+        iov[1] = IOVEC_MAKE_STRING(p);
+        iov[2] = IOVEC_MAKE_STRING(file);
+        iov[3] = IOVEC_MAKE_STRING(line);
+        iov[4] = IOVEC_MAKE_STRING(f);

        return sd_journal_sendv(iov, ELEMENTSOF(iov));
 }

 _public_ int sd_journal_send_with_location(const char *file, const char *line, const char *func, const char *format, ...) {
+        _cleanup_free_ struct iovec *iov = NULL;
        int r, i, j;
        va_list ap;
-        struct iovec *iov = NULL;
        char *f;

        va_start(ap, format);
@ -519,9 +509,9 @@ _public_ int sd_journal_send_with_location(const char *file, const char *line, c

        ALLOCA_CODE_FUNC(f, func);

-        IOVEC_SET_STRING(iov[0], file);
-        IOVEC_SET_STRING(iov[1], line);
-        IOVEC_SET_STRING(iov[2], f);
+        iov[0] = IOVEC_MAKE_STRING(file);
+        iov[1] = IOVEC_MAKE_STRING(line);
+        iov[2] = IOVEC_MAKE_STRING(f);

        r = sd_journal_sendv(iov, i);

@ -529,8 +519,6 @@ finish:
        for (j = 3; j < i; j++)
                free(iov[j].iov_base);

-        free(iov);
-
        return r;
 }

@ -550,9 +538,9 @@ _public_ int sd_journal_sendv_with_location(

        ALLOCA_CODE_FUNC(f, func);

-        IOVEC_SET_STRING(niov[n++], file);
-        IOVEC_SET_STRING(niov[n++], line);
-        IOVEC_SET_STRING(niov[n++], f);
+        niov[n++] = IOVEC_MAKE_STRING(file);
+        niov[n++] = IOVEC_MAKE_STRING(line);
+        niov[n++] = IOVEC_MAKE_STRING(f);

        return sd_journal_sendv(niov, n);
 }
@ -567,9 +555,9 @@ _public_ int sd_journal_perror_with_location(

        ALLOCA_CODE_FUNC(f, func);

-        IOVEC_SET_STRING(iov[0], file);
-        IOVEC_SET_STRING(iov[1], line);
-        IOVEC_SET_STRING(iov[2], f);
+        iov[0] = IOVEC_MAKE_STRING(file);
+        iov[1] = IOVEC_MAKE_STRING(line);
+        iov[2] = IOVEC_MAKE_STRING(f);

        return fill_iovec_perror_and_send(message, 3, iov);
 }
--- a/src/journal/journald-audit.c
+++ b/src/journal/journald-audit.c
@ -383,26 +383,26 @@ static void process_audit_string(Server *s, int type, const char *data, size_t s
                return;
        }

-        IOVEC_SET_STRING(iov[n_iov++], "_TRANSPORT=audit");
+        iov[n_iov++] = IOVEC_MAKE_STRING("_TRANSPORT=audit");

        sprintf(source_time_field, "_SOURCE_REALTIME_TIMESTAMP=%" PRIu64,
                (usec_t) seconds * USEC_PER_SEC + (usec_t) msec * USEC_PER_MSEC);
-        IOVEC_SET_STRING(iov[n_iov++], source_time_field);
+        iov[n_iov++] = IOVEC_MAKE_STRING(source_time_field);

        sprintf(type_field, "_AUDIT_TYPE=%i", type);
-        IOVEC_SET_STRING(iov[n_iov++], type_field);
+        iov[n_iov++] = IOVEC_MAKE_STRING(type_field);

        sprintf(id_field, "_AUDIT_ID=%" PRIu64, id);
-        IOVEC_SET_STRING(iov[n_iov++], id_field);
+        iov[n_iov++] = IOVEC_MAKE_STRING(id_field);

        assert_cc(4 == LOG_FAC(LOG_AUTH));
-        IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_FACILITY=4");
-        IOVEC_SET_STRING(iov[n_iov++], "SYSLOG_IDENTIFIER=audit");
+        iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=4");
+        iov[n_iov++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=audit");

        type_name = audit_type_name_alloca(type);

        m = strjoina("MESSAGE=", type_name, " ", p);
-        IOVEC_SET_STRING(iov[n_iov++], m);
+        iov[n_iov++] = IOVEC_MAKE_STRING(m);

        z = n_iov;

--- a/src/journal/journald-console.c
+++ b/src/journal/journald-console.c
@ -59,9 +59,10 @@ void server_forward_console(
        struct timespec ts;
        char tbuf[sizeof("[] ")-1 + DECIMAL_STR_MAX(ts.tv_sec) + DECIMAL_STR_MAX(ts.tv_nsec)-3 + 1];
        char header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t)];
-        int n = 0, fd;
        _cleanup_free_ char *ident_buf = NULL;
+        _cleanup_close_ int fd = -1;
        const char *tty;
+        int n = 0;

        assert(s);
        assert(message);
@ -75,7 +76,8 @@ void server_forward_console(
                xsprintf(tbuf, "[%5"PRI_TIME".%06"PRI_NSEC"] ",
                         ts.tv_sec,
                         (nsec_t)ts.tv_nsec / 1000);
-                IOVEC_SET_STRING(iovec[n++], tbuf);
+
+                iovec[n++] = IOVEC_MAKE_STRING(tbuf);
        }

        /* Second: identifier and PID */
@ -88,19 +90,19 @@ void server_forward_console(
                xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);

                if (identifier)
-                        IOVEC_SET_STRING(iovec[n++], identifier);
+                        iovec[n++] = IOVEC_MAKE_STRING(identifier);

-                IOVEC_SET_STRING(iovec[n++], header_pid);
+                iovec[n++] = IOVEC_MAKE_STRING(header_pid);
        } else if (identifier) {
-                IOVEC_SET_STRING(iovec[n++], identifier);
-                IOVEC_SET_STRING(iovec[n++], ": ");
+                iovec[n++] = IOVEC_MAKE_STRING(identifier);
+                iovec[n++] = IOVEC_MAKE_STRING(": ");
        }

        /* Fourth: message */
-        IOVEC_SET_STRING(iovec[n++], message);
-        IOVEC_SET_STRING(iovec[n++], "\n");
+        iovec[n++] = IOVEC_MAKE_STRING(message);
+        iovec[n++] = IOVEC_MAKE_STRING("\n");

-        tty = s->tty_path ? s->tty_path : "/dev/console";
+        tty = s->tty_path ?: "/dev/console";

        /* Before you ask: yes, on purpose we open/close the console for each log line we write individually. This is a
         * good strategy to avoid journald getting killed by the kernel's SAK concept (it doesn't fix this entirely,
@ -115,6 +117,4 @@ void server_forward_console(

        if (writev(fd, iovec, n) < 0)
                log_debug_errno(errno, "Failed to write to %s for logging: %m", tty);
-
-        safe_close(fd);
 }
--- a/src/journal/journald-kmsg.c
+++ b/src/journal/journald-kmsg.c
@ -26,6 +26,7 @@
 #include "libudev.h"
 #include "sd-messages.h"

+#include "alloc-util.h"
 #include "escape.h"
 #include "fd-util.h"
 #include "format-util.h"
@ -45,11 +46,11 @@ void server_forward_kmsg(
        const char *message,
        const struct ucred *ucred) {

+        _cleanup_free_ char *ident_buf = NULL;
        struct iovec iovec[5];
        char header_priority[DECIMAL_STR_MAX(priority) + 3],
             header_pid[sizeof("[]: ")-1 + DECIMAL_STR_MAX(pid_t) + 1];
        int n = 0;
-        char *ident_buf = NULL;

        assert(s);
        assert(priority >= 0);
@ -68,7 +69,7 @@ void server_forward_kmsg(

        /* First: priority field */
        xsprintf(header_priority, "<%i>", priority);
-        IOVEC_SET_STRING(iovec[n++], header_priority);
+        iovec[n++] = IOVEC_MAKE_STRING(header_priority);

        /* Second: identifier and PID */
        if (ucred) {
@ -80,22 +81,20 @@ void server_forward_kmsg(
                xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);

                if (identifier)
-                        IOVEC_SET_STRING(iovec[n++], identifier);
+                        iovec[n++] = IOVEC_MAKE_STRING(identifier);

-                IOVEC_SET_STRING(iovec[n++], header_pid);
+                iovec[n++] = IOVEC_MAKE_STRING(header_pid);
        } else if (identifier) {
-                IOVEC_SET_STRING(iovec[n++], identifier);
-                IOVEC_SET_STRING(iovec[n++], ": ");
+                iovec[n++] = IOVEC_MAKE_STRING(identifier);
+                iovec[n++] = IOVEC_MAKE_STRING(": ");
        }

        /* Fourth: message */
-        IOVEC_SET_STRING(iovec[n++], message);
-        IOVEC_SET_STRING(iovec[n++], "\n");
+        iovec[n++] = IOVEC_MAKE_STRING(message);
+        iovec[n++] = IOVEC_MAKE_STRING("\n");

        if (writev(s->dev_kmsg_fd, iovec, n) < 0)
                log_debug_errno(errno, "Failed to write to /dev/kmsg for logging: %m");
-
-        free(ident_buf);
 }

 static bool is_us(const char *pid) {
@ -111,11 +110,11 @@ static bool is_us(const char *pid) {

 static void dev_kmsg_record(Server *s, const char *p, size_t l) {
        struct iovec iovec[N_IOVEC_META_FIELDS + 7 + N_IOVEC_KERNEL_FIELDS + 2 + N_IOVEC_UDEV_FIELDS];
-        char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL;
+        _cleanup_free_ char *message = NULL, *syslog_priority = NULL, *syslog_pid = NULL, *syslog_facility = NULL, *syslog_identifier = NULL, *source_time = NULL, *identifier = NULL, *pid = NULL;
        int priority, r;
        unsigned n = 0, z = 0, j;
        unsigned long long usec;
-        char *identifier = NULL, *pid = NULL, *e, *f, *k;
+        char *e, *f, *k;
        uint64_t serial;
        size_t pl;
        char *kernel_device = NULL;
@ -216,7 +215,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
                if (startswith(m, "_KERNEL_DEVICE="))
                        kernel_device = m + 15;

-                IOVEC_SET_STRING(iovec[n++], m);
+                iovec[n++] = IOVEC_MAKE_STRING(m);
                z++;

                l -= (e - k) + 1;
@ -236,7 +235,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
                        if (g) {
                                b = strappend("_UDEV_DEVNODE=", g);
                                if (b) {
-                                        IOVEC_SET_STRING(iovec[n++], b);
+                                        iovec[n++] = IOVEC_MAKE_STRING(b);
                                        z++;
                                }
                        }
@ -245,7 +244,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
                        if (g) {
                                b = strappend("_UDEV_SYSNAME=", g);
                                if (b) {
-                                        IOVEC_SET_STRING(iovec[n++], b);
+                                        iovec[n++] = IOVEC_MAKE_STRING(b);
                                        z++;
                                }
                        }
@ -261,7 +260,7 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
                                if (g) {
                                        b = strappend("_UDEV_DEVLINK=", g);
                                        if (b) {
-                                                IOVEC_SET_STRING(iovec[n++], b);
+                                                iovec[n++] = IOVEC_MAKE_STRING(b);
                                                z++;
                                        }
                                }
@ -274,18 +273,18 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
        }

        if (asprintf(&source_time, "_SOURCE_MONOTONIC_TIMESTAMP=%llu", usec) >= 0)
-                IOVEC_SET_STRING(iovec[n++], source_time);
+                iovec[n++] = IOVEC_MAKE_STRING(source_time);

-        IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=kernel");
+        iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=kernel");

        if (asprintf(&syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK) >= 0)
-                IOVEC_SET_STRING(iovec[n++], syslog_priority);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);

        if (asprintf(&syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority)) >= 0)
-                IOVEC_SET_STRING(iovec[n++], syslog_facility);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);

        if ((priority & LOG_FACMASK) == LOG_KERN)
-                IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=kernel");
+                iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=kernel");
        else {
                pl -= syslog_parse_identifier((const char**) &p, &identifier, &pid);

@ -297,33 +296,24 @@ static void dev_kmsg_record(Server *s, const char *p, size_t l) {
                if (identifier) {
                        syslog_identifier = strappend("SYSLOG_IDENTIFIER=", identifier);
                        if (syslog_identifier)
-                                IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+                                iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
                }

                if (pid) {
                        syslog_pid = strappend("SYSLOG_PID=", pid);
                        if (syslog_pid)
-                                IOVEC_SET_STRING(iovec[n++], syslog_pid);
+                                iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
                }
        }

        if (cunescape_length_with_prefix(p, pl, "MESSAGE=", UNESCAPE_RELAX, &message) >= 0)
-                IOVEC_SET_STRING(iovec[n++], message);
+                iovec[n++] = IOVEC_MAKE_STRING(message);

        server_dispatch_message(s, iovec, n, ELEMENTSOF(iovec), NULL, NULL, priority, 0);

 finish:
        for (j = 0; j < z; j++)
                free(iovec[j].iov_base);
-
-        free(message);
-        free(syslog_priority);
-        free(syslog_identifier);
-        free(syslog_pid);
-        free(syslog_facility);
-        free(source_time);
-        free(identifier);
-        free(pid);
 }

 static int server_read_dev_kmsg(Server *s) {
--- a/src/journal/journald-native.c
+++ b/src/journal/journald-native.c
@ -282,7 +282,7 @@ static int server_process_entry(
        }

        tn = n++;
-        IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
+        iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
        entry_size += strlen("_TRANSPORT=journal");

        if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
--- a/src/journal/journald-server.c
+++ b/src/journal/journald-server.c
@ -724,14 +724,14 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
                char *k;                                                \
                k = newa(char, strlen(field "=") + DECIMAL_STR_MAX(type) + 1); \
                sprintf(k, field "=" format, value);                    \
-                IOVEC_SET_STRING(iovec[n++], k);                        \
+                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }

 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
        if (!isempty(value)) {                                          \
                char *k;                                                \
                k = strjoina(field "=", value);                         \
-                IOVEC_SET_STRING(iovec[n++], k);                        \
+                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }

 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
@ -739,7 +739,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
                char *k;                                                \
                k = newa(char, strlen(field "=") + SD_ID128_STRING_MAX); \
                sd_id128_to_string(value, stpcpy(k, field "="));        \
-                IOVEC_SET_STRING(iovec[n++], k);                        \
+                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }

 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
@ -747,7 +747,7 @@ static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned
                char *k;                                                \
                k = newa(char, strlen(field "=") + value_size + 1);     \
                *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
-                IOVEC_SET_STRING(iovec[n++], k);                        \
+                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }                                                               \

 static void dispatch_message_real(
@ -826,20 +826,20 @@ static void dispatch_message_real(

        if (tv) {
                sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
-                IOVEC_SET_STRING(iovec[n++], source_time);
+                iovec[n++] = IOVEC_MAKE_STRING(source_time);
        }

        /* Note that strictly speaking storing the boot id here is
         * redundant since the entry includes this in-line
         * anyway. However, we need this indexed, too. */
        if (!isempty(s->boot_id_field))
-                IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
+                iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);

        if (!isempty(s->machine_id_field))
-                IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
+                iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);

        if (!isempty(s->hostname_field))
-                IOVEC_SET_STRING(iovec[n++], s->hostname_field);
+                iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);

        assert(n <= m);

@ -870,15 +870,15 @@ void server_driver_message(Server *s, const char *message_id, const char *format
        assert(format);

        assert_cc(3 == LOG_FAC(LOG_DAEMON));
-        IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
-        IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
+        iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
+        iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");

-        IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
+        iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
        assert_cc(6 == LOG_INFO);
-        IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
+        iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");

        if (message_id)
-                IOVEC_SET_STRING(iovec[n++], message_id);
+                iovec[n++] = IOVEC_MAKE_STRING(message_id);
        m = n;

        va_start(ap, format);
@ -899,8 +899,8 @@ void server_driver_message(Server *s, const char *message_id, const char *format
                xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));

                n = 3;
-                IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
-                IOVEC_SET_STRING(iovec[n++], buf);
+                iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
+                iovec[n++] = IOVEC_MAKE_STRING(buf);
                dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), s->my_context, NULL, LOG_INFO, 0);
        }
 }
--- a/src/journal/journald-stream.c
+++ b/src/journal/journald-stream.c
@ -282,22 +282,21 @@ static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_brea
        if (s->server->forward_to_wall)
                server_forward_wall(s->server, priority, s->identifier, p, &s->ucred);

-        IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=stdout");
-
-        IOVEC_SET_STRING(iovec[n++], s->id_field);
+        iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=stdout");
+        iovec[n++] = IOVEC_MAKE_STRING(s->id_field);

        syslog_priority[strlen("PRIORITY=")] = '0' + LOG_PRI(priority);
-        IOVEC_SET_STRING(iovec[n++], syslog_priority);
+        iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);

        if (priority & LOG_FACMASK) {
                xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
-                IOVEC_SET_STRING(iovec[n++], syslog_facility);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
        }

        if (s->identifier) {
                syslog_identifier = strappend("SYSLOG_IDENTIFIER=", s->identifier);
                if (syslog_identifier)
-                        IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+                        iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
        }

        if (line_break != LINE_BREAK_NEWLINE) {
@ -309,12 +308,12 @@ static int stdout_stream_log(StdoutStream *s, const char *p, LineBreak line_brea
                c =     line_break == LINE_BREAK_NUL ?      "_LINE_BREAK=nul" :
                        line_break == LINE_BREAK_LINE_MAX ? "_LINE_BREAK=line-max" :
                                                            "_LINE_BREAK=eof";
-                IOVEC_SET_STRING(iovec[n++], c);
+                iovec[n++] = IOVEC_MAKE_STRING(c);
        }

        message = strappend("MESSAGE=", p);
        if (message)
-                IOVEC_SET_STRING(iovec[n++], message);
+                iovec[n++] = IOVEC_MAKE_STRING(message);

        if (s->context)
                (void) client_context_maybe_refresh(s->server, s->context, NULL, NULL, 0, NULL, USEC_INFINITY);
--- a/src/journal/journald-syslog.c
+++ b/src/journal/journald-syslog.c
@ -124,7 +124,7 @@ static void forward_syslog_raw(Server *s, int priority, const char *buffer, cons
        if (LOG_PRI(priority) > s->max_level_syslog)
                return;

-        IOVEC_SET_STRING(iovec, buffer);
+        iovec = IOVEC_MAKE_STRING(buffer);
        forward_syslog_iovec(s, &iovec, 1, ucred, tv);
 }

@ -135,7 +135,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
        int n = 0;
        time_t t;
        struct tm *tm;
-        char *ident_buf = NULL;
+        _cleanup_free_ char *ident_buf = NULL;

        assert(s);
        assert(priority >= 0);
@ -147,7 +147,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons

        /* First: priority field */
        xsprintf(header_priority, "<%i>", priority);
-        IOVEC_SET_STRING(iovec[n++], header_priority);
+        iovec[n++] = IOVEC_MAKE_STRING(header_priority);

        /* Second: timestamp */
        t = tv ? tv->tv_sec : ((time_t) (now(CLOCK_REALTIME) / USEC_PER_SEC));
@ -156,7 +156,7 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
                return;
        if (strftime(header_time, sizeof(header_time), "%h %e %T ", tm) <= 0)
                return;
-        IOVEC_SET_STRING(iovec[n++], header_time);
+        iovec[n++] = IOVEC_MAKE_STRING(header_time);

        /* Third: identifier and PID */
        if (ucred) {
@ -168,20 +168,18 @@ void server_forward_syslog(Server *s, int priority, const char *identifier, cons
                xsprintf(header_pid, "["PID_FMT"]: ", ucred->pid);

                if (identifier)
-                        IOVEC_SET_STRING(iovec[n++], identifier);
+                        iovec[n++] = IOVEC_MAKE_STRING(identifier);

-                IOVEC_SET_STRING(iovec[n++], header_pid);
+                iovec[n++] = IOVEC_MAKE_STRING(header_pid);
        } else if (identifier) {
-                IOVEC_SET_STRING(iovec[n++], identifier);
-                IOVEC_SET_STRING(iovec[n++], ": ");
+                iovec[n++] = IOVEC_MAKE_STRING(identifier);
+                iovec[n++] = IOVEC_MAKE_STRING(": ");
        }

        /* Fourth: message */
-        IOVEC_SET_STRING(iovec[n++], message);
+        iovec[n++] = IOVEC_MAKE_STRING(message);

        forward_syslog_iovec(s, iovec, n, ucred, tv);
-
-        free(ident_buf);
 }

 int syslog_fixup_facility(int priority) {
@ -353,29 +351,29 @@ void server_process_syslog_message(
        if (s->forward_to_wall)
                server_forward_wall(s, priority, identifier, buf, ucred);

-        IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=syslog");
+        iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=syslog");

        xsprintf(syslog_priority, "PRIORITY=%i", priority & LOG_PRIMASK);
-        IOVEC_SET_STRING(iovec[n++], syslog_priority);
+        iovec[n++] = IOVEC_MAKE_STRING(syslog_priority);

        if (priority & LOG_FACMASK) {
                xsprintf(syslog_facility, "SYSLOG_FACILITY=%i", LOG_FAC(priority));
-                IOVEC_SET_STRING(iovec[n++], syslog_facility);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_facility);
        }

        if (identifier) {
                syslog_identifier = strjoina("SYSLOG_IDENTIFIER=", identifier);
-                IOVEC_SET_STRING(iovec[n++], syslog_identifier);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier);
        }

        if (pid) {
                syslog_pid = strjoina("SYSLOG_PID=", pid);
-                IOVEC_SET_STRING(iovec[n++], syslog_pid);
+                iovec[n++] = IOVEC_MAKE_STRING(syslog_pid);
        }

        message = strjoina("MESSAGE=", buf);
        if (message)
-                IOVEC_SET_STRING(iovec[n++], message);
+                iovec[n++] = IOVEC_MAKE_STRING(message);

        if (ucred && pid_is_valid(ucred->pid)) {
                r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
--- a/src/libsystemd-network/sd-dhcp-lease.c
+++ b/src/libsystemd-network/sd-dhcp-lease.c
@ -471,7 +471,7 @@ static int lease_parse_routes(
                struct sd_dhcp_route *route = *routes + *routes_size;
                int r;

-                r = in_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
+                r = in4_addr_default_prefixlen((struct in_addr*) option, &route->dst_prefixlen);
                if (r < 0) {
                        log_debug("Failed to determine destination prefix length from class based IP, ignoring");
                        continue;
@ -1253,7 +1253,7 @@ int dhcp_lease_set_default_subnet_mask(sd_dhcp_lease *lease) {
        address.s_addr = lease->address;

        /* fall back to the default subnet masks based on address class */
-        r = in_addr_default_subnet_mask(&address, &mask);
+        r = in4_addr_default_subnet_mask(&address, &mask);
        if (r < 0)
                return r;

--- a/src/libsystemd-network/sd-dhcp-server.c
+++ b/src/libsystemd-network/sd-dhcp-server.c
@ -56,7 +56,7 @@ int sd_dhcp_server_configure_pool(sd_dhcp_server *server, struct in_addr *addres
        assert_return(address->s_addr != INADDR_ANY, -EINVAL);
        assert_return(prefixlen <= 32, -ERANGE);

-        assert_se(in_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
+        assert_se(in4_addr_prefixlen_to_netmask(&netmask_addr, prefixlen));
        netmask = netmask_addr.s_addr;

        server_off = be32toh(address->s_addr & ~netmask);
--- a/src/network/networkd-address.c
+++ b/src/network/networkd-address.c
@ -768,7 +768,7 @@ int config_parse_address(const char *unit,
        }

        if (!e && f == AF_INET) {
-                r = in_addr_default_prefixlen(&buffer.in, &n->prefixlen);
+                r = in4_addr_default_prefixlen(&buffer.in, &n->prefixlen);
                if (r < 0) {
                        log_syntax(unit, LOG_ERR, filename, line, r, "Prefix length not specified, and a default one can not be deduced for '%s', ignoring assignment", address);
                        return 0;
--- a/src/network/networkd-dhcp4.c
+++ b/src/network/networkd-dhcp4.c
@ -237,7 +237,7 @@ static int dhcp_lease_lost(Link *link) {
                if (r >= 0) {
                        r = sd_dhcp_lease_get_netmask(link->dhcp_lease, &netmask);
                        if (r >= 0)
-                                prefixlen = in_addr_netmask_to_prefixlen(&netmask);
+                                prefixlen = in4_addr_netmask_to_prefixlen(&netmask);

                        address->family = AF_INET;
                        address->in_addr.in = addr;
@ -316,7 +316,7 @@ static int dhcp4_update_address(Link *link,
        assert(netmask);
        assert(lifetime);

-        prefixlen = in_addr_netmask_to_prefixlen(netmask);
+        prefixlen = in4_addr_netmask_to_prefixlen(netmask);

        r = address_new(&addr);
        if (r < 0)
@ -406,7 +406,7 @@ static int dhcp_lease_acquired(sd_dhcp_client *client, Link *link) {
        if (r < 0)
                return log_link_error_errno(link, r, "DHCP error: No netmask: %m");

-        prefixlen = in_addr_netmask_to_prefixlen(&netmask);
+        prefixlen = in4_addr_netmask_to_prefixlen(&netmask);

        r = sd_dhcp_lease_get_router(lease, &gateway);
        if (r < 0 && r != -ENODATA)
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@ -1616,6 +1616,27 @@ static int setup_dev_console(const char *dest, const char *console) {
        return mount_verbose(LOG_ERR, console, to, NULL, MS_BIND, NULL);
 }

+/* Joins a new session keyring via keyctl(KEYCTL_JOIN_SESSION_KEYRING). Returns 0 on success, and also when the call
+ * fails with ENOSYS, EACCES or EPERM (these cases are only logged at debug level). Any other failure is logged as
+ * an error and the result of log_error_errno() is returned (presumably a negative errno-style value). */
+static int setup_keyring(void) {
+        key_serial_t keyring;
+
+        /* Allocate a new session keyring for the container. This makes sure the keyring of the session systemd-nspawn
+         * was invoked from doesn't leak into the container. Note that by default we block keyctl() and request_key()
+         * anyway via seccomp so doing this operation isn't strictly necessary, but in case people explicitly whitelist
+         * these system calls let's make sure we don't leak anything into the container. */
+
+        /* The returned serial is not used any further, it is only compared against the -1 failure indicator. */
+        keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
+        if (keyring == -1) {
+                /* Missing support and denied access are treated as non-fatal: log at debug level and carry on. */
+                if (errno == ENOSYS)
+                        log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
+                else if (IN_SET(errno, EACCES, EPERM))
+                        log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
+                else
+                        return log_error_errno(errno, "Setting up kernel keyring failed: %m");
+        }
+
+        return 0;
+}
+
 static int setup_kmsg(const char *dest, int kmsg_socket) {
        const char *from, *to;
        _cleanup_umask_ mode_t u;
@ -2642,6 +2663,10 @@ static int outer_child(
        if (r < 0)
                return r;

+        r = setup_keyring();
+        if (r < 0)
+                return r;
+
        r = setup_seccomp(arg_caps_retain, arg_syscall_whitelist, arg_syscall_blacklist);
        if (r < 0)
                return r;
--- a/src/shared/bus-unit-util.c
+++ b/src/shared/bus-unit-util.c
@ -28,6 +28,8 @@
 #include "errno-list.h"
 #include "escape.h"
 #include "hashmap.h"
+#include "hostname-util.h"
+#include "in-addr-util.h"
 #include "list.h"
 #include "locale-util.h"
 #include "mount-util.h"
@ -66,6 +68,31 @@ int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) {
                        &u->job_path);
 }

+/* Appends a single IP access list entry to the bus message, as a struct of signature "iayu": the address family,
+ * the first FAMILY_ADDRESS_SIZE(family) bytes of the prefix as a byte array, and the prefix length. Both m and
+ * prefix must be non-NULL. Returns the first negative error reported by an sd-bus call, otherwise the result of
+ * closing the struct container. */
+static int bus_append_ip_address_access(sd_bus_message *m, int family, const union in_addr_union *prefix, unsigned char prefixlen) {
+        int r;
+
+        assert(m);
+        assert(prefix);
+
+        r = sd_bus_message_open_container(m, 'r', "iayu");
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_append(m, "i", family);
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_append_array(m, 'y', prefix, FAMILY_ADDRESS_SIZE(family));
+        if (r < 0)
+                return r;
+
+        /* The "u" type code is documented to take a uint32_t argument. Cast explicitly instead of relying on the
+         * default varargs promotion of unsigned char to (signed) int. */
+        r = sd_bus_message_append(m, "u", (uint32_t) prefixlen);
+        if (r < 0)
+                return r;
+
+        return sd_bus_message_close_container(m);
+}
+
 int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignment) {
        const char *eq, *field;
        UnitDependency dep;
@ -207,13 +234,13 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
                r = sd_bus_message_append(m, "sv", sn, "t", l.rlim_cur);

        } else if (STR_IN_SET(field,
-                              "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting", "TasksAccounting",
-                              "SendSIGHUP", "SendSIGKILL", "WakeSystem", "DefaultDependencies",
-                              "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate", "RemainAfterExit",
-                              "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers", "NoNewPrivileges",
-                              "SyslogLevelPrefix", "Delegate", "RemainAfterElapse", "MemoryDenyWriteExecute",
-                              "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables",
-                              "ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
+                              "CPUAccounting", "MemoryAccounting", "IOAccounting", "BlockIOAccounting",
+                              "TasksAccounting", "IPAccounting", "SendSIGHUP", "SendSIGKILL", "WakeSystem",
+                              "DefaultDependencies", "IgnoreSIGPIPE", "TTYVHangup", "TTYReset", "TTYVTDisallocate",
+                              "RemainAfterExit", "PrivateTmp", "PrivateDevices", "PrivateNetwork", "PrivateUsers",
+                              "NoNewPrivileges", "SyslogLevelPrefix", "Delegate", "RemainAfterElapse",
+                              "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC",
+                              "ProtectKernelTunables", "ProtectKernelModules", "ProtectControlGroups", "MountAPIVFS",
                              "CPUSchedulingResetOnFork", "LockPersonality")) {

                r = parse_boolean(eq);
@ -433,6 +460,98 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen
                        r = sd_bus_message_append(m, "v", "a(st)", 1, path, u);
                }

+        } else if (STR_IN_SET(field, "IPAddressAllow", "IPAddressDeny")) {
+                /* Empty value: empty "a(iayu)" array. Otherwise: a named shortcut or a single parsed prefix. */
+                if (isempty(eq))
+                        r = sd_bus_message_append(m, "v", "a(iayu)", 0);
+                else {
+                        unsigned char prefixlen;
+                        union in_addr_union prefix = {};
+                        int family;
+
+                        r = sd_bus_message_open_container(m, 'v', "a(iayu)");
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        r = sd_bus_message_open_container(m, 'a', "(iayu)");
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        if (streq(eq, "any")) {
+                                /* "any" is a shortcut for 0.0.0.0/0 and ::/0 */
+
+                                r = bus_append_ip_address_access(m, AF_INET, &prefix, 0);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 0);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                        } else if (is_localhost(eq)) {
+                                /* "localhost" is a shortcut for 127.0.0.0/8 and ::1/128 */
+
+                                prefix.in.s_addr = htobe32(0x7f000000);
+                                r = bus_append_ip_address_access(m, AF_INET, &prefix, 8);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                                prefix.in6 = (struct in6_addr) IN6ADDR_LOOPBACK_INIT;
+                                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 128);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                        } else if (streq(eq, "link-local")) {
+
+                                /* "link-local" is a shortcut for 169.254.0.0/16 and fe80::/64 */
+
+                                prefix.in.s_addr = htobe32((UINT32_C(169) << 24 | UINT32_C(254) << 16));
+                                r = bus_append_ip_address_access(m, AF_INET, &prefix, 16);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                                prefix.in6 = (struct in6_addr) {
+                                        .__in6_u.__u6_addr32[0] = htobe32(0xfe800000)
+                                };
+                                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 64);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                        } else if (streq(eq, "multicast")) {
+
+                                /* "multicast" is a shortcut for 224.0.0.0/4 and ff00::/8 */
+
+                                prefix.in.s_addr = htobe32((UINT32_C(224) << 24));
+                                r = bus_append_ip_address_access(m, AF_INET, &prefix, 4);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                                prefix.in6 = (struct in6_addr) {
+                                        .__in6_u.__u6_addr32[0] = htobe32(0xff000000)
+                                };
+                                r = bus_append_ip_address_access(m, AF_INET6, &prefix, 8);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+
+                        } else {
+                                r = in_addr_prefix_from_string_auto(eq, &family, &prefix, &prefixlen);
+                                if (r < 0)
+                                        return log_error_errno(r, "Failed to parse IP address prefix: %s", eq);
+
+                                r = bus_append_ip_address_access(m, family, &prefix, prefixlen);
+                                if (r < 0)
+                                        return bus_log_create_error(r);
+                        }
+
+                        r = sd_bus_message_close_container(m);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+
+                        r = sd_bus_message_close_container(m);
+                        if (r < 0)
+                                return bus_log_create_error(r);
+                }
+
        } else if (streq(field, "CPUSchedulingPolicy")) {
                int n;

--- a/src/shared/firewall-util.c
+++ b/src/shared/firewall-util.c
@ -72,7 +72,7 @@ static int entry_fill_basics(
        }
        if (source) {
                entry->ip.src = source->in;
-                in_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
+                in4_addr_prefixlen_to_netmask(&entry->ip.smsk, source_prefixlen);
        }

        if (out_interface) {
@ -84,7 +84,7 @@ static int entry_fill_basics(
        }
        if (destination) {
                entry->ip.dst = destination->in;
-                in_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
+                in4_addr_prefixlen_to_netmask(&entry->ip.dmsk, destination_prefixlen);
        }

        return 0;
--- a/src/shared/linux/bpf.h
+++ b/src/shared/linux/bpf.h
@ -0,0 +1,673 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef __LINUX_BPF_H__
+#define __LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64	0x07	/* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW		0x18	/* double word */
+#define BPF_XADD	0xc0	/* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV		0xb0	/* mov reg to reg */
+#define BPF_ARSH	0xc0	/* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END		0xd0	/* flags for endianness conversion: */
+#define BPF_TO_LE	0x00	/* convert to little-endian */
+#define BPF_TO_BE	0x08	/* convert to big-endian */
+#define BPF_FROM_LE	BPF_TO_LE
+#define BPF_FROM_BE	BPF_TO_BE
+
+#define BPF_JNE		0x50	/* jump != */
+#define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
+#define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
+#define BPF_CALL	0x80	/* function call */
+#define BPF_EXIT	0x90	/* function return */
+
+/* Register numbers */
+enum {
+        BPF_REG_0 = 0,
+        BPF_REG_1,
+        BPF_REG_2,
+        BPF_REG_3,
+        BPF_REG_4,
+        BPF_REG_5,
+        BPF_REG_6,
+        BPF_REG_7,
+        BPF_REG_8,
+        BPF_REG_9,
+        BPF_REG_10,
+        __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG	__MAX_BPF_REG
+
+struct bpf_insn {
+        __u8	code;		/* opcode */
+        __u8	dst_reg:4;	/* dest register */
+        __u8	src_reg:4;	/* source register */
+        __s16	off;		/* signed offset */
+        __s32	imm;		/* signed immediate constant */
+};
+
+/* Key of a BPF_MAP_TYPE_LPM_TRIE entry */
+struct bpf_lpm_trie_key {
+        __u32	prefixlen;	/* up to 32 for AF_INET, 128 for AF_INET6 */
+        __u8	data[0];	/* Arbitrary size */
+};
+
+/* BPF syscall commands, see bpf(2) man-page for details. */
+enum bpf_cmd {
+        BPF_MAP_CREATE,
+        BPF_MAP_LOOKUP_ELEM,
+        BPF_MAP_UPDATE_ELEM,
+        BPF_MAP_DELETE_ELEM,
+        BPF_MAP_GET_NEXT_KEY,
+        BPF_PROG_LOAD,
+        BPF_OBJ_PIN,
+        BPF_OBJ_GET,
+        BPF_PROG_ATTACH,
+        BPF_PROG_DETACH,
+        BPF_PROG_TEST_RUN,
+};
+
+enum bpf_map_type {
+        BPF_MAP_TYPE_UNSPEC,
+        BPF_MAP_TYPE_HASH,
+        BPF_MAP_TYPE_ARRAY,
+        BPF_MAP_TYPE_PROG_ARRAY,
+        BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+        BPF_MAP_TYPE_PERCPU_HASH,
+        BPF_MAP_TYPE_PERCPU_ARRAY,
+        BPF_MAP_TYPE_STACK_TRACE,
+        BPF_MAP_TYPE_CGROUP_ARRAY,
+        BPF_MAP_TYPE_LRU_HASH,
+        BPF_MAP_TYPE_LRU_PERCPU_HASH,
+        BPF_MAP_TYPE_LPM_TRIE,
+        BPF_MAP_TYPE_ARRAY_OF_MAPS,
+        BPF_MAP_TYPE_HASH_OF_MAPS,
+};
+
+enum bpf_prog_type {
+        BPF_PROG_TYPE_UNSPEC,
+        BPF_PROG_TYPE_SOCKET_FILTER,
+        BPF_PROG_TYPE_KPROBE,
+        BPF_PROG_TYPE_SCHED_CLS,
+        BPF_PROG_TYPE_SCHED_ACT,
+        BPF_PROG_TYPE_TRACEPOINT,
+        BPF_PROG_TYPE_XDP,
+        BPF_PROG_TYPE_PERF_EVENT,
+        BPF_PROG_TYPE_CGROUP_SKB,
+        BPF_PROG_TYPE_CGROUP_SOCK,
+        BPF_PROG_TYPE_LWT_IN,
+        BPF_PROG_TYPE_LWT_OUT,
+        BPF_PROG_TYPE_LWT_XMIT,
+};
+
+enum bpf_attach_type {
+        BPF_CGROUP_INET_INGRESS,
+        BPF_CGROUP_INET_EGRESS,
+        BPF_CGROUP_INET_SOCK_CREATE,
+        __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
+ * to the given target_fd cgroup the descendent cgroup will be able to
+ * override effective bpf program that was inherited from this cgroup
+ */
+#define BPF_F_ALLOW_OVERRIDE	(1U << 0)
+
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT	(1U << 0)
+
+#define BPF_PSEUDO_MAP_FD	1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY		0 /* create new element or update existing */
+#define BPF_NOEXIST	1 /* create new element if it didn't exist */
+#define BPF_EXIST	2 /* update existing element */
+
+#define BPF_F_NO_PREALLOC	(1U << 0)
+/* Instead of having one common LRU list in the
+ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
+ * which can scale and perform better.
+ * Note, the LRU nodes (including free nodes) cannot be moved
+ * across different LRU lists.
+ */
+#define BPF_F_NO_COMMON_LRU	(1U << 1)
+
+union bpf_attr {
+        struct { /* anonymous struct used by BPF_MAP_CREATE command */
+                __u32	map_type;	/* one of enum bpf_map_type */
+                __u32	key_size;	/* size of key in bytes */
+                __u32	value_size;	/* size of value in bytes */
+                __u32	max_entries;	/* max number of entries in a map */
+                __u32	map_flags;	/* prealloc or not */
+                __u32	inner_map_fd;	/* fd pointing to the inner map */
+        };
+
+        struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+                __u32		map_fd;
+                __aligned_u64	key;
+                union {
+                        __aligned_u64 value;
+                        __aligned_u64 next_key;
+                };
+                __u64		flags;
+        };
+
+        struct { /* anonymous struct used by BPF_PROG_LOAD command */
+                __u32		prog_type;	/* one of enum bpf_prog_type */
+                __u32		insn_cnt;
+                __aligned_u64	insns;
+                __aligned_u64	license;
+                __u32		log_level;	/* verbosity level of verifier */
+                __u32		log_size;	/* size of user buffer */
+                __aligned_u64	log_buf;	/* user supplied buffer */
+                __u32		kern_version;	/* checked when prog_type=kprobe */
+                __u32		prog_flags;
+        };
+
+        struct { /* anonymous struct used by BPF_OBJ_* commands */
+                __aligned_u64	pathname;
+                __u32		bpf_fd;
+        };
+
+        struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+                __u32		target_fd;	/* container object to attach to */
+                __u32		attach_bpf_fd;	/* eBPF program to attach */
+                __u32		attach_type;
+                __u32		attach_flags;
+        };
+
+        struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+                __u32		prog_fd;
+                __u32		retval;
+                __u32		data_size_in;
+                __u32		data_size_out;
+                __aligned_u64	data_in;
+                __aligned_u64	data_out;
+                __u32		repeat;
+                __u32		duration;
+        } test;
+} __attribute__((aligned(8)));
+
+/* BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(&map, &key)
+ *     Return: Map value or NULL
+ *
+ * int bpf_map_update_elem(&map, &key, &value, flags)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_map_delete_elem(&map, &key)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_probe_read(void *dst, int size, void *src)
+ *     Return: 0 on success or negative error
+ *
+ * u64 bpf_ktime_get_ns(void)
+ *     Return: current ktime
+ *
+ * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
+ *     Return: length of buffer written or negative error
+ *
+ * u32 bpf_prandom_u32(void)
+ *     Return: random value
+ *
+ * u32 bpf_raw_smp_processor_id(void)
+ *     Return: SMP processor ID
+ *
+ * int bpf_skb_store_bytes(skb, offset, from, len, flags)
+ *     store bytes into packet
+ *     @skb: pointer to skb
+ *     @offset: offset within packet from skb->mac_header
+ *     @from: pointer where to copy bytes from
+ *     @len: number of bytes to store into packet
+ *     @flags: bit 0 - if true, recompute skb->csum
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_l3_csum_replace(skb, offset, from, to, flags)
+ *     recompute IP checksum
+ *     @skb: pointer to skb
+ *     @offset: offset within packet where IP checksum is located
+ *     @from: old value of header field
+ *     @to: new value of header field
+ *     @flags: bits 0-3 - size of header field
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_l4_csum_replace(skb, offset, from, to, flags)
+ *     recompute TCP/UDP checksum
+ *     @skb: pointer to skb
+ *     @offset: offset within packet where TCP/UDP checksum is located
+ *     @from: old value of header field
+ *     @to: new value of header field
+ *     @flags: bits 0-3 - size of header field
+ *             bit 4 - is pseudo header
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_tail_call(ctx, prog_array_map, index)
+ *     jump into another BPF program
+ *     @ctx: context pointer passed to next program
+ *     @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
+ *     @index: index inside array that selects specific program to run
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_clone_redirect(skb, ifindex, flags)
+ *     redirect to another netdev
+ *     @skb: pointer to skb
+ *     @ifindex: ifindex of the net device
+ *     @flags: bit 0 - if set, redirect to ingress instead of egress
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ *     Return: current->tgid << 32 | current->pid
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ *     Return: current_gid << 32 | current_uid
+ *
+ * int bpf_get_current_comm(char *buf, int size_of_buf)
+ *     stores current->comm into buf
+ *     Return: 0 on success or negative error
+ *
+ * u32 bpf_get_cgroup_classid(skb)
+ *     retrieve a proc's classid
+ *     @skb: pointer to skb
+ *     Return: classid if != 0
+ *
+ * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_vlan_pop(skb)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_get_tunnel_key(skb, key, size, flags)
+ * int bpf_skb_set_tunnel_key(skb, key, size, flags)
+ *     retrieve or populate tunnel metadata
+ *     @skb: pointer to skb
+ *     @key: pointer to 'struct bpf_tunnel_key'
+ *     @size: size of 'struct bpf_tunnel_key'
+ *     @flags: room for future extensions
+ *     Return: 0 on success or negative error
+ *
+ * u64 bpf_perf_event_read(&map, index)
+ *     Return: Number events read or error code
+ *
+ * int bpf_redirect(ifindex, flags)
+ *     redirect to another netdev
+ *     @ifindex: ifindex of the net device
+ *     @flags: bit 0 - if set, redirect to ingress instead of egress
+ *             other bits - reserved
+ *     Return: TC_ACT_REDIRECT
+ *
+ * u32 bpf_get_route_realm(skb)
+ *     retrieve a dst's tclassid
+ *     @skb: pointer to skb
+ *     Return: realm if != 0
+ *
+ * int bpf_perf_event_output(ctx, map, index, data, size)
+ *     output perf raw sample
+ *     @ctx: struct pt_regs*
+ *     @map: pointer to perf_event_array map
+ *     @index: index of event in the map
+ *     @data: data on stack to be output as raw data
+ *     @size: size of data
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_get_stackid(ctx, map, flags)
+ *     walk user or kernel stack and return id
+ *     @ctx: struct pt_regs*
+ *     @map: pointer to stack_trace map
+ *     @flags: bits 0-7 - number of stack frames to skip
+ *             bit 8 - collect user stack instead of kernel
+ *             bit 9 - compare stacks by hash only
+ *             bit 10 - if two different stacks hash into the same stackid
+ *                      discard old
+ *             other bits - reserved
+ *     Return: >= 0 stackid on success or negative error
+ *
+ * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
+ *     calculate csum diff
+ *     @from: raw from buffer
+ *     @from_size: length of from buffer
+ *     @to: raw to buffer
+ *     @to_size: length of to buffer
+ *     @seed: optional seed
+ *     Return: csum result or negative error code
+ *
+ * int bpf_skb_get_tunnel_opt(skb, opt, size)
+ *     retrieve tunnel options metadata
+ *     @skb: pointer to skb
+ *     @opt: pointer to raw tunnel option data
+ *     @size: size of @opt
+ *     Return: option size
+ *
+ * int bpf_skb_set_tunnel_opt(skb, opt, size)
+ *     populate tunnel options metadata
+ *     @skb: pointer to skb
+ *     @opt: pointer to raw tunnel option data
+ *     @size: size of @opt
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_proto(skb, proto, flags)
+ *     Change protocol of the skb. Currently supported is v4 -> v6,
+ *     v6 -> v4 transitions. The helper will also resize the skb. eBPF
+ *     program is expected to fill the new headers via skb_store_bytes
+ *     and lX_csum_replace.
+ *     @skb: pointer to skb
+ *     @proto: new skb->protocol type
+ *     @flags: reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_type(skb, type)
+ *     Change packet type of skb.
+ *     @skb: pointer to skb
+ *     @type: new skb->pkt_type type
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_under_cgroup(skb, map, index)
+ *     Check cgroup2 membership of skb
+ *     @skb: pointer to skb
+ *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ *     @index: index of the cgroup in the bpf_map
+ *     Return:
+ *       == 0 skb failed the cgroup2 descendant test
+ *       == 1 skb succeeded the cgroup2 descendant test
+ *        < 0 error
+ *
+ * u32 bpf_get_hash_recalc(skb)
+ *     Retrieve and possibly recalculate skb->hash.
+ *     @skb: pointer to skb
+ *     Return: hash
+ *
+ * u64 bpf_get_current_task(void)
+ *     Returns current task_struct
+ *     Return: current
+ *
+ * int bpf_probe_write_user(void *dst, void *src, int len)
+ *     safely attempt to write to a location
+ *     @dst: destination address in userspace
+ *     @src: source address on stack
+ *     @len: number of bytes to copy
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_current_task_under_cgroup(map, index)
+ *     Check cgroup2 membership of current task
+ *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ *     @index: index of the cgroup in the bpf_map
+ *     Return:
+ *       == 0 current failed the cgroup2 descendant test
+ *       == 1 current succeeded the cgroup2 descendant test
+ *        < 0 error
+ *
+ * int bpf_skb_change_tail(skb, len, flags)
+ *     The helper will resize the skb to the given new size, to be used f.e.
+ *     with control messages.
+ *     @skb: pointer to skb
+ *     @len: new skb length
+ *     @flags: reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_pull_data(skb, len)
+ *     The helper will pull in non-linear data in case the skb is non-linear
+ *     and not all of len are part of the linear section. Only needed for
+ *     read/write with direct packet access.
+ *     @skb: pointer to skb
+ *     @len: len to make read/writeable
+ *     Return: 0 on success or negative error
+ *
+ * s64 bpf_csum_update(skb, csum)
+ *     Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
+ *     @skb: pointer to skb
+ *     @csum: csum to add
+ *     Return: csum on success or negative error
+ *
+ * void bpf_set_hash_invalid(skb)
+ *     Invalidate current skb->hash.
+ *     @skb: pointer to skb
+ *
+ * int bpf_get_numa_node_id()
+ *     Return: Id of current NUMA node.
+ *
+ * int bpf_skb_change_head(skb, len, flags)
+ *     Grows headroom of skb and adjusts MAC header offset accordingly.
+ *     Will extend/reallocate as required automatically.
+ *     May change skb data pointer and will thus invalidate any check
+ *     performed for direct packet access.
+ *     @skb: pointer to skb
+ *     @len: length of header to be pushed in front
+ *     @flags: Flags (unused for now)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_xdp_adjust_head(xdp_md, delta)
+ *     Adjust the xdp_md.data by delta
+ *     @xdp_md: pointer to xdp_md
+ *     @delta: A positive/negative integer to be added to xdp_md.data
+ *     Return: 0 on success or negative on error
+ *
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ *     Copy a NUL terminated string from unsafe address. In case the string
+ *     length is smaller than size, the target is not padded with further NUL
+ *     bytes. In case the string length is larger than size, just size-1
+ *     bytes are copied and the last byte is set to NUL.
+ *     @dst: destination address
+ *     @size: maximum number of bytes to copy, including the trailing NUL
+ *     @unsafe_ptr: unsafe address
+ *     Return:
+ *       > 0 length of the string including the trailing NUL on success
+ *       < 0 error
+ *
+ * u64 bpf_get_socket_cookie(skb)
+ *     Get the cookie for the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: 8 Bytes non-decreasing number on success or 0 if the socket
+ *     field is missing inside sk_buff
+ *
+ * u32 bpf_get_socket_uid(skb)
+ *     Get the owner uid of the socket stored inside sk_buff.
+ *     @skb: pointer to skb
+ *     Return: uid of the socket owner on success or overflowuid if failed.
+ */
+#define __BPF_FUNC_MAPPER(FN)		\
+        FN(unspec),			\
+        FN(map_lookup_elem),		\
+        FN(map_update_elem),		\
+        FN(map_delete_elem),		\
+        FN(probe_read),			\
+        FN(ktime_get_ns),		\
+        FN(trace_printk),		\
+        FN(get_prandom_u32),		\
+        FN(get_smp_processor_id),	\
+        FN(skb_store_bytes),		\
+        FN(l3_csum_replace),		\
+        FN(l4_csum_replace),		\
+        FN(tail_call),			\
+        FN(clone_redirect),		\
+        FN(get_current_pid_tgid),	\
+        FN(get_current_uid_gid),	\
+        FN(get_current_comm),		\
+        FN(get_cgroup_classid),		\
+        FN(skb_vlan_push),		\
+        FN(skb_vlan_pop),		\
+        FN(skb_get_tunnel_key),		\
+        FN(skb_set_tunnel_key),		\
+        FN(perf_event_read),		\
+        FN(redirect),			\
+        FN(get_route_realm),		\
+        FN(perf_event_output),		\
+        FN(skb_load_bytes),		\
+        FN(get_stackid),		\
+        FN(csum_diff),			\
+        FN(skb_get_tunnel_opt),		\
+        FN(skb_set_tunnel_opt),		\
+        FN(skb_change_proto),		\
+        FN(skb_change_type),		\
+        FN(skb_under_cgroup),		\
+        FN(get_hash_recalc),		\
+        FN(get_current_task),		\
+        FN(probe_write_user),		\
+        FN(current_task_under_cgroup),	\
+        FN(skb_change_tail),		\
+        FN(skb_pull_data),		\
+        FN(csum_update),		\
+        FN(set_hash_invalid),		\
+        FN(get_numa_node_id),		\
+        FN(skb_change_head),		\
+        FN(xdp_adjust_head),		\
+        FN(probe_read_str),		\
+        FN(get_socket_cookie),		\
+        FN(get_socket_uid),
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
+enum bpf_func_id {
+        __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
+        __BPF_FUNC_MAX_ID,
+};
+#undef __BPF_ENUM_FN
+
+/* All flags used by eBPF helper functions, placed here. */
+
+/* BPF_FUNC_skb_store_bytes flags. */
+#define BPF_F_RECOMPUTE_CSUM		(1ULL << 0)
+#define BPF_F_INVALIDATE_HASH		(1ULL << 1)
+
+/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
+ * First 4 bits are for passing the header field size.
+ */
+#define BPF_F_HDR_FIELD_MASK		0xfULL
+
+/* BPF_FUNC_l4_csum_replace flags. */
+#define BPF_F_PSEUDO_HDR		(1ULL << 4)
+#define BPF_F_MARK_MANGLED_0		(1ULL << 5)
+#define BPF_F_MARK_ENFORCE		(1ULL << 6)
+
+/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+#define BPF_F_INGRESS			(1ULL << 0)
+
+/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
+#define BPF_F_TUNINFO_IPV6		(1ULL << 0)
+
+/* BPF_FUNC_get_stackid flags. */
+#define BPF_F_SKIP_FIELD_MASK		0xffULL
+#define BPF_F_USER_STACK		(1ULL << 8)
+#define BPF_F_FAST_STACK_CMP		(1ULL << 9)
+#define BPF_F_REUSE_STACKID		(1ULL << 10)
+
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
+#define BPF_F_DONT_FRAGMENT		(1ULL << 2)
+
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
+#define BPF_F_INDEX_MASK		0xffffffffULL
+#define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+        __u32 len;
+        __u32 pkt_type;
+        __u32 mark;
+        __u32 queue_mapping;
+        __u32 protocol;
+        __u32 vlan_present;
+        __u32 vlan_tci;
+        __u32 vlan_proto;
+        __u32 priority;
+        __u32 ingress_ifindex;
+        __u32 ifindex;
+        __u32 tc_index;
+        __u32 cb[5];
+        __u32 hash;
+        __u32 tc_classid;
+        __u32 data;
+        __u32 data_end;
+        __u32 napi_id;
+};
+
+struct bpf_tunnel_key {
+        __u32 tunnel_id;
+        union {
+                __u32 remote_ipv4;
+                __u32 remote_ipv6[4];
+        };
+        __u8 tunnel_tos;
+        __u8 tunnel_ttl;
+        __u16 tunnel_ext;
+        __u32 tunnel_label;
+};
+
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counter-part to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled separately, see XDP_*.
+ */
+enum bpf_ret_code {
+        BPF_OK = 0,
+        /* 1 reserved */
+        BPF_DROP = 2,
+        /* 3-6 reserved */
+        BPF_REDIRECT = 7,
+        /* >127 are reserved for prog type specific return codes */
+};
+
+struct bpf_sock {
+        __u32 bound_dev_if;
+        __u32 family;
+        __u32 type;
+        __u32 protocol;
+};
+
+#define XDP_PACKET_HEADROOM 256
+
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+        XDP_ABORTED = 0,
+        XDP_DROP,
+        XDP_PASS,
+        XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+        __u32 data;
+        __u32 data_end;
+};
+
+#endif /* __LINUX_BPF_H__ */
--- a/src/shared/linux/bpf_common.h
+++ b/src/shared/linux/bpf_common.h
@ -0,0 +1,55 @@
+#ifndef __LINUX_BPF_COMMON_H__
+#define __LINUX_BPF_COMMON_H__
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define		BPF_LD		0x00
+#define		BPF_LDX		0x01
+#define		BPF_ST		0x02
+#define		BPF_STX		0x03
+#define		BPF_ALU		0x04
+#define		BPF_JMP		0x05
+#define		BPF_RET		0x06
+#define		BPF_MISC        0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code)  ((code) & 0x18)
+#define		BPF_W		0x00
+#define		BPF_H		0x08
+#define		BPF_B		0x10
+#define BPF_MODE(code)  ((code) & 0xe0)
+#define		BPF_IMM		0x00
+#define		BPF_ABS		0x20
+#define		BPF_IND		0x40
+#define		BPF_MEM		0x60
+#define		BPF_LEN		0x80
+#define		BPF_MSH		0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code)    ((code) & 0xf0)
+#define		BPF_ADD		0x00
+#define		BPF_SUB		0x10
+#define		BPF_MUL		0x20
+#define		BPF_DIV		0x30
+#define		BPF_OR		0x40
+#define		BPF_AND		0x50
+#define		BPF_LSH		0x60
+#define		BPF_RSH		0x70
+#define		BPF_NEG		0x80
+#define		BPF_MOD		0x90
+#define		BPF_XOR		0xa0
+
+#define		BPF_JA		0x00
+#define		BPF_JEQ		0x10
+#define		BPF_JGT		0x20
+#define		BPF_JGE		0x30
+#define		BPF_JSET        0x40
+#define BPF_SRC(code)   ((code) & 0x08)
+#define		BPF_K		0x00
+#define		BPF_X		0x08
+
+#ifndef BPF_MAXINSNS
+#define BPF_MAXINSNS 4096
+#endif
+
+#endif /* __LINUX_BPF_COMMON_H__ */
--- a/src/shared/linux/libbpf.h
+++ b/src/shared/linux/libbpf.h
@ -0,0 +1,198 @@
+/* eBPF mini library
+ *
+ * Convenience macros for writing eBPF programs by hand. Each macro expands to
+ * a `struct bpf_insn` compound literal with the code, dst_reg, src_reg, off
+ * and imm fields all set explicitly; BPF_LD_IMM64 and the macros built on it
+ * expand to two such literals.
+ *
+ * NOTE(review): most macro arguments are pasted into the initializers without
+ * surrounding parentheses, so callers should pass simple expressions only.
+ */
+#ifndef __LIBBPF_H
+#define __LIBBPF_H
+
+#include <linux/bpf.h>
+
+struct bpf_insn;
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg
+ * OP is filtered through BPF_OP() and combined with BPF_X; the 64 variant
+ * uses class BPF_ALU64, the 32 variant class BPF_ALU. */
+
+#define BPF_ALU64_REG(OP, DST, SRC)				\
+        ((struct bpf_insn) {					\
+                .code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,	\
+                .dst_reg = DST,					\
+                .src_reg = SRC,					\
+                .off   = 0,					\
+                .imm   = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC)				\
+        ((struct bpf_insn) {					\
+                .code  = BPF_ALU | BPF_OP(OP) | BPF_X,		\
+                .dst_reg = DST,					\
+                .src_reg = SRC,					\
+                .off   = 0,					\
+                .imm   = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32
+ * Same as the register forms above, but with BPF_K and the operand in imm. */
+
+#define BPF_ALU64_IMM(OP, DST, IMM)				\
+        ((struct bpf_insn) {					\
+                .code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
+                .dst_reg = DST,					\
+                .src_reg = 0,					\
+                .off   = 0,					\
+                .imm   = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM)				\
+        ((struct bpf_insn) {					\
+                .code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
+                .dst_reg = DST,					\
+                .src_reg = 0,					\
+                .off   = 0,					\
+                .imm   = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC)					\
+        ((struct bpf_insn) {					\
+                .code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
+                .dst_reg = DST,					\
+                .src_reg = SRC,					\
+                .off   = 0,					\
+                .imm   = 0 })
+
+#define BPF_MOV32_REG(DST, SRC)					\
+        ((struct bpf_insn) {					\
+                .code  = BPF_ALU | BPF_MOV | BPF_X,		\
+                .dst_reg = DST,					\
+                .src_reg = SRC,					\
+                .off   = 0,					\
+                .imm   = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM)					\
+        ((struct bpf_insn) {					\
+                .code  = BPF_ALU64 | BPF_MOV | BPF_K,		\
+                .dst_reg = DST,					\
+                .src_reg = 0,					\
+                .off   = 0,					\
+                .imm   = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM)					\
+        ((struct bpf_insn) {					\
+                .code  = BPF_ALU | BPF_MOV | BPF_K,		\
+                .dst_reg = DST,					\
+                .src_reg = 0,					\
+                .off   = 0,					\
+                .imm   = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM)					\
+        BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+/* Expands to TWO comma-separated struct bpf_insn literals: the first carries
+ * the opcode, the registers and the low 32 bits of IMM, the second (opcode 0)
+ * carries the upper 32 bits. Consequently IMM is evaluated twice, and the
+ * macro fills two slots when used inside an array initializer. */
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM)				\
+        ((struct bpf_insn) {					\
+                .code  = BPF_LD | BPF_DW | BPF_IMM,		\
+                .dst_reg = DST,					\
+                .src_reg = SRC,					\
+                .off   = 0,					\
+                .imm   = (__u32) (IMM) }),			\
+        ((struct bpf_insn) {					\
+                .code  = 0, /* zero is reserved opcode */	\
+                .dst_reg = 0,					\
+                .src_reg = 0,					\
+                .off   = 0,					\
+                .imm   = ((__u64) (IMM)) >> 32 })
+
+/* Fallback for the case that BPF_PSEUDO_MAP_FD has not been defined yet */
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD	1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd
+ * (src_reg is set to BPF_PSEUDO_MAP_FD, the fd is passed as the immediate) */
+#define BPF_LD_MAP_FD(DST, MAP_FD)				\
+        BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32)
+ * Here and in the memory macros below, SIZE is filtered through BPF_SIZE(). */
+
+#define BPF_LD_ABS(SIZE, IMM)					\
+        ((struct bpf_insn) {					\
+                .code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,	\
+                .dst_reg = 0,					\
+                .src_reg = 0,					\
+                .off   = 0,					\
+                .imm   = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)			\
+        ((struct bpf_insn) {					\
+                .code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,	\
+                .dst_reg = DST,					\
+                .src_reg = SRC,					\
+                .off   = OFF,					\
+                .imm   = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF)			\
+        ((struct bpf_insn) {					\
+                .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,	\
+                .dst_reg = DST,					\
+                .src_reg = SRC,					\
+                .off   = OFF,					\
+                .imm   = 0 })
+
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg
+ * (identical to BPF_STX_MEM except that the mode is BPF_XADD) */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF)			\
+        ((struct bpf_insn) {					\
+                .code  = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD,	\
+                .dst_reg = DST,					\
+                .src_reg = SRC,					\
+                .off   = OFF,					\
+                .imm   = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM)				\
+        ((struct bpf_insn) {					\
+                .code  = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM,	\
+                .dst_reg = DST,					\
+                .src_reg = 0,					\
+                .off   = OFF,					\
+                .imm   = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF)				\
+        ((struct bpf_insn) {					\
+                .code  = BPF_JMP | BPF_OP(OP) | BPF_X,		\
+                .dst_reg = DST,					\
+                .src_reg = SRC,					\
+                .off   = OFF,					\
+                .imm   = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF)				\
+        ((struct bpf_insn) {					\
+                .code  = BPF_JMP | BPF_OP(OP) | BPF_K,		\
+                .dst_reg = DST,					\
+                .src_reg = 0,					\
+                .off   = OFF,					\
+                .imm   = IMM })
+
+/* Raw code statement block: every field is taken verbatim from the caller,
+ * no masking or combining of CODE is done. */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)			\
+        ((struct bpf_insn) {					\
+                .code  = CODE,					\
+                .dst_reg = DST,					\
+                .src_reg = SRC,					\
+                .off   = OFF,					\
+                .imm   = IMM })
+
+/* Program exit (takes no arguments, all operand fields are zero) */
+
+#define BPF_EXIT_INSN()						\
+        ((struct bpf_insn) {					\
+                .code  = BPF_JMP | BPF_EXIT,			\
+                .dst_reg = 0,					\
+                .src_reg = 0,					\
+                .off   = 0,					\
+                .imm   = 0 })
+
+#endif /* __LIBBPF_H */
--- a/src/systemctl/systemctl.c
+++ b/src/systemctl/systemctl.c
@ -3878,6 +3878,9 @@ typedef struct UnitStatusInfo {
        uint64_t tasks_current;
        uint64_t tasks_max;

+        uint64_t ip_ingress_bytes;
+        uint64_t ip_egress_bytes;
+
        LIST_HEAD(ExecStatusInfo, exec);
 } UnitStatusInfo;

@ -4194,6 +4197,14 @@ static void print_status_info(
        if (i->status_errno > 0)
                printf("    Error: %i (%s)\n", i->status_errno, strerror(i->status_errno));

+        if (i->ip_ingress_bytes != (uint64_t) -1 && i->ip_egress_bytes != (uint64_t) -1) {
+                char buf_in[FORMAT_BYTES_MAX], buf_out[FORMAT_BYTES_MAX];
+
+                printf("       IP: %s in, %s out\n",
+                        format_bytes(buf_in, sizeof(buf_in), i->ip_ingress_bytes),
+                        format_bytes(buf_out, sizeof(buf_out), i->ip_egress_bytes));
+        }
+
        if (i->tasks_current != (uint64_t) -1) {
                printf("    Tasks: %" PRIu64, i->tasks_current);

@ -4484,6 +4495,10 @@ static int status_property(const char *name, sd_bus_message *m, UnitStatusInfo *
                        i->next_elapse_monotonic = u;
                else if (streq(name, "NextElapseUSecRealtime"))
                        i->next_elapse_real = u;
+                else if (streq(name, "IPIngressBytes"))
+                        i->ip_ingress_bytes = u;
+                else if (streq(name, "IPEgressBytes"))
+                        i->ip_egress_bytes = u;

                break;
        }
@ -4998,6 +5013,8 @@ static int show_one(
                .cpu_usage_nsec = (uint64_t) -1,
                .tasks_current = (uint64_t) -1,
                .tasks_max = (uint64_t) -1,
+                .ip_ingress_bytes = (uint64_t) -1,
+                .ip_egress_bytes = (uint64_t) -1,
        };
        int r;

--- a/src/systemd/sd-messages.h
+++ b/src/systemd/sd-messages.h
@ -103,6 +103,9 @@ _SD_BEGIN_DECLARATIONS;
 #define SD_MESSAGE_UNIT_RESTART_SCHEDULED_STR                   \
                                          SD_ID128_MAKE_STR(5e,b0,34,94,b6,58,48,70,a5,36,b3,37,29,08,09,b3)

+#define SD_MESSAGE_UNIT_RESOURCES         SD_ID128_MAKE(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
+#define SD_MESSAGE_UNIT_RESOURCES_STR     SD_ID128_MAKE_STR(ae,8f,7b,86,6b,03,47,b9,af,31,fe,1c,80,b1,27,c0)
+
 #define SD_MESSAGE_SPAWN_FAILED           SD_ID128_MAKE(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)
 #define SD_MESSAGE_SPAWN_FAILED_STR       SD_ID128_MAKE_STR(64,12,57,65,1c,1b,4e,c9,a8,62,4d,7a,40,a9,e1,e7)

--- a/src/test/meson.build
+++ b/src/test/meson.build
@ -277,6 +277,10 @@ tests += [
         [],
         []],

+        [['src/test/test-in-addr-util.c'],
+         [],
+         []],
+
        [['src/test/test-barrier.c'],
         [],
         []],
@ -335,6 +339,17 @@ tests += [
         [libbasic],
         []],

+        [['src/test/test-bpf.c',
+          'src/test/test-helper.c'],
+         [libcore,
+          libshared],
+         [libmount,
+          threads,
+          librt,
+          libseccomp,
+          libselinux,
+          libblkid]],
+
        [['src/test/test-hashmap.c',
          'src/test/test-hashmap-plain.c',
          test_hashmap_ordered_c],
--- a/src/test/test-bpf.c
+++ b/src/test/test-bpf.c
@ -0,0 +1,162 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2016 Daniel Mack
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <linux/libbpf.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "bpf-firewall.h"
+#include "bpf-program.h"
+#include "load-fragment.h"
+#include "manager.h"
+#include "rm-rf.h"
+#include "service.h"
+#include "test-helper.h"
+#include "tests.h"
+#include "unit.h"
+
+/* Exercises the BPF program helpers and the per-unit BPF firewall.
+ *
+ * First a trivial two-instruction program is assembled and, if we run as root
+ * and BPF firewalling is supported, loaded into the kernel. Then a fake
+ * oneshot service with IP allow/deny lists is set up, its ingress and egress
+ * programs are compiled and loaded, and two ping commands are executed: the
+ * one to the allowed address 127.0.0.2 must exit successfully, the one to the
+ * denied address 127.0.0.3 must not.
+ *
+ * Returns EXIT_TEST_SKIP if the kernel-dependent parts cannot be run, 0 on
+ * success.
+ *
+ * All checks use assert_se() rather than assert(): plain assert() is compiled
+ * out when NDEBUG is defined, which would silently drop calls with side
+ * effects such as unit_start() and make the event loop below spin forever. */
+int main(int argc, char *argv[]) {
+        struct bpf_insn exit_insn[] = {
+                BPF_MOV64_IMM(BPF_REG_0, 1),
+                BPF_EXIT_INSN()
+        };
+
+        _cleanup_(rm_rf_physical_and_freep) char *runtime_dir = NULL;
+        CGroupContext *cc = NULL;
+        _cleanup_(bpf_program_unrefp) BPFProgram *p = NULL;
+        Manager *m = NULL;
+        Unit *u;
+        char log_buf[65535];
+        int r;
+
+        log_set_max_level(LOG_DEBUG);
+        log_parse_environment();
+        log_open();
+
+        enter_cgroup_subroot();
+        assert_se(set_unit_path(get_testdata_dir("")) >= 0);
+        assert_se(runtime_dir = setup_fake_runtime_dir());
+
+        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_SKB, &p);
+        assert_se(r == 0);
+
+        r = bpf_program_add_instructions(p, exit_insn, ELEMENTSOF(exit_insn));
+        assert_se(r == 0);
+
+        if (getuid() != 0) {
+                log_notice("Not running as root, skipping kernel related tests.");
+                return EXIT_TEST_SKIP;
+        }
+
+        r = bpf_firewall_supported();
+        if (r == 0) {
+                log_notice("BPF firewalling not supported, skipping");
+                return EXIT_TEST_SKIP;
+        }
+        assert_se(r > 0);
+
+        r = bpf_program_load_kernel(p, log_buf, ELEMENTSOF(log_buf));
+        assert_se(r >= 0);
+
+        p = bpf_program_unref(p);
+
+        /* The simple tests succeeded. Now let's try full unit-based use-case. */
+
+        assert_se(manager_new(UNIT_FILE_USER, true, &m) >= 0);
+        assert_se(manager_startup(m, NULL, NULL) >= 0);
+
+        assert_se(u = unit_new(m, sizeof(Service)));
+        assert_se(unit_add_name(u, "foo.service") == 0);
+        assert_se(cc = unit_get_cgroup_context(u));
+        u->perpetual = true;
+
+        cc->ip_accounting = true;
+
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "10.0.1.0/24", &cc->ip_address_allow, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressAllow", 0, "127.0.0.2", &cc->ip_address_allow, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.3", &cc->ip_address_deny, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "10.0.3.2/24", &cc->ip_address_deny, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.1/25", &cc->ip_address_deny, NULL) == 0);
+        assert_se(config_parse_ip_address_access(u->id, "filename", 1, "Service", 1, "IPAddressDeny", 0, "127.0.0.4", &cc->ip_address_deny, NULL) == 0);
+
+        /* Two entries were added to the allow list, expect exactly two items */
+        assert_se(cc->ip_address_allow);
+        assert_se(cc->ip_address_allow->items_next);
+        assert_se(!cc->ip_address_allow->items_next->items_next);
+
+        /* The deny list is defined redundantly, let's ensure it got properly reduced */
+        assert_se(cc->ip_address_deny);
+        assert_se(cc->ip_address_deny->items_next);
+        assert_se(!cc->ip_address_deny->items_next->items_next);
+
+        assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/usr/bin/ping -c 1 127.0.0.2 -W 5", SERVICE(u)->exec_command, u) == 0);
+        assert_se(config_parse_exec(u->id, "filename", 1, "Service", 1, "ExecStart", SERVICE_EXEC_START, "/usr/bin/ping -c 1 127.0.0.3 -W 5", SERVICE(u)->exec_command, u) == 0);
+
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]);
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next);
+        assert_se(!SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->command_next);
+
+        SERVICE(u)->type = SERVICE_ONESHOT;
+        u->load_state = UNIT_LOADED;
+
+        unit_dump(u, stdout, NULL);
+
+        r = bpf_firewall_compile(u);
+        if (IN_SET(r, -ENOTTY, -ENOSYS, -EPERM)) {
+                /* Kernel doesn't support the necessary bpf bits, or masked out via seccomp? */
+                manager_free(m);
+                return EXIT_TEST_SKIP;
+        }
+        assert_se(r >= 0);
+
+        assert_se(u->ip_bpf_ingress);
+        assert_se(u->ip_bpf_egress);
+
+        /* NOTE(review): log_buf is printed before r is checked, i.e. also when
+         * loading failed; confirm bpf_program_load_kernel() always leaves a
+         * NUL-terminated string in it. */
+        r = bpf_program_load_kernel(u->ip_bpf_ingress, log_buf, ELEMENTSOF(log_buf));
+
+        log_notice("log:");
+        log_notice("-------");
+        log_notice("%s", log_buf);
+        log_notice("-------");
+
+        assert_se(r >= 0);
+
+        r = bpf_program_load_kernel(u->ip_bpf_egress, log_buf, ELEMENTSOF(log_buf));
+
+        log_notice("log:");
+        log_notice("-------");
+        log_notice("%s", log_buf);
+        log_notice("-------");
+
+        assert_se(r >= 0);
+
+        assert_se(unit_start(u) >= 0);
+
+        /* Run the event loop until the oneshot service has finished */
+        while (!IN_SET(SERVICE(u)->state, SERVICE_DEAD, SERVICE_FAILED))
+                assert_se(sd_event_run(m->event, UINT64_MAX) >= 0);
+
+        /* First command (ping to 127.0.0.2) must have exited cleanly ... */
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.code == CLD_EXITED &&
+                  SERVICE(u)->exec_command[SERVICE_EXEC_START]->exec_status.status == EXIT_SUCCESS);
+
+        /* ... while the second one (ping to 127.0.0.3) must not have */
+        assert_se(SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.code != CLD_EXITED ||
+                  SERVICE(u)->exec_command[SERVICE_EXEC_START]->command_next->exec_status.status != EXIT_SUCCESS);
+
+        manager_free(m);
+
+        return 0;
+}
--- a/src/test/test-fileio.c
+++ b/src/test/test-fileio.c
@ -609,9 +609,9 @@ static void test_writing_tmpfile(void) {
        int fd, r;
        struct iovec iov[3];

-        IOVEC_SET_STRING(iov[0], "abc\n");
-        IOVEC_SET_STRING(iov[1], ALPHANUMERICAL "\n");
-        IOVEC_SET_STRING(iov[2], "");
+        iov[0] = IOVEC_MAKE_STRING("abc\n");
+        iov[1] = IOVEC_MAKE_STRING(ALPHANUMERICAL "\n");
+        iov[2] = IOVEC_MAKE_STRING("");

        fd = mkostemp_safe(name);
        printf("tmpfile: %s", name);
--- a/src/test/test-in-addr-util.c
+++ b/src/test/test-in-addr-util.c
@ -0,0 +1,75 @@
+/***
+  This file is part of systemd.
+
+  Copyright 2017 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <netinet/in.h>
+
+#include "in-addr-util.h"
+
+/* Parses p as an address of the given family with optional prefix length and
+ * checks that the return value equals ret. If parsing succeeded, the parsed
+ * address and prefix length must match u and prefixlen, and parsing the same
+ * string again with automatic family detection must yield the same family,
+ * address and prefix length. u is only looked at when parsing succeeded. */
+static void test_in_addr_prefix_from_string(const char *p, int family, int ret, const union in_addr_union *u, unsigned char prefixlen) {
+        union in_addr_union parsed;
+        unsigned char parsed_len;
+        int detected_family, rc;
+
+        rc = in_addr_prefix_from_string(p, family, &parsed, &parsed_len);
+        assert_se(rc == ret);
+
+        /* Nothing more to compare if the string was (expectedly) rejected */
+        if (rc < 0)
+                return;
+
+        assert_se(in_addr_equal(family, &parsed, u));
+        assert_se(parsed_len == prefixlen);
+
+        /* Same input, but let the parser figure out the family itself */
+        rc = in_addr_prefix_from_string_auto(p, &detected_family, &parsed, &parsed_len);
+        assert_se(rc >= 0);
+
+        assert_se(detected_family == family);
+        assert_se(in_addr_equal(family, &parsed, u));
+        assert_se(parsed_len == prefixlen);
+}
+
+/* Feeds in_addr_prefix_from_string() a series of IPv4 and IPv6 inputs: empty
+ * and prefix-only strings, valid addresses with and without a prefix length,
+ * out-of-range prefix lengths, and an IPv6 address parsed as IPv4. */
+int main(int argc, char *argv[]) {
+        /* The two addresses every successful case is compared against */
+        const union in_addr_union v4 = { .in = (struct in_addr) { .s_addr = htobe32(0x01020304) } };
+        const union in_addr_union loopback6 = { .in6 = IN6ADDR_LOOPBACK_INIT };
+
+        test_in_addr_prefix_from_string("", AF_INET, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("/", AF_INET, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("/8", AF_INET, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("1.2.3.4", AF_INET, 0, &v4, 32);
+        test_in_addr_prefix_from_string("1.2.3.4/0", AF_INET, 0, &v4, 0);
+        test_in_addr_prefix_from_string("1.2.3.4/1", AF_INET, 0, &v4, 1);
+        test_in_addr_prefix_from_string("1.2.3.4/2", AF_INET, 0, &v4, 2);
+        test_in_addr_prefix_from_string("1.2.3.4/32", AF_INET, 0, &v4, 32);
+        test_in_addr_prefix_from_string("1.2.3.4/33", AF_INET, -ERANGE, NULL, 0);
+        test_in_addr_prefix_from_string("1.2.3.4/-1", AF_INET, -ERANGE, NULL, 0);
+        test_in_addr_prefix_from_string("::1", AF_INET, -EINVAL, NULL, 0);
+
+        test_in_addr_prefix_from_string("", AF_INET6, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("/", AF_INET6, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("/8", AF_INET6, -EINVAL, NULL, 0);
+        test_in_addr_prefix_from_string("::1", AF_INET6, 0, &loopback6, 128);
+        test_in_addr_prefix_from_string("::1/0", AF_INET6, 0, &loopback6, 0);
+        test_in_addr_prefix_from_string("::1/1", AF_INET6, 0, &loopback6, 1);
+        test_in_addr_prefix_from_string("::1/2", AF_INET6, 0, &loopback6, 2);
+        test_in_addr_prefix_from_string("::1/32", AF_INET6, 0, &loopback6, 32);
+        test_in_addr_prefix_from_string("::1/33", AF_INET6, 0, &loopback6, 33);
+        test_in_addr_prefix_from_string("::1/64", AF_INET6, 0, &loopback6, 64);
+        test_in_addr_prefix_from_string("::1/128", AF_INET6, 0, &loopback6, 128);
+        test_in_addr_prefix_from_string("::1/129", AF_INET6, -ERANGE, NULL, 0);
+        test_in_addr_prefix_from_string("::1/-1", AF_INET6, -ERANGE, NULL, 0);
+
+        return 0;
+}