Merge pull request #11927 from poettering/network-namespace-path

Add NetworkNamespacePath= to unit files
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2019-03-12 14:29:14 +01:00 committed by GitHub
commit fb6692ed33
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 195 additions and 49 deletions

View File

@ -1100,7 +1100,29 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
<para>Note that the implementation of this setting might be impossible (for example if network namespaces are
not available), and the unit should be written in a way that does not solely rely on this setting for
security.</para></listitem>
security.</para>
<para>When this option is used on a socket unit any sockets bound on behalf of this unit will be
bound within a private network namespace. This may be combined with
<varname>JoinsNamespaceOf=</varname> to listen on sockets inside of network namespaces of other
services.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>NetworkNamespacePath=</varname></term>
<listitem><para>Takes an absolute file system path referring to a Linux network namespace
pseudo-file (i.e. a file like <filename>/proc/$PID/ns/net</filename> or a bind mount or symlink to
one). When set the invoked processes are added to the network namespace referenced by that path. The
path has to point to a valid namespace file at the moment the processes are forked off. If this
option is used <varname>PrivateNetwork=</varname> has no effect. If this option is used together with
<varname>JoinsNamespaceOf=</varname> then it only has an effect if this unit is started before any of
the listed units that have <varname>PrivateNetwork=</varname> or
<varname>NetworkNamespacePath=</varname> configured, as otherwise the network namespace of those
units is reused.</para>
<para>When this option is used on a socket unit any sockets bound on behalf of this unit will be
bound within the specified network namespace.</para></listitem>
</varlistentry>
<varlistentry>

View File

@ -728,23 +728,18 @@
<varlistentry>
<term><varname>JoinsNamespaceOf=</varname></term>
<listitem><para>For units that start processes (such as
service units), lists one or more other units whose network
and/or temporary file namespace to join. This only applies to
unit types which support the
<varname>PrivateNetwork=</varname> and
<listitem><para>For units that start processes (such as service units), lists one or more other units
whose network and/or temporary file namespace to join. This only applies to unit types which support
the <varname>PrivateNetwork=</varname>, <varname>NetworkNamespacePath=</varname> and
<varname>PrivateTmp=</varname> directives (see
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry>
for details). If a unit that has this setting set is started,
its processes will see the same <filename>/tmp</filename>,
<filename>/var/tmp</filename> and network namespace as one
listed unit that is started. If multiple listed units are
already started, it is not defined which namespace is joined.
Note that this setting only has an effect if
<varname>PrivateNetwork=</varname> and/or
<varname>PrivateTmp=</varname> is enabled for both the unit
that joins the namespace and the unit whose namespace is
joined.</para></listitem>
<citerefentry><refentrytitle>systemd.exec</refentrytitle><manvolnum>5</manvolnum></citerefentry> for
details). If a unit that has this setting set is started, its processes will see the same
<filename>/tmp</filename>, <filename>/var/tmp</filename> and network namespace as one listed unit
that is started. If multiple listed units are already started, it is not defined which namespace is
joined. Note that this setting only has an effect if
<varname>PrivateNetwork=</varname>/<varname>NetworkNamespacePath=</varname> and/or
<varname>PrivateTmp=</varname> is enabled for both the unit that joins the namespace and the unit
whose namespace is joined.</para></listitem>
</varlistentry>
<varlistentry>

View File

@ -778,6 +778,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
SD_BUS_PROPERTY("MountAPIVFS", "b", bus_property_get_bool, offsetof(ExecContext, mount_apivfs), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("KeyringMode", "s", property_get_exec_keyring_mode, offsetof(ExecContext, keyring_mode), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("ProtectHostname", "b", bus_property_get_bool, offsetof(ExecContext, protect_hostname), SD_BUS_VTABLE_PROPERTY_CONST),
SD_BUS_PROPERTY("NetworkNamespacePath", "s", NULL, offsetof(ExecContext, network_namespace_path), SD_BUS_VTABLE_PROPERTY_CONST),
/* Obsolete/redundant properties: */
SD_BUS_PROPERTY("Capabilities", "s", property_get_empty_string, 0, SD_BUS_VTABLE_PROPERTY_CONST|SD_BUS_VTABLE_HIDDEN),
@ -1217,6 +1218,9 @@ int bus_exec_context_set_transient_property(
if (streq(name, "MountFlags"))
return bus_set_transient_mount_flags(u, name, &c->mount_flags, message, flags, error);
if (streq(name, "NetworkNamespacePath"))
return bus_set_transient_path(u, name, &c->network_namespace_path, message, flags, error);
if (streq(name, "SupplementaryGroups")) {
_cleanup_strv_free_ char **l = NULL;
char **p;

View File

@ -3062,6 +3062,14 @@ static int exec_child(
}
}
if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
if (r < 0) {
*exit_status = EXIT_NETWORK;
return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
}
}
r = setup_input(context, params, socket_fd, named_iofds);
if (r < 0) {
*exit_status = EXIT_STDIN;
@ -3272,13 +3280,17 @@ static int exec_child(
}
}
if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
if (ns_type_supported(NAMESPACE_NET)) {
r = setup_netns(runtime->netns_storage_socket);
if (r < 0) {
*exit_status = EXIT_NETWORK;
return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
}
} else if (context->network_namespace_path) {
*exit_status = EXIT_NETWORK;
return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP), "NetworkNamespacePath= is not supported, refusing.");
} else
log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
}
@ -3879,6 +3891,8 @@ void exec_context_done(ExecContext *c) {
c->stdin_data = mfree(c->stdin_data);
c->stdin_data_size = 0;
c->network_namespace_path = mfree(c->network_namespace_path);
}
int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
@ -4556,6 +4570,11 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
prefix, s);
}
if (c->network_namespace_path)
fprintf(f,
"%sNetworkNamespacePath: %s\n",
prefix, c->network_namespace_path);
if (c->syscall_errno > 0) {
const char *errno_name;
@ -4855,18 +4874,23 @@ static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
}
static void exec_runtime_freep(ExecRuntime **rt) {
if (*rt)
(void) exec_runtime_free(*rt, false);
(void) exec_runtime_free(*rt, false);
}
static int exec_runtime_allocate(ExecRuntime **rt) {
assert(rt);
static int exec_runtime_allocate(ExecRuntime **ret) {
ExecRuntime *n;
*rt = new0(ExecRuntime, 1);
if (!*rt)
assert(ret);
n = new(ExecRuntime, 1);
if (!n)
return -ENOMEM;
(*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
*n = (ExecRuntime) {
.netns_storage_socket = { -1, -1 },
};
*ret = n;
return 0;
}
@ -4929,7 +4953,7 @@ static int exec_runtime_add(
static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
_cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
_cleanup_close_pair_ int netns_storage_socket[2] = {-1, -1};
_cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
int r;
assert(m);
@ -4937,7 +4961,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
assert(id);
/* It is not necessary to create ExecRuntime object. */
if (!c->private_network && !c->private_tmp)
if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
return 0;
if (c->private_tmp) {
@ -4946,7 +4970,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
return r;
}
if (c->private_network) {
if (c->private_network || c->network_namespace_path) {
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
return -errno;
}
@ -4956,8 +4980,7 @@ static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, E
return r;
/* Avoid cleanup */
netns_storage_socket[0] = -1;
netns_storage_socket[1] = -1;
netns_storage_socket[0] = netns_storage_socket[1] = -1;
return 1;
}

View File

@ -279,6 +279,8 @@ struct ExecContext {
bool nice_set:1;
bool ioprio_set:1;
bool cpu_sched_set:1;
char *network_namespace_path;
};
static inline bool exec_context_restrict_namespaces_set(const ExecContext *c) {

View File

@ -114,6 +114,7 @@ $1.PrivateDevices, config_parse_bool, 0,
$1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables)
$1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules)
$1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups)
$1.NetworkNamespacePath, config_parse_unit_path_printf, 0, offsetof($1, exec_context.network_namespace_path)
$1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network)
$1.PrivateUsers, config_parse_bool, 0, offsetof($1, exec_context.private_users)
$1.PrivateMounts, config_parse_bool, 0, offsetof($1, exec_context.private_mounts)

View File

@ -1661,14 +1661,14 @@ int setup_netns(int netns_storage_socket[static 2]) {
netns = receive_one_fd(netns_storage_socket[0], MSG_DONTWAIT);
if (netns == -EAGAIN) {
/* Nothing stored yet, so let's create a new namespace */
/* Nothing stored yet, so let's create a new namespace. */
if (unshare(CLONE_NEWNET) < 0) {
r = -errno;
goto fail;
}
loopback_setup();
(void) loopback_setup();
netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
if (netns < 0) {
@ -1703,6 +1703,59 @@ fail:
return r;
}
int open_netns_path(int netns_storage_socket[static 2], const char *path) {
_cleanup_close_ int netns = -1;
int q, r;
assert(netns_storage_socket);
assert(netns_storage_socket[0] >= 0);
assert(netns_storage_socket[1] >= 0);
assert(path);
/* If the storage socket doesn't contain a netns fd yet, open one via the file system and store it in
* it. This is supposed to be called ahead of time, i.e. before setup_netns() which will allocate a
* new anonymous netns if needed. */
if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
return -errno;
netns = receive_one_fd(netns_storage_socket[0], MSG_DONTWAIT);
if (netns == -EAGAIN) {
/* Nothing stored yet. Open the file from the file system. */
netns = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
if (netns < 0) {
r = -errno;
goto fail;
}
r = fd_is_network_ns(netns);
if (r == 0) { /* Not a netns? Refuse early. */
r = -EINVAL;
goto fail;
}
if (r < 0 && r != -EUCLEAN) /* EUCLEAN: we don't know */
goto fail;
r = 1;
} else if (netns < 0) {
r = netns;
goto fail;
} else
r = 0; /* Already allocated */
q = send_one_fd(netns_storage_socket[1], netns, MSG_DONTWAIT);
if (q < 0) {
r = q;
goto fail;
}
fail:
(void) lockf(netns_storage_socket[0], F_ULOCK, 0);
return r;
}
bool ns_type_supported(NamespaceType type) {
const char *t, *ns_proc;

View File

@ -93,6 +93,7 @@ int setup_tmp_dirs(
char **var_tmp_dir);
int setup_netns(int netns_storage_socket[static 2]);
int open_netns_path(int netns_storage_socket[static 2], const char *path);
const char* protect_home_to_string(ProtectHome p) _const_;
ProtectHome protect_home_from_string(const char *s) _pure_;

View File

@ -1473,6 +1473,25 @@ static int socket_address_listen_do(
log_unit_error_errno(u, error, fmt, strna(_t)); \
})
/* Decides whether a listening socket for 'address' has to be created from a forked-off helper
 * process (needed for the cgroup/BPF and network namespace handling) or whether the simpler
 * in-process path is sufficient.
 *
 * Returns a negative value if bpf_firewall_supported() fails, a positive value if forking is
 * needed, and 0 if it is not. */
static int fork_needed(const SocketAddress *address, const ExecContext *context) {
        int bpf;

        assert(address);
        assert(context);

        switch (address->sockaddr.sa.sa_family) {

        case AF_INET:
        case AF_INET6:
                /* The BPF firewall is only checked for IPv4 and IPv6 sockets. */
                bpf = bpf_firewall_supported();
                if (bpf < 0)
                        return bpf;

                /* If BPF firewalling isn't supported anyway there's no point in forking on its
                 * behalf; in that case only the netns settings below decide. */
                if (bpf != BPF_FIREWALL_UNSUPPORTED)
                        return true;

                break;

        default:
                break;
        }

        /* Independently of the address family, a configured network namespace requires the fork. */
        return context->private_network || context->network_namespace_path;
}
static int socket_address_listen_in_cgroup(
Socket *s,
const SocketAddress *address,
@ -1485,18 +1504,34 @@ static int socket_address_listen_in_cgroup(
assert(s);
assert(address);
/* This is a wrapper around socket_address_listen(), that forks off a helper process inside the socket's cgroup
* in which the socket is actually created. This way we ensure the socket is actually properly attached to the
* unit's cgroup for the purpose of BPF filtering and such. */
/* This is a wrapper around socket_address_listen(), that forks off a helper process inside the
* socket's cgroup and network namespace in which the socket is actually created. This way we ensure
* the socket is actually properly attached to the unit's cgroup for the purpose of BPF filtering and
* such. */
if (!IN_SET(address->sockaddr.sa.sa_family, AF_INET, AF_INET6))
goto shortcut; /* BPF filtering only applies to IPv4 + IPv6, shortcut things for other protocols */
r = bpf_firewall_supported();
r = fork_needed(address, &s->exec_context);
if (r < 0)
return r;
if (r == BPF_FIREWALL_UNSUPPORTED) /* If BPF firewalling isn't supported anyway — there's no point in this forking complexity */
goto shortcut;
if (r == 0) {
/* Shortcut things... */
fd = socket_address_listen_do(s, address, label);
if (fd < 0)
return log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
return fd;
}
r = unit_setup_exec_runtime(UNIT(s));
if (r < 0)
return log_unit_error_errno(UNIT(s), r, "Failed acquire runtime: %m");
if (s->exec_context.network_namespace_path &&
s->exec_runtime &&
s->exec_runtime->netns_storage_socket[0] >= 0) {
r = open_netns_path(s->exec_runtime->netns_storage_socket, s->exec_context.network_namespace_path);
if (r < 0)
return log_unit_error_errno(UNIT(s), r, "Failed to open network namespace path %s: %m", s->exec_context.network_namespace_path);
}
if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
return log_unit_error_errno(UNIT(s), errno, "Failed to create communication channel: %m");
@ -1509,6 +1544,23 @@ static int socket_address_listen_in_cgroup(
pair[0] = safe_close(pair[0]);
if ((s->exec_context.private_network || s->exec_context.network_namespace_path) &&
s->exec_runtime &&
s->exec_runtime->netns_storage_socket[0] >= 0) {
if (ns_type_supported(NAMESPACE_NET)) {
r = setup_netns(s->exec_runtime->netns_storage_socket);
if (r < 0) {
log_unit_error_errno(UNIT(s), r, "Failed to join network namespace: %m");
_exit(EXIT_NETWORK);
}
} else if (s->exec_context.network_namespace_path) {
log_unit_error(UNIT(s), "Network namespace path configured but network namespaces not supported.");
_exit(EXIT_NETWORK);
} else
log_unit_warning(UNIT(s), "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
}
fd = socket_address_listen_do(s, address, label);
if (fd < 0) {
log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
@ -1538,13 +1590,6 @@ static int socket_address_listen_in_cgroup(
return log_address_error_errno(UNIT(s), address, fd, "Failed to receive listening socket (%s): %m");
return fd;
shortcut:
fd = socket_address_listen_do(s, address, label);
if (fd < 0)
return log_address_error_errno(UNIT(s), address, fd, "Failed to create listening socket (%s): %m");
return fd;
}
DEFINE_TRIVIAL_CLEANUP_FUNC(Socket *, socket_close_fds);

View File

@ -744,7 +744,7 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con
"UtmpIdentifier", "UtmpMode", "PAMName", "TTYPath",
"WorkingDirectory", "RootDirectory", "SyslogIdentifier",
"ProtectSystem", "ProtectHome", "SELinuxContext", "RootImage",
"RuntimeDirectoryPreserve", "Personality", "KeyringMode"))
"RuntimeDirectoryPreserve", "Personality", "KeyringMode", "NetworkNamespacePath"))
return bus_append_string(m, field, eq);