nspawn: add a new --no-new-privileges= cmdline option to nspawn

This simply controls the PR_SET_NO_NEW_PRIVS flag for the container.
This too is primarily relevant to provide OCI runtime compatibility, but
might have other uses too, in particular as it nicely complements the
existing --capability= and --drop-capability= flags.
This commit is contained in:
Lennart Poettering 2018-05-07 19:35:48 +02:00
parent 3a9530e5f1
commit 66edd96310
6 changed files with 50 additions and 4 deletions

View File

@ -733,6 +733,17 @@
above).</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--no-new-privileges=</option></term>
<listitem><para>Takes a boolean argument. Specifies the value of the <constant>PR_SET_NO_NEW_PRIVS</constant>
flag for the container payload. Defaults to off. When turned on, the payload code of the container cannot
acquire new privileges, i.e. the "setuid" file bit as well as file system capabilities will no longer have
any effect. See <citerefentry
project='man-pages'><refentrytitle>prctl</refentrytitle><manvolnum>2</manvolnum></citerefentry> for details
about this flag.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--system-call-filter=</option></term>

View File

@ -222,6 +222,17 @@
all cases.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>NoNewPrivileges=</varname></term>
<listitem><para>Takes a boolean argument that controls the <constant>PR_SET_NO_NEW_PRIVS</constant> flag for
the container payload. This is equivalent to the
<option>--no-new-privileges=</option> command line switch. See
<citerefentry><refentrytitle>systemd-nspawn</refentrytitle><manvolnum>1</manvolnum></citerefentry> for
details.</para>
</listitem>
</varlistentry>
<varlistentry>
<term><varname>KillSignal=</varname></term>

View File

@ -50,6 +50,7 @@ Exec.LimitNICE, config_parse_rlimit, RLIMIT_NICE, of
Exec.LimitRTPRIO, config_parse_rlimit, RLIMIT_RTPRIO, offsetof(Settings, rlimit)
Exec.LimitRTTIME, config_parse_rlimit, RLIMIT_RTTIME, offsetof(Settings, rlimit)
Exec.Hostname, config_parse_hostname, 0, offsetof(Settings, hostname)
Exec.NoNewPrivileges, config_parse_tristate, 0, offsetof(Settings, no_new_privileges)
Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only)
Files.Volatile, config_parse_volatile_mode, 0, offsetof(Settings, volatile_mode)
Files.Bind, config_parse_bind, 0, 0

View File

@ -36,6 +36,7 @@ int settings_load(FILE *f, const char *path, Settings **ret) {
s->userns_mode = _USER_NAMESPACE_MODE_INVALID;
s->uid_shift = UID_INVALID;
s->uid_range = UID_INVALID;
s->no_new_privileges = -1;
s->read_only = -1;
s->volatile_mode = _VOLATILE_MODE_INVALID;

View File

@ -50,9 +50,10 @@ typedef enum SettingsMask {
SETTING_PIVOT_ROOT = UINT64_C(1) << 15,
SETTING_SYSCALL_FILTER = UINT64_C(1) << 16,
SETTING_HOSTNAME = UINT64_C(1) << 17,
SETTING_RLIMIT_FIRST = UINT64_C(1) << 18, /* we define one bit per resource limit here */
SETTING_RLIMIT_LAST = UINT64_C(1) << (18 + _RLIMIT_MAX - 1),
_SETTINGS_MASK_ALL = (UINT64_C(1) << (18 + _RLIMIT_MAX)) - 1
SETTING_NO_NEW_PRIVILEGES = UINT64_C(1) << 18,
SETTING_RLIMIT_FIRST = UINT64_C(1) << 19, /* we define one bit per resource limit here */
SETTING_RLIMIT_LAST = UINT64_C(1) << (19 + _RLIMIT_MAX - 1),
_SETTINGS_MASK_ALL = (UINT64_C(1) << (19 + _RLIMIT_MAX)) - 1
} SettingsMask;
typedef struct Settings {
@ -76,6 +77,7 @@ typedef struct Settings {
char **syscall_blacklist;
struct rlimit *rlimit[_RLIMIT_MAX];
char *hostname;
int no_new_privileges;
/* [Image] */
int read_only;

View File

@ -203,6 +203,7 @@ static size_t arg_root_hash_size = 0;
static char **arg_syscall_whitelist = NULL;
static char **arg_syscall_blacklist = NULL;
static struct rlimit *arg_rlimit[_RLIMIT_MAX] = {};
static bool arg_no_new_privileges = false;
static void help(void) {
printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
@ -446,6 +447,7 @@ static int parse_argv(int argc, char *argv[]) {
ARG_SYSTEM_CALL_FILTER,
ARG_RLIMIT,
ARG_HOSTNAME,
ARG_NO_NEW_PRIVILEGES,
};
static const struct option options[] = {
@ -462,6 +464,7 @@ static int parse_argv(int argc, char *argv[]) {
{ "read-only", no_argument, NULL, ARG_READ_ONLY },
{ "capability", required_argument, NULL, ARG_CAPABILITY },
{ "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
{ "no-new-privileges", required_argument, NULL, ARG_NO_NEW_PRIVILEGES },
{ "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
{ "bind", required_argument, NULL, ARG_BIND },
{ "bind-ro", required_argument, NULL, ARG_BIND_RO },
@ -773,6 +776,15 @@ static int parse_argv(int argc, char *argv[]) {
break;
}
case ARG_NO_NEW_PRIVILEGES:
r = parse_boolean(optarg);
if (r < 0)
return log_error_errno(r, "Failed to parse --no-new-privileges= argument: %s", optarg);
arg_no_new_privileges = r;
arg_settings_mask |= SETTING_NO_NEW_PRIVILEGES;
break;
case 'j':
arg_link_journal = LINK_GUEST;
arg_link_journal_try = true;
@ -2463,6 +2475,10 @@ static int inner_child(
if (r < 0)
return r;
if (arg_no_new_privileges)
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0)
return log_error_errno(errno, "Failed to disable new privileges: %m");
/* LXC sets container=lxc, so follow the scheme here */
envp[n_env++] = strjoina("container=", arg_container_service_name);
@ -3339,6 +3355,10 @@ static int load_settings(void) {
settings->hostname)
free_and_replace(arg_hostname, settings->hostname);
if ((arg_settings_mask & SETTING_NO_NEW_PRIVILEGES) == 0 &&
settings->no_new_privileges >= 0)
arg_no_new_privileges = settings->no_new_privileges;
return 0;
}
@ -4229,7 +4249,7 @@ int main(int argc, char *argv[]) {
assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1) >= 0);
if (prctl(PR_SET_CHILD_SUBREAPER, 1) < 0) {
if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) < 0) {
r = log_error_errno(errno, "Failed to become subreaper: %m");
goto finish;
}