2013-10-30 15:34:50 +01:00
|
|
|
/***
|
|
|
|
This file is part of systemd.
|
|
|
|
|
|
|
|
Copyright 2013 Lennart Poettering
|
|
|
|
|
|
|
|
systemd is free software; you can redistribute it and/or modify it
|
|
|
|
under the terms of the GNU Lesser General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2.1 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
systemd is distributed in the hope that it will be useful, but
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
|
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
***/
|
|
|
|
|
|
|
|
#include <fcntl.h>
|
2015-11-16 22:09:36 +01:00
|
|
|
#include <unistd.h>
|
2013-10-30 15:34:50 +01:00
|
|
|
|
2015-10-25 13:14:12 +01:00
|
|
|
#include "bus-container.h"
|
2013-10-30 15:34:50 +01:00
|
|
|
#include "bus-internal.h"
|
|
|
|
#include "bus-socket.h"
|
2015-10-25 13:14:12 +01:00
|
|
|
#include "fd-util.h"
|
|
|
|
#include "process-util.h"
|
|
|
|
#include "util.h"
|
2013-10-30 15:34:50 +01:00
|
|
|
|
2013-12-13 22:02:47 +01:00
|
|
|
/* Connects the bus socket of 'b' to b->sockaddr from a helper process that has
 * first entered the namespaces of the process b->nspid.
 *
 * If b->nspid is not set (<= 0), it is first resolved from b->machine via
 * container_get_leader(). The socket itself is created in the calling process
 * and stored in b->input_fd/b->output_fd; only the connect() call is issued
 * from the forked helper, which shares the same socket across fork().
 *
 * Since the helper lives in a separate process, the errno of a failed
 * connect() is passed back to the caller over a SOCK_SEQPACKET socket pair.
 *
 * Returns:
 *   - a negative errno-style value if any setup step fails, if connect()
 *     fails in the helper (the helper's errno, negated), or -EIO if the helper
 *     terminated abnormally, exited with a non-success status, or sent a
 *     malformed error message;
 *   - 1 if the helper's connect() reported EINPROGRESS (the socket is
 *     non-blocking);
 *   - otherwise whatever bus_socket_start_auth() returns.
 */
int bus_container_connect_socket(sd_bus *b) {
        _cleanup_close_pair_ int pair[2] = { -1, -1 };
        _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, usernsfd = -1, rootfd = -1;
        pid_t child;
        siginfo_t si;
        int r, error_buf = 0;
        ssize_t n;

        assert(b);
        assert(b->input_fd < 0);
        assert(b->output_fd < 0);
        assert(b->nspid > 0 || b->machine);

        /* No PID given: look up the leader of the named machine instead. */
        if (b->nspid <= 0) {
                r = container_get_leader(b->machine, &b->nspid);
                if (r < 0)
                        return r;
        }

        r = namespace_open(b->nspid, &pidnsfd, &mntnsfd, NULL, &usernsfd, &rootfd);
        if (r < 0)
                return r;

        /* Create the socket here, in the caller, so that the descriptor stays
         * with 'b' after the helper processes are gone. */
        b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
        if (b->input_fd < 0)
                return -errno;

        b->output_fd = b->input_fd;

        bus_socket_setup(b);

        /* Side channel used by the grandchild to report the errno of a failed
         * connect(). Created with SOCK_CLOEXEC, like the bus socket above, so
         * that neither end leaks into a program exec'd concurrently by some
         * other thread of the calling process. */
        if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, pair) < 0)
                return -errno;

        child = fork();
        if (child < 0)
                return -errno;

        if (child == 0) {
                pid_t grandchild;

                /* The child side only ever writes to the pair. */
                pair[0] = safe_close(pair[0]);

                r = namespace_enter(pidnsfd, mntnsfd, -1, usernsfd, rootfd);
                if (r < 0)
                        _exit(EXIT_FAILURE);

                /* We just changed PID namespace, however it will only
                 * take effect on the children we now fork. Hence,
                 * let's fork another time, and connect from this
                 * grandchild, so that SO_PEERCRED of our connection
                 * comes from a process from within the container, and
                 * not outside of it */

                grandchild = fork();
                if (grandchild < 0)
                        _exit(EXIT_FAILURE);

                if (grandchild == 0) {

                        r = connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size);
                        if (r < 0) {
                                /* Try to send error up */
                                error_buf = errno;
                                (void) write(pair[1], &error_buf, sizeof(error_buf));
                                _exit(EXIT_FAILURE);
                        }

                        _exit(EXIT_SUCCESS);
                }

                /* Relay the grandchild's exit status to the original caller. */
                r = wait_for_terminate(grandchild, &si);
                if (r < 0)
                        _exit(EXIT_FAILURE);

                if (si.si_code != CLD_EXITED)
                        _exit(EXIT_FAILURE);

                _exit(si.si_status);
        }

        /* Close our copy of the write end, so that read() below returns 0
         * instead of blocking when nothing was sent. */
        pair[1] = safe_close(pair[1]);

        r = wait_for_terminate(child, &si);
        if (r < 0)
                return r;

        n = read(pair[0], &error_buf, sizeof(error_buf));
        if (n < 0)
                return -errno;

        if (n > 0) {
                /* A message was sent: it must be exactly one non-negative errno value. */
                if (n != sizeof(error_buf))
                        return -EIO;

                if (error_buf < 0)
                        return -EIO;

                /* Not treated as a failure: the socket is non-blocking. */
                if (error_buf == EINPROGRESS)
                        return 1;

                if (error_buf > 0)
                        return -error_buf;
        }

        /* No usable error message was received; fall back to the exit status. */
        if (si.si_code != CLD_EXITED)
                return -EIO;

        if (si.si_status != EXIT_SUCCESS)
                return -EIO;

        return bus_socket_start_auth(b);
}
|