core: rework how device units get set up

This reworks how device units are "powered on".

This makes sure that any device changes that might have happened while
we were restarting/reloading will be noticed properly. For that we'll
now properly serialize/deserialize both the device unit state and the
device "found" flags, and restore these initially in the "coldplug"
phase of the manager deserialization. While enumerating the udev devices
during startup we'll put together a new "found" flags mask, which we'll
then switch to in the "catchup" phase of the manager deserialization,
which follows the "coldplug" phase.

Note that during the "coldplug" phase no unit state change events are
generated, which is different for the "catchup" phase which will do
that. Thus we correctly make sure that the deserialized state won't pull
in new deps, but any device's change while we were reloading would.

Fixes: #8832
Replaces: #8675
This commit is contained in:
Lennart Poettering 2018-06-05 17:28:32 +02:00
parent 69ce73d18d
commit 66f3fdbb07
2 changed files with 103 additions and 88 deletions

View File

@ -30,6 +30,7 @@ static const UnitActiveState state_translation_table[_DEVICE_STATE_MAX] = {
};
static int device_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask);
static void device_unset_sysfs(Device *d) {
Hashmap *devices;
@ -101,6 +102,8 @@ static void device_init(Unit *u) {
u->job_running_timeout = u->manager->default_timeout_start_usec;
u->ignore_on_isolate = true;
d->deserialized_state = _DEVICE_STATE_INVALID;
}
static void device_done(Unit *u) {
@ -130,33 +133,37 @@ static int device_coldplug(Unit *u) {
assert(d);
assert(d->state == DEVICE_DEAD);
/* This should happen only when we reexecute PID1 from an old version
* which didn't serialize d->found. In this case simply assume that the
* device was in plugged state right before we started reexecuting which
* might be a wrong assumption. */
if (d->found == DEVICE_FOUND_UDEV_DB)
d->found = DEVICE_FOUND_UDEV;
/* First, let's put the deserialized state and found mask into effect, if we have it. */
if (d->found & DEVICE_FOUND_UDEV)
/* If udev says the device is around, it's around */
device_set_state(d, DEVICE_PLUGGED);
else if (d->found != DEVICE_NOT_FOUND && d->deserialized_state != DEVICE_PLUGGED)
/* If a device is found in /proc/self/mountinfo or
* /proc/swaps, and was not yet announced via udev,
* it's "tentatively" around. */
device_set_state(d, DEVICE_TENTATIVE);
if (d->deserialized_state < 0 ||
(d->deserialized_state == d->state &&
d->deserialized_found == d->found))
return 0;
d->found = d->deserialized_found;
device_set_state(d, d->deserialized_state);
return 0;
}
/* Catch-up step for a device unit: bring the unit's current "found" mask in line with the mask
 * that was collected during enumeration (d->enumerated_found).
 *
 * Nothing happens when both masks already agree. Otherwise the enumerated mask is applied across all
 * DEVICE_FOUND_MASK bits through device_update_found_one(). */
static void device_catchup(Unit *u) {
        Device *d = DEVICE(u);

        assert(d);

        /* Second, let's update the state with the enumerated state if it's different */
        if (d->enumerated_found != d->found)
                device_update_found_one(d, d->enumerated_found, DEVICE_FOUND_MASK);
}
static const struct {
DeviceFound flag;
const char *name;
} device_found_map[] = {
{ DEVICE_FOUND_UDEV, "found-udev" },
{ DEVICE_FOUND_UDEV_DB, "found-udev-db" },
{ DEVICE_FOUND_MOUNT, "found-mount" },
{ DEVICE_FOUND_SWAP, "found-swap" },
{ DEVICE_FOUND_UDEV, "found-udev" },
{ DEVICE_FOUND_MOUNT, "found-mount" },
{ DEVICE_FOUND_SWAP, "found-swap" },
};
static int device_found_to_string_many(DeviceFound flags, char **ret) {
@ -236,18 +243,6 @@ static int device_deserialize_item(Unit *u, const char *key, const char *value,
assert(value);
assert(fds);
/* The device was known at the time units were serialized but it's not
* anymore at the time units are deserialized. This happens when PID1 is
* re-executed after having switched to the new rootfs: devices were
* enumerated but udevd wasn't running yet thus the list of devices
* (handled by systemd) to initialize was empty. In such case we wait
* for the device events to be re-triggered by udev so device units are
* properly re-initialized. */
if (d->found == DEVICE_NOT_FOUND) {
assert(d->sysfs == NULL);
return 0;
}
if (streq(key, "state")) {
DeviceState state;
@ -258,9 +253,9 @@ static int device_deserialize_item(Unit *u, const char *key, const char *value,
d->deserialized_state = state;
} else if (streq(key, "found")) {
r = device_found_from_string_many(value, &d->found);
r = device_found_from_string_many(value, &d->deserialized_found);
if (r < 0)
log_unit_debug(u, "Failed to parse found value, ignoring: %s", value);
log_unit_debug_errno(u, r, "Failed to parse found value, ignoring: %s", value);
} else
log_unit_debug(u, "Unknown serialization key: %s", key);
@ -438,8 +433,10 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
if (dev) {
sysfs = udev_device_get_syspath(dev);
if (!sysfs)
if (!sysfs) {
log_debug("Couldn't get syspath from udev device, ignoring.");
return 0;
}
}
r = unit_name_from_path(path, ".device", &e);
@ -448,17 +445,21 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
u = manager_get_unit(m, e);
if (u) {
/* The device unit can still be present even if the device was unplugged: a mount unit can reference it hence
* preventing the GC to have garbaged it. That's desired since the device unit may have a dependency on the
* mount unit which was added during the loading of the later. */
if (dev && DEVICE(u)->state == DEVICE_PLUGGED) {
/* The device unit can still be present even if the device was unplugged: a mount unit can reference it
* hence preventing the GC to have garbaged it. That's desired since the device unit may have a
* dependency on the mount unit which was added during the loading of the later. When the device is
* plugged the sysfs might not be initialized yet, as we serialize the device's state but do not
* serialize the sysfs path across reloads/reexecs. Hence, when coming back from a reload/restart we
* might have the state valid, but not the sysfs path. Hence, let's filter out conflicting devices, but
* let's accept devices in any state with no sysfs path set. */
/* This unit is in plugged state: we're sure it's attached to a device. */
if (!path_equal(DEVICE(u)->sysfs, sysfs)) {
log_unit_debug(u, "Dev %s appeared twice with different sysfs paths %s and %s",
e, DEVICE(u)->sysfs, sysfs);
return -EEXIST;
}
if (DEVICE(u)->state == DEVICE_PLUGGED &&
DEVICE(u)->sysfs &&
sysfs &&
!path_equal(DEVICE(u)->sysfs, sysfs)) {
log_unit_debug(u, "Device %s appeared twice with different sysfs paths %s and %s, ignoring the latter.",
e, DEVICE(u)->sysfs, sysfs);
return -EEXIST;
}
delete = false;
@ -470,24 +471,26 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
delete = true;
r = unit_new_for_name(m, sizeof(Device), e, &u);
if (r < 0)
if (r < 0) {
log_error_errno(r, "Failed to allocate device unit %s: %m", e);
goto fail;
}
unit_add_to_load_queue(u);
}
/* If this was created via some dependency and has not
* actually been seen yet ->sysfs will not be
/* If this was created via some dependency and has not actually been seen yet ->sysfs will not be
* initialized. Hence initialize it if necessary. */
if (sysfs) {
r = device_set_sysfs(DEVICE(u), sysfs);
if (r < 0)
if (r < 0) {
log_error_errno(r, "Failed to set sysfs path %s for device unit %s: %m", sysfs, e);
goto fail;
}
(void) device_update_description(u, dev, path);
/* The additional systemd udev properties we only interpret
* for the main object */
/* The additional systemd udev properties we only interpret for the main object */
if (main)
(void) device_add_udev_wants(u, dev);
}
@ -499,13 +502,11 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
device_upgrade_mount_deps(u);
/* Note that this won't dispatch the load queue, the caller has to do that if needed and appropriate */
unit_add_to_dbus_queue(u);
return 0;
fail:
log_unit_warning_errno(u, r, "Failed to set up device unit: %m");
if (delete)
unit_free(u);
@ -584,45 +585,51 @@ static int device_process_new(Manager *m, struct udev_device *dev) {
return 0;
}
static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask) {
DeviceFound n, previous;
static void device_found_changed(Device *d, DeviceFound previous, DeviceFound now) {
assert(d);
n = (d->found & ~mask) | (found & mask);
if (n == d->found)
return;
previous = d->found;
d->found = n;
if (MANAGER_IS_RUNNING(UNIT(d)->manager))
return;
/* Didn't exist before, but does now? if so, generate a new invocation ID for it */
if (previous == DEVICE_NOT_FOUND && d->found != DEVICE_NOT_FOUND)
if (previous == DEVICE_NOT_FOUND && now != DEVICE_NOT_FOUND)
(void) unit_acquire_invocation_id(UNIT(d));
if (d->found & DEVICE_FOUND_UDEV)
/* When the device is known to udev we consider it
* plugged. */
if (FLAGS_SET(now, DEVICE_FOUND_UDEV))
/* When the device is known to udev we consider it plugged. */
device_set_state(d, DEVICE_PLUGGED);
else if (d->found != DEVICE_NOT_FOUND && (previous & DEVICE_FOUND_UDEV) == 0)
/* If the device has not been seen by udev yet, but is
* now referenced by the kernel, then we assume the
else if (now != DEVICE_NOT_FOUND && !FLAGS_SET(previous, DEVICE_FOUND_UDEV))
/* If the device has not been seen by udev yet, but is now referenced by the kernel, then we assume the
* kernel knows it now, and udev might soon too. */
device_set_state(d, DEVICE_TENTATIVE);
else {
/* If nobody sees the device, or if the device was
* previously seen by udev and now is only referenced
* from the kernel, then we consider the device is
* gone, the kernel just hasn't noticed it yet. */
/* If nobody sees the device, or if the device was previously seen by udev and now is only referenced
* from the kernel, then we consider the device is gone, the kernel just hasn't noticed it yet. */
device_set_state(d, DEVICE_DEAD);
device_unset_sysfs(d);
}
}
/* Merge the bits of 'found' selected by 'mask' into a device's "found" flags; bits outside of 'mask'
 * are left untouched.
 *
 * Which field receives the result depends on whether the manager is running:
 *   - not running yet: only the shadow field d->enumerated_found is updated, and nothing else is triggered
 *     here;
 *   - running: d->found is updated and, if its value actually changed, device_found_changed() is invoked
 *     with the previous and the new mask. */
static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask) {
        DeviceFound merged, before;

        assert(d);

        if (!MANAGER_IS_RUNNING(UNIT(d)->manager)) {
                /* We aren't running yet: record the new bits in the shadow variable only. They get applied
                 * once we catch up with the state. */
                d->enumerated_found = (d->enumerated_found & ~mask) | (found & mask);
                return;
        }

        /* We are already running: apply the new mask immediately and trigger state changes immediately. */
        merged = (d->found & ~mask) | (found & mask);
        if (merged == d->found)
                return;

        before = d->found;
        d->found = merged;

        device_found_changed(d, before, merged);
}
static void device_update_found_by_sysfs(Manager *m, const char *sysfs, DeviceFound found, DeviceFound mask) {
Device *d, *l, *n;
@ -824,7 +831,7 @@ static void device_enumerate(Manager *m) {
continue;
(void) device_process_new(m, dev);
device_update_found_by_sysfs(m, sysfs, DEVICE_FOUND_UDEV_DB, DEVICE_FOUND_UDEV_DB);
device_update_found_by_sysfs(m, sysfs, DEVICE_FOUND_UDEV, DEVICE_FOUND_UDEV);
}
return;
@ -995,7 +1002,11 @@ void device_found_node(Manager *m, const char *node, DeviceFound found, DeviceFo
/* This is called whenever we find a device referenced in /proc/swaps or /proc/self/mounts. Such a device might
* be mounted/enabled at a time where udev has not finished probing it yet, and we thus haven't learned about
* it yet. In this case we will set the device unit to "tentative" state. */
* it yet. In this case we will set the device unit to "tentative" state.
*
* This takes a pair of DeviceFound flags parameters. The 'mask' parameter is a bit mask that indicates which
* bits of 'found' to copy into the per-device DeviceFound flags field. Thus, this function may be used to set
* and unset individual bits in a single call, while merging partially with previous state. */
if ((found & mask) != 0) {
_cleanup_(udev_device_unrefp) struct udev_device *dev = NULL;
@ -1039,6 +1050,7 @@ const UnitVTable device_vtable = {
.load = unit_load_fragment_and_dropin_optional,
.coldplug = device_coldplug,
.catchup = device_catchup,
.serialize = device_serialize,
.deserialize_item = device_deserialize_item,

View File

@ -11,26 +11,29 @@
typedef struct Device Device;
/* A mask specifying where we have seen the device currently. This is a bitmask because the device might show up
* asynchronously from each other at various places. For example, in very common case a device might already be mounted
* before udev finished probing it (think: a script setting up a loopback block device, formatting it and mounting it
* in quick succession). Hence we need to track precisely where it is already visible and where not. */
typedef enum DeviceFound {
DEVICE_NOT_FOUND = 0,
DEVICE_FOUND_UDEV = 1 << 1,
DEVICE_FOUND_UDEV_DB = 1 << 2,
DEVICE_FOUND_MOUNT = 1 << 3,
DEVICE_FOUND_SWAP = 1 << 4,
DEVICE_NOT_FOUND = 0,
DEVICE_FOUND_UDEV = 1U << 1, /* The device has shown up in the udev database */
DEVICE_FOUND_MOUNT = 1U << 2, /* The device has shown up in /proc/self/mountinfo */
DEVICE_FOUND_SWAP = 1U << 3, /* The device has shown up in /proc/swaps */
DEVICE_FOUND_MASK = DEVICE_FOUND_UDEV|DEVICE_FOUND_MOUNT|DEVICE_FOUND_SWAP,
} DeviceFound;
struct Device {
Unit meta;
char *sysfs;
DeviceFound found;
/* In order to be able to distinguish dependencies on
different device nodes we might end up creating multiple
devices for the same sysfs path. We chain them up here. */
/* In order to be able to distinguish dependencies on different device nodes we might end up creating multiple
* devices for the same sysfs path. We chain them up here. */
LIST_FIELDS(struct Device, same_sysfs);
DeviceState state, deserialized_state;
DeviceFound found, deserialized_found, enumerated_found;
bool bind_mounts;
};