Merge pull request #17211 from poettering/udev-loop-fixes

two udev fixes, split out of #16859
2020-10-09 17:16:07 +02:00 · 2020-10-09 17:16:07 +02:00 · 6c08f84ac6
parent d2ec13fa8a b8380cc67a
commit 6c08f84ac6
2 changed files with 56 additions and 2 deletions
--- a/src/shared/udev-util.c
+++ b/src/shared/udev-util.c
@ -154,6 +154,22 @@ static int device_monitor_handler(sd_device_monitor *monitor, sd_device *device,
        assert(data->sysname || data->devlink);
        assert(!data->device);

+        /* Ignore REMOVE events here. We are waiting for initialization after all, not de-initialization. We
+         * might see a REMOVE event from an earlier use of the device (devices by the same name are recycled
+         * by the kernel after all), which we should not get confused by. After all we cannot distinguish use
+         * cycles of the devices, as the udev queue is entirely asynchronous.
+         *
+         * If we see a REMOVE event here for the use cycle we actually care about then we won't notice of
+         * course, but that should be OK, given the timeout logic used on the wait loop: this will be noticed
+         * by means of -ETIMEDOUT. Thus we won't notice immediately, but eventually, and that should be
+         * sufficient for an error path that should regularly not happen.
+         *
+         * (And yes, we only need to special case REMOVE. It's the only "negative" event type, where a device
+         * ceases to exist. All other event types are "positive": the device exists and is registered in the
+         * udev database, thus whenever we see the event, we can consider it initialized.) */
+        if (device_for_action(device, DEVICE_ACTION_REMOVE))
+                return 0;
+
        if (data->sysname && sd_device_get_sysname(device, &sysname) >= 0 && streq(sysname, data->sysname))
                goto found;

--- a/src/udev/udevd.c
+++ b/src/udev/udevd.c
@ -459,6 +459,43 @@ static int worker_process_device(Manager *manager, sd_device *dev) {
                return -ENOMEM;

        r = worker_lock_block_device(dev, &fd_lock);
+        if (r == -EAGAIN) {
+                /* So this is a block device and the device is locked currently via the BSD advisory locks —
+                 * someone else is exclusively using it. This means we don't run our udev rules now, to not
+                 * interfere. However we want to know when the device is unlocked again, and retrigger the
+                 * device again then, so that the rules are run eventually. For that we use IN_CLOSE_WRITE
+                 * inotify watches (which isn't exactly the same as waiting for the BSD locks to release, but
+                 * not totally off, as long as unlock+close() is done together, as it usually is).
+                 *
+                 * (The user-facing side of this: https://systemd.io/BLOCK_DEVICE_LOCKING)
+                 *
+                 * There's a bit of a chicken and egg problem here for this however: inotify watching is
+                 * supposed to be enabled via an option set via udev rules (OPTIONS+="watch"). If we skip the
+                 * udev rules here however (as we just said we do), we would thus never see that specific
+                 * udev rule, and thus never turn on inotify watching. But in order to catch up eventually
+                 * and run them we we need the inotify watching: hence a classic chicken and egg problem.
+                 *
+                 * Our way out here: if we see the block device locked, unconditionally watch the device via
+                 * inotify, regardless of any explicit request via OPTIONS+="watch". Thus, a device that is
+                 * currently locked via the BSD file locks will be treated as if we ran a single udev rule
+                 * only for it: the one that turns on inotify watching for it. If we eventually see the
+                 * inotify IN_CLOSE_WRITE event, and then run the rules after all and we then realize that
+                 * this wasn't actually requested (i.e. no OPTIONS+="watch" set) we'll simply turn off the
+                 * watching again (see below). Effectively this means: inotify watching is now enabled either
+                 * a) when the udev rules say so, or b) while the device is locked.
+                 *
+                 * Worst case scenario hence: in the (unlikely) case someone locked the device and we clash
+                 * with that we might do inotify watching for a brief moment for a device where we actually
+                 * weren't supposed to. But that shouldn't be too bad, in particular as BSD locks being taken
+                 * on a block device is kinda an indication that the inotify logic is desired too, to some
+                 * degree — they go hand-in-hand after all. */
+
+                log_device_debug(dev, "Block device is currently locked, installing watch to wait until the lock is released.");
+                (void) udev_watch_begin(dev);
+
+                /* Now the watch is installed, let's lock the device again, maybe in the meantime things changed */
+                r = worker_lock_block_device(dev, &fd_lock);
+        }
        if (r < 0)
                return r;

@ -475,13 +512,14 @@ static int worker_process_device(Manager *manager, sd_device *dev) {
                /* in case rtnl was initialized */
                manager->rtnl = sd_netlink_ref(udev_event->rtnl);

-        /* apply/restore inotify watch */
+        /* apply/restore/end inotify watch */
        if (udev_event->inotify_watch) {
                (void) udev_watch_begin(dev);
                r = device_update_db(dev);
                if (r < 0)
                        return log_device_debug_errno(dev, r, "Failed to update database under /run/udev/data/: %m");
-        }
+        } else
+                (void) udev_watch_end(dev);

        log_device_debug(dev, "Device (SEQNUM=%"PRIu64", ACTION=%s) processed",
                         seqnum, device_action_to_string(action));