Skip to content

Commit 66f3fdb

Browse files
committed
core: rework how device units get set up
This reworks how device units are "powered on". This makes sure that any device changes that might have happened while we were restarting/reloading will be noticed properly. For that we'll now properly serialize/deserialize both the device unit state and the device "found" flags, and restore these initially in the "coldplug" phase of the manager deserialization. While enumerating the udev devices during startup we'll put together a new "found" flags mask, which we'll the switch to in the "catchup" phase of the manager deserialization, which follows the "coldplug" phase. Note that during the "coldplug" phase no unit state change events are generated, which is different for the "catchall" phase which will do that. Thus we correctly make sure that the deserialized state won't pull in new deps, but any device's change while we were reloading would. Fixes: #8832 Replaces: #8675
1 parent 69ce73d commit 66f3fdb

2 files changed

Lines changed: 104 additions & 89 deletions

File tree

src/core/device.c

Lines changed: 92 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ static const UnitActiveState state_translation_table[_DEVICE_STATE_MAX] = {
3030
};
3131

3232
static int device_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
33+
static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask);
3334

3435
static void device_unset_sysfs(Device *d) {
3536
Hashmap *devices;
@@ -101,6 +102,8 @@ static void device_init(Unit *u) {
101102
u->job_running_timeout = u->manager->default_timeout_start_usec;
102103

103104
u->ignore_on_isolate = true;
105+
106+
d->deserialized_state = _DEVICE_STATE_INVALID;
104107
}
105108

106109
static void device_done(Unit *u) {
@@ -130,33 +133,37 @@ static int device_coldplug(Unit *u) {
130133
assert(d);
131134
assert(d->state == DEVICE_DEAD);
132135

133-
/* This should happen only when we reexecute PID1 from an old version
134-
* which didn't serialize d->found. In this case simply assume that the
135-
* device was in plugged state right before we started reexecuting which
136-
* might be a wrong assumption. */
137-
if (d->found == DEVICE_FOUND_UDEV_DB)
138-
d->found = DEVICE_FOUND_UDEV;
136+
/* First, let's put the deserialized state and found mask into effect, if we have it. */
139137

140-
if (d->found & DEVICE_FOUND_UDEV)
141-
/* If udev says the device is around, it's around */
142-
device_set_state(d, DEVICE_PLUGGED);
143-
else if (d->found != DEVICE_NOT_FOUND && d->deserialized_state != DEVICE_PLUGGED)
144-
/* If a device is found in /proc/self/mountinfo or
145-
* /proc/swaps, and was not yet announced via udev,
146-
* it's "tentatively" around. */
147-
device_set_state(d, DEVICE_TENTATIVE);
138+
if (d->deserialized_state < 0 ||
139+
(d->deserialized_state == d->state &&
140+
d->deserialized_found == d->found))
141+
return 0;
148142

143+
d->found = d->deserialized_found;
144+
device_set_state(d, d->deserialized_state);
149145
return 0;
150146
}
151147

148+
static void device_catchup(Unit *u) {
149+
Device *d = DEVICE(u);
150+
151+
assert(d);
152+
153+
/* Second, let's update the state with the enumerated state if it's different */
154+
if (d->enumerated_found == d->found)
155+
return;
156+
157+
device_update_found_one(d, d->enumerated_found, DEVICE_FOUND_MASK);
158+
}
159+
152160
static const struct {
153161
DeviceFound flag;
154162
const char *name;
155163
} device_found_map[] = {
156-
{ DEVICE_FOUND_UDEV, "found-udev" },
157-
{ DEVICE_FOUND_UDEV_DB, "found-udev-db" },
158-
{ DEVICE_FOUND_MOUNT, "found-mount" },
159-
{ DEVICE_FOUND_SWAP, "found-swap" },
164+
{ DEVICE_FOUND_UDEV, "found-udev" },
165+
{ DEVICE_FOUND_MOUNT, "found-mount" },
166+
{ DEVICE_FOUND_SWAP, "found-swap" },
160167
};
161168

162169
static int device_found_to_string_many(DeviceFound flags, char **ret) {
@@ -236,18 +243,6 @@ static int device_deserialize_item(Unit *u, const char *key, const char *value,
236243
assert(value);
237244
assert(fds);
238245

239-
/* The device was known at the time units were serialized but it's not
240-
* anymore at the time units are deserialized. This happens when PID1 is
241-
* re-executed after having switched to the new rootfs: devices were
242-
* enumerated but udevd wasn't running yet thus the list of devices
243-
* (handled by systemd) to initialize was empty. In such case we wait
244-
* for the device events to be re-triggered by udev so device units are
245-
* properly re-initialized. */
246-
if (d->found == DEVICE_NOT_FOUND) {
247-
assert(d->sysfs == NULL);
248-
return 0;
249-
}
250-
251246
if (streq(key, "state")) {
252247
DeviceState state;
253248

@@ -258,9 +253,9 @@ static int device_deserialize_item(Unit *u, const char *key, const char *value,
258253
d->deserialized_state = state;
259254

260255
} else if (streq(key, "found")) {
261-
r = device_found_from_string_many(value, &d->found);
256+
r = device_found_from_string_many(value, &d->deserialized_found);
262257
if (r < 0)
263-
log_unit_debug(u, "Failed to parse found value, ignoring: %s", value);
258+
log_unit_debug_errno(u, r, "Failed to parse found value, ignoring: %s", value);
264259

265260
} else
266261
log_unit_debug(u, "Unknown serialization key: %s", key);
@@ -438,8 +433,10 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
438433

439434
if (dev) {
440435
sysfs = udev_device_get_syspath(dev);
441-
if (!sysfs)
436+
if (!sysfs) {
437+
log_debug("Couldn't get syspath from udev device, ignoring.");
442438
return 0;
439+
}
443440
}
444441

445442
r = unit_name_from_path(path, ".device", &e);
@@ -448,17 +445,21 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
448445

449446
u = manager_get_unit(m, e);
450447
if (u) {
451-
/* The device unit can still be present even if the device was unplugged: a mount unit can reference it hence
452-
* preventing the GC to have garbaged it. That's desired since the device unit may have a dependency on the
453-
* mount unit which was added during the loading of the later. */
454-
if (dev && DEVICE(u)->state == DEVICE_PLUGGED) {
455-
456-
/* This unit is in plugged state: we're sure it's attached to a device. */
457-
if (!path_equal(DEVICE(u)->sysfs, sysfs)) {
458-
log_unit_debug(u, "Dev %s appeared twice with different sysfs paths %s and %s",
459-
e, DEVICE(u)->sysfs, sysfs);
460-
return -EEXIST;
461-
}
448+
/* The device unit can still be present even if the device was unplugged: a mount unit can reference it
449+
* hence preventing the GC to have garbaged it. That's desired since the device unit may have a
450+
* dependency on the mount unit which was added during the loading of the later. When the device is
451+
* plugged the sysfs might not be initialized yet, as we serialize the device's state but do not
452+
* serialize the sysfs path across reloads/reexecs. Hence, when coming back from a reload/restart we
453+
* might have the state valid, but not the sysfs path. Hence, let's filter out conflicting devices, but
454+
* let's accept devices in any state with no sysfs path set. */
455+
456+
if (DEVICE(u)->state == DEVICE_PLUGGED &&
457+
DEVICE(u)->sysfs &&
458+
sysfs &&
459+
!path_equal(DEVICE(u)->sysfs, sysfs)) {
460+
log_unit_debug(u, "Device %s appeared twice with different sysfs paths %s and %s, ignoring the latter.",
461+
e, DEVICE(u)->sysfs, sysfs);
462+
return -EEXIST;
462463
}
463464

464465
delete = false;
@@ -470,24 +471,26 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
470471
delete = true;
471472

472473
r = unit_new_for_name(m, sizeof(Device), e, &u);
473-
if (r < 0)
474+
if (r < 0) {
475+
log_error_errno(r, "Failed to allocate device unit %s: %m", e);
474476
goto fail;
477+
}
475478

476479
unit_add_to_load_queue(u);
477480
}
478481

479-
/* If this was created via some dependency and has not
480-
* actually been seen yet ->sysfs will not be
482+
/* If this was created via some dependency and has not actually been seen yet ->sysfs will not be
481483
* initialized. Hence initialize it if necessary. */
482484
if (sysfs) {
483485
r = device_set_sysfs(DEVICE(u), sysfs);
484-
if (r < 0)
486+
if (r < 0) {
487+
log_error_errno(r, "Failed to set sysfs path %s for device unit %s: %m", sysfs, e);
485488
goto fail;
489+
}
486490

487491
(void) device_update_description(u, dev, path);
488492

489-
/* The additional systemd udev properties we only interpret
490-
* for the main object */
493+
/* The additional systemd udev properties we only interpret for the main object */
491494
if (main)
492495
(void) device_add_udev_wants(u, dev);
493496
}
@@ -499,13 +502,11 @@ static int device_setup_unit(Manager *m, struct udev_device *dev, const char *pa
499502
device_upgrade_mount_deps(u);
500503

501504
/* Note that this won't dispatch the load queue, the caller has to do that if needed and appropriate */
502-
503505
unit_add_to_dbus_queue(u);
506+
504507
return 0;
505508

506509
fail:
507-
log_unit_warning_errno(u, r, "Failed to set up device unit: %m");
508-
509510
if (delete)
510511
unit_free(u);
511512

@@ -584,45 +585,51 @@ static int device_process_new(Manager *m, struct udev_device *dev) {
584585
return 0;
585586
}
586587

587-
static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask) {
588-
DeviceFound n, previous;
589-
588+
static void device_found_changed(Device *d, DeviceFound previous, DeviceFound now) {
590589
assert(d);
591590

592-
n = (d->found & ~mask) | (found & mask);
593-
if (n == d->found)
594-
return;
595-
596-
previous = d->found;
597-
d->found = n;
598-
599-
if (MANAGER_IS_RUNNING(UNIT(d)->manager))
600-
return;
601-
602591
/* Didn't exist before, but does now? if so, generate a new invocation ID for it */
603-
if (previous == DEVICE_NOT_FOUND && d->found != DEVICE_NOT_FOUND)
592+
if (previous == DEVICE_NOT_FOUND && now != DEVICE_NOT_FOUND)
604593
(void) unit_acquire_invocation_id(UNIT(d));
605594

606-
if (d->found & DEVICE_FOUND_UDEV)
607-
/* When the device is known to udev we consider it
608-
* plugged. */
595+
if (FLAGS_SET(now, DEVICE_FOUND_UDEV))
596+
/* When the device is known to udev we consider it plugged. */
609597
device_set_state(d, DEVICE_PLUGGED);
610-
else if (d->found != DEVICE_NOT_FOUND && (previous & DEVICE_FOUND_UDEV) == 0)
611-
/* If the device has not been seen by udev yet, but is
612-
* now referenced by the kernel, then we assume the
598+
else if (now != DEVICE_NOT_FOUND && !FLAGS_SET(previous, DEVICE_FOUND_UDEV))
599+
/* If the device has not been seen by udev yet, but is now referenced by the kernel, then we assume the
613600
* kernel knows it now, and udev might soon too. */
614601
device_set_state(d, DEVICE_TENTATIVE);
615602
else {
616-
/* If nobody sees the device, or if the device was
617-
* previously seen by udev and now is only referenced
618-
* from the kernel, then we consider the device is
619-
* gone, the kernel just hasn't noticed it yet. */
620-
603+
/* If nobody sees the device, or if the device was previously seen by udev and now is only referenced
604+
* from the kernel, then we consider the device is gone, the kernel just hasn't noticed it yet. */
621605
device_set_state(d, DEVICE_DEAD);
622606
device_unset_sysfs(d);
623607
}
624608
}
625609

610+
static void device_update_found_one(Device *d, DeviceFound found, DeviceFound mask) {
611+
assert(d);
612+
613+
if (MANAGER_IS_RUNNING(UNIT(d)->manager)) {
614+
DeviceFound n, previous;
615+
616+
/* When we are already running, then apply the new mask right-away, and trigger state changes
617+
* right-away */
618+
619+
n = (d->found & ~mask) | (found & mask);
620+
if (n == d->found)
621+
return;
622+
623+
previous = d->found;
624+
d->found = n;
625+
626+
device_found_changed(d, previous, n);
627+
} else
628+
/* We aren't running yet, let's apply the new mask to the shadow variable instead, which we'll apply as
629+
* soon as we catch-up with the state. */
630+
d->enumerated_found = (d->enumerated_found & ~mask) | (found & mask);
631+
}
632+
626633
static void device_update_found_by_sysfs(Manager *m, const char *sysfs, DeviceFound found, DeviceFound mask) {
627634
Device *d, *l, *n;
628635

@@ -824,7 +831,7 @@ static void device_enumerate(Manager *m) {
824831
continue;
825832

826833
(void) device_process_new(m, dev);
827-
device_update_found_by_sysfs(m, sysfs, DEVICE_FOUND_UDEV_DB, DEVICE_FOUND_UDEV_DB);
834+
device_update_found_by_sysfs(m, sysfs, DEVICE_FOUND_UDEV, DEVICE_FOUND_UDEV);
828835
}
829836

830837
return;
@@ -995,7 +1002,11 @@ void device_found_node(Manager *m, const char *node, DeviceFound found, DeviceFo
9951002

9961003
/* This is called whenever we find a device referenced in /proc/swaps or /proc/self/mounts. Such a device might
9971004
* be mounted/enabled at a time where udev has not finished probing it yet, and we thus haven't learned about
998-
* it yet. In this case we will set the device unit to "tentative" state. */
1005+
* it yet. In this case we will set the device unit to "tentative" state.
1006+
*
1007+
* This takes a pair of DeviceFound flags parameters. The 'mask' parameter is a bit mask that indicates which
1008+
* bits of 'found' to copy into the per-device DeviceFound flags field. Thus, this function may be used to set
1009+
* and unset individual bits in a single call, while merging partially with previous state. */
9991010

10001011
if ((found & mask) != 0) {
10011012
_cleanup_(udev_device_unrefp) struct udev_device *dev = NULL;
@@ -1039,6 +1050,7 @@ const UnitVTable device_vtable = {
10391050
.load = unit_load_fragment_and_dropin_optional,
10401051

10411052
.coldplug = device_coldplug,
1053+
.catchup = device_catchup,
10421054

10431055
.serialize = device_serialize,
10441056
.deserialize_item = device_deserialize_item,

src/core/device.h

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,29 @@
1111

1212
typedef struct Device Device;
1313

14+
/* A mask specifying where we have seen the device currently. This is a bitmask because the device might show up
15+
* asynchronously from each other at various places. For example, in very common case a device might already be mounted
16+
* before udev finished probing it (think: a script setting up a loopback block device, formatting it and mounting it
17+
* in quick succession). Hence we need to track precisely where it is already visible and where not. */
1418
typedef enum DeviceFound {
15-
DEVICE_NOT_FOUND = 0,
16-
DEVICE_FOUND_UDEV = 1 << 1,
17-
DEVICE_FOUND_UDEV_DB = 1 << 2,
18-
DEVICE_FOUND_MOUNT = 1 << 3,
19-
DEVICE_FOUND_SWAP = 1 << 4,
19+
DEVICE_NOT_FOUND = 0,
20+
DEVICE_FOUND_UDEV = 1U << 1, /* The device has shown up in the udev database */
21+
DEVICE_FOUND_MOUNT = 1U << 2, /* The device has shown up in /proc/self/mountinfo */
22+
DEVICE_FOUND_SWAP = 1U << 3, /* The device has shown up in /proc/swaps */
23+
DEVICE_FOUND_MASK = DEVICE_FOUND_UDEV|DEVICE_FOUND_MOUNT|DEVICE_FOUND_SWAP,
2024
} DeviceFound;
2125

2226
struct Device {
2327
Unit meta;
2428

2529
char *sysfs;
26-
DeviceFound found;
2730

28-
/* In order to be able to distinguish dependencies on
29-
different device nodes we might end up creating multiple
30-
devices for the same sysfs path. We chain them up here. */
31+
/* In order to be able to distinguish dependencies on different device nodes we might end up creating multiple
32+
* devices for the same sysfs path. We chain them up here. */
3133
LIST_FIELDS(struct Device, same_sysfs);
3234

3335
DeviceState state, deserialized_state;
36+
DeviceFound found, deserialized_found, enumerated_found;
3437

3538
bool bind_mounts;
3639
};

0 commit comments

Comments
 (0)