From 39a18c60d07319ebfcfd476556729c2cadd616d6 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Mon, 23 Apr 2012 01:24:04 +0200 Subject: [PATCH] job: serialize jobs properly Jobs were not preserved correctly over a daemon-reload operation. A systemctl process waiting for a job completion received a job removal signal. The job itself changed its id. The job timeout started ticking all over again. This fixes the deficiencies. --- src/core/dbus-job.c | 6 +- src/core/job.c | 162 ++++++++++++++++++++++++++++++++++++++++++-- src/core/job.h | 6 ++ src/core/unit.c | 47 ++++++++++--- src/core/unit.h | 4 +- 5 files changed, 204 insertions(+), 21 deletions(-) diff --git a/src/core/dbus-job.c b/src/core/dbus-job.c index ef839ffb..4b20d495 100644 --- a/src/core/dbus-job.c +++ b/src/core/dbus-job.c @@ -232,7 +232,7 @@ static int job_send_message(Job *j, DBusMessage* (*new_message)(Job *j)) { assert(j); assert(new_message); - if (bus_has_subscriber(j->manager)) { + if (bus_has_subscriber(j->manager) || j->forgot_bus_clients) { m = new_message(j); if (!m) goto oom; @@ -347,7 +347,7 @@ void bus_job_send_change_signal(Job *j) { j->in_dbus_queue = false; } - if (!bus_has_subscriber(j->manager) && !j->bus_client_list) { + if (!bus_has_subscriber(j->manager) && !j->bus_client_list && !j->forgot_bus_clients) { j->sent_dbus_new_signal = true; return; } @@ -366,7 +366,7 @@ oom: void bus_job_send_removed_signal(Job *j) { assert(j); - if (!bus_has_subscriber(j->manager) && !j->bus_client_list) + if (!bus_has_subscriber(j->manager) && !j->bus_client_list && !j->forgot_bus_clients) return; if (!j->sent_dbus_new_signal) diff --git a/src/core/job.c b/src/core/job.c index 07d4fc3c..326460d5 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -47,22 +47,36 @@ JobBusClient* job_bus_client_new(DBusConnection *connection, const char *name) { return cl; } -Job* job_new(Unit *unit, JobType type) { +Job* job_new_raw(Unit *unit) { Job *j; - assert(type < _JOB_TYPE_MAX); + /* used for deserialization */ + assert(unit); - if (!(j = new0(Job, 1))) + j = new0(Job, 1); + if (!j) return NULL; j->manager = unit->manager; - j->id = j->manager->current_job_id++; - j->type = type; j->unit = unit; - j->timer_watch.type = WATCH_INVALID; + return j; +} + +Job* job_new(Unit *unit, JobType type) { + Job *j; + + assert(type < _JOB_TYPE_MAX); + + j = job_new_raw(unit); + if (!j) + return NULL; + + j->id = j->manager->current_job_id++; + j->type = type; + /* We don't link it here, that's what job_dependency() is for */ return j; @@ -105,7 +119,9 @@ void job_uninstall(Job *j) { assert(j->unit->job == j); /* Detach from next 'bigger' objects */ - bus_job_send_removed_signal(j); + /* daemon-reload should be transparent to job observers */ + if (j->manager->n_reloading <= 0) + bus_job_send_removed_signal(j); j->unit->job = NULL; unit_add_to_gc_queue(j->unit); @@ -180,6 +196,18 @@ Job* job_install(Job *j) { return j; } +void job_install_deserialized(Job *j) { + assert(!j->installed); + + if (j->unit->job) { + log_debug("Unit %s already has a job installed. Not installing deserialized job.", j->unit->id); + return; + } + j->unit->job = j; + j->installed = true; + log_debug("Reinstalled deserialized job %s/%s as %u", j->unit->id, job_type_to_string(j->type), (unsigned) j->id); +} + JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts) { JobDependency *l; @@ -733,6 +761,126 @@ void job_timer_event(Job *j, uint64_t n_elapsed, Watch *w) { job_finish_and_invalidate(j, JOB_TIMEOUT); } +int job_serialize(Job *j, FILE *f, FDSet *fds) { + fprintf(f, "job-id=%u\n", j->id); + fprintf(f, "job-type=%s\n", job_type_to_string(j->type)); + fprintf(f, "job-state=%s\n", job_state_to_string(j->state)); + fprintf(f, "job-override=%s\n", yes_no(j->override)); + fprintf(f, "job-sent-dbus-new-signal=%s\n", yes_no(j->sent_dbus_new_signal)); + fprintf(f, "job-ignore-order=%s\n", yes_no(j->ignore_order)); + /* Cannot save bus clients. Just note the fact that we're losing + * them. job_send_message() will fallback to broadcasting. */ + fprintf(f, "job-forgot-bus-clients=%s\n", + yes_no(j->forgot_bus_clients || j->bus_client_list)); + if (j->timer_watch.type == WATCH_JOB_TIMER) { + int copy = fdset_put_dup(fds, j->timer_watch.fd); + if (copy < 0) + return copy; + fprintf(f, "job-timer-watch-fd=%d\n", copy); + } + + /* End marker */ + fputc('\n', f); + return 0; +} + +int job_deserialize(Job *j, FILE *f, FDSet *fds) { + for (;;) { + char line[LINE_MAX], *l, *v; + size_t k; + + if (!fgets(line, sizeof(line), f)) { + if (feof(f)) + return 0; + return -errno; + } + + char_array_0(line); + l = strstrip(line); + + /* End marker */ + if (l[0] == 0) + return 0; + + k = strcspn(l, "="); + + if (l[k] == '=') { + l[k] = 0; + v = l+k+1; + } else + v = l+k; + + if (streq(l, "job-id")) { + if (safe_atou32(v, &j->id) < 0) + log_debug("Failed to parse job id value %s", v); + } else if (streq(l, "job-type")) { + JobType t = job_type_from_string(v); + if (t < 0) + log_debug("Failed to parse job type %s", v); + else + j->type = t; + } else if (streq(l, "job-state")) { + JobState s = job_state_from_string(v); + if (s < 0) + log_debug("Failed to parse job state %s", v); + else + j->state = s; + } else if (streq(l, "job-override")) { + int b = parse_boolean(v); + if (b < 0) + log_debug("Failed to parse job override flag %s", v); + else + j->override = j->override || b; + } else if (streq(l, "job-sent-dbus-new-signal")) { + int b = parse_boolean(v); + if (b < 0) + log_debug("Failed to parse job sent_dbus_new_signal flag %s", v); + else + j->sent_dbus_new_signal = j->sent_dbus_new_signal || b; + } else if (streq(l, "job-ignore-order")) { + int b = parse_boolean(v); + if (b < 0) + log_debug("Failed to parse job ignore_order flag %s", v); + else + j->ignore_order = j->ignore_order || b; + } else if (streq(l, "job-forgot-bus-clients")) { + int b = parse_boolean(v); + if (b < 0) + log_debug("Failed to parse job forgot_bus_clients flag %s", v); + else + j->forgot_bus_clients = j->forgot_bus_clients || b; + } else if (streq(l, "job-timer-watch-fd")) { + int fd; + if (safe_atoi(v, &fd) < 0 || fd < 0 || !fdset_contains(fds, fd)) + log_debug("Failed to parse job-timer-watch-fd value %s", v); + else { + if (j->timer_watch.type == WATCH_JOB_TIMER) + close_nointr_nofail(j->timer_watch.fd); + + j->timer_watch.type = WATCH_JOB_TIMER; + j->timer_watch.fd = fdset_remove(fds, fd); + j->timer_watch.data.job = j; + } + } + } +} + +int job_coldplug(Job *j) { + struct epoll_event ev; + + if (j->timer_watch.type != WATCH_JOB_TIMER) + return 0; + + zero(ev); + ev.data.ptr = &j->timer_watch; + ev.events = EPOLLIN; + + if (epoll_ctl(j->manager->epoll_fd, EPOLL_CTL_ADD, j->timer_watch.fd, &ev) < 0) + return -errno; + + return 0; +} + static const char* const job_state_table[_JOB_STATE_MAX] = { [JOB_WAITING] = "waiting", [JOB_RUNNING] = "running" diff --git a/src/core/job.h b/src/core/job.h index e869856d..9da7e84a 100644 --- a/src/core/job.h +++ b/src/core/job.h @@ -141,15 +141,21 @@ struct Job { bool in_dbus_queue:1; bool sent_dbus_new_signal:1; bool ignore_order:1; + bool forgot_bus_clients:1; }; JobBusClient* job_bus_client_new(DBusConnection *connection, const char *name); Job* job_new(Unit *unit, JobType type); +Job* job_new_raw(Unit *unit); void job_free(Job *job); Job* job_install(Job *j); +void job_install_deserialized(Job *j); void job_uninstall(Job *j); void job_dump(Job *j, FILE*f, const char *prefix); +int job_serialize(Job *j, FILE *f, FDSet *fds); +int job_deserialize(Job *j, FILE *f, FDSet *fds); +int job_coldplug(Job *j); JobDependency* job_dependency_new(Job *subject, Job *object, bool matters, bool conflicts); void job_dependency_free(JobDependency *l); diff --git a/src/core/unit.c b/src/core/unit.c index ed3f1763..1f8d52b3 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -2288,8 +2288,10 @@ int unit_serialize(Unit *u, FILE *f, FDSet *fds) { if ((r = UNIT_VTABLE(u)->serialize(u, f, fds)) < 0) return r; - if (u->job) - unit_serialize_item(u, f, "job", job_type_to_string(u->job->type)); + if (u->job) { + fprintf(f, "job\n"); + job_serialize(u->job, f, fds); + } dual_timestamp_serialize(f, "inactive-exit-timestamp", &u->inactive_exit_timestamp); dual_timestamp_serialize(f, "active-enter-timestamp", &u->active_enter_timestamp); @@ -2368,13 +2370,32 @@ int unit_deserialize(Unit *u, FILE *f, FDSet *fds) { v = l+k; if (streq(l, "job")) { - JobType type; - - if ((type = job_type_from_string(v)) < 0) - log_debug("Failed to parse job type value %s", v); - else - u->deserialized_job = type; + if (v[0] == '\0') { + /* new-style serialized job */ + Job *j = job_new_raw(u); + if (!j) + return -ENOMEM; + + r = job_deserialize(j, f, fds); + if (r < 0) { + job_free(j); + return r; + } + job_install_deserialized(j); + r = hashmap_put(u->manager->jobs, UINT32_TO_PTR(j->id), j); + if (r < 0) { + job_free(j); + return r; + } + } else { + /* legacy */ + JobType type = job_type_from_string(v); + if (type < 0) + log_debug("Failed to parse job type value %s", v); + else + u->deserialized_job = type; + } continue; } else if (streq(l, "inactive-exit-timestamp")) { dual_timestamp_deserialize(v, &u->inactive_exit_timestamp); @@ -2450,8 +2471,14 @@ int unit_coldplug(Unit *u) { if ((r = UNIT_VTABLE(u)->coldplug(u)) < 0) return r; - if (u->deserialized_job >= 0) { - if ((r = manager_add_job(u->manager, u->deserialized_job, u, JOB_IGNORE_REQUIREMENTS, false, NULL, NULL)) < 0) + if (u->job) { + r = job_coldplug(u->job); + if (r < 0) + return r; + } else if (u->deserialized_job >= 0) { + /* legacy */ + r = manager_add_job(u->manager, u->deserialized_job, u, JOB_IGNORE_REQUIREMENTS, false, NULL, NULL); + if (r < 0) return r; u->deserialized_job = _JOB_TYPE_INVALID; diff --git a/src/core/unit.h b/src/core/unit.h index d8f4644c..1b777bfa 100644 --- a/src/core/unit.h +++ b/src/core/unit.h @@ -200,7 +200,9 @@ struct Unit { unsigned gc_marker; /* When deserializing, temporarily store the job type for this - * unit here, if there was a job scheduled */ + * unit here, if there was a job scheduled. + * Only for deserializing from a legacy version. New style uses full + * serialized jobs. */ int deserialized_job; /* This is actually of type JobType */ /* Error code when we didn't manage to load the unit (negative) */ -- 2.39.5