From: Michael Olbrich Date: Wed, 1 Feb 2012 16:17:12 +0000 (+0100) Subject: service: add watchdog timestamp X-Git-Tag: v40~18 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a6927d7ffc18c51fbb9940f5f1e89f5c7695ed63;p=systemd service: add watchdog timestamp This patch adds WatchdogTimestamp[Monotonic] to the systemd service D-Bus API. The timestamp is updated to the current time when the service calls 'sd_nofity("WATCHDOG=1\n")'. Using a timestamp instead of an 'alive' flag has two advantages: 1. No timeout is needed to define when a service is no longer alive. This simplifies both configuration (no timeout value) and implementation (no timeout event). 2. It is more robust. A 'dead' service might not be detected should systemd 'forget' to reset an 'alive' flag. It is much less likely to get a valid new timestamp if a service died. --- diff --git a/man/sd_notify.xml b/man/sd_notify.xml index 02091460..9797a5f8 100644 --- a/man/sd_notify.xml +++ b/man/sd_notify.xml @@ -151,6 +151,18 @@ itself. Example: "MAINPID=4711" + + + WATCHDOG=1 + + Tells systemd to + update the watchdog timestamp. + Services using this feature should do + this in regular intervals. A watchdog + framework can use the timestamps to + detect failed + services. + It is recommended to prefix variable names that diff --git a/src/dbus-service.c b/src/dbus-service.c index e1f63709..5d95b379 100644 --- a/src/dbus-service.c +++ b/src/dbus-service.c @@ -43,6 +43,8 @@ " \n" \ " \n" \ " \n" \ + " \n" \ + " \n" \ BUS_EXEC_COMMAND_INTERFACE("ExecStartPre") \ BUS_EXEC_COMMAND_INTERFACE("ExecStart") \ BUS_EXEC_COMMAND_INTERFACE("ExecStartPost") \ @@ -86,6 +88,8 @@ const char bus_service_invalidating_properties[] = "ExecStop\0" "ExecStopPost\0" "ExecMain\0" + "WatchdogTimestamp\0" + "WatchdogTimestampMonotonic\0" "MainPID\0" "ControlPID\0" "StatusText\0"; @@ -106,32 +110,34 @@ static const BusProperty bus_exec_main_status_properties[] = { }; static const BusProperty bus_service_properties[] = { - { "Type", bus_service_append_type, "s", offsetof(Service, type) }, - { "Restart", bus_service_append_restart, "s", offsetof(Service, restart) }, - { "PIDFile", bus_property_append_string, "s", offsetof(Service, pid_file), true }, - { "NotifyAccess", bus_service_append_notify_access, "s", offsetof(Service, notify_access) }, - { "RestartUSec", bus_property_append_usec, "t", offsetof(Service, restart_usec) }, - { "TimeoutUSec", bus_property_append_usec, "t", offsetof(Service, timeout_usec) }, + { "Type", bus_service_append_type, "s", offsetof(Service, type) }, + { "Restart", bus_service_append_restart, "s", offsetof(Service, restart) }, + { "PIDFile", bus_property_append_string, "s", offsetof(Service, pid_file), true }, + { "NotifyAccess", bus_service_append_notify_access, "s", offsetof(Service, notify_access) }, + { "RestartUSec", bus_property_append_usec, "t", offsetof(Service, restart_usec) }, + { "TimeoutUSec", bus_property_append_usec, "t", offsetof(Service, timeout_usec) }, + { "WatchdogTimestamp", bus_property_append_usec, "t", offsetof(Service, watchdog_timestamp.realtime) }, + { "WatchdogTimestampMonotonic",bus_property_append_usec, "t", offsetof(Service, watchdog_timestamp.monotonic) }, BUS_EXEC_COMMAND_PROPERTY("ExecStartPre", offsetof(Service, exec_command[SERVICE_EXEC_START_PRE]), true ), BUS_EXEC_COMMAND_PROPERTY("ExecStart", offsetof(Service, exec_command[SERVICE_EXEC_START]), true ), BUS_EXEC_COMMAND_PROPERTY("ExecStartPost", offsetof(Service, exec_command[SERVICE_EXEC_START_POST]), true ), BUS_EXEC_COMMAND_PROPERTY("ExecReload", offsetof(Service, exec_command[SERVICE_EXEC_RELOAD]), true ), BUS_EXEC_COMMAND_PROPERTY("ExecStop", offsetof(Service, exec_command[SERVICE_EXEC_STOP]), true ), BUS_EXEC_COMMAND_PROPERTY("ExecStopPost", offsetof(Service, exec_command[SERVICE_EXEC_STOP_POST]), true ), - { "PermissionsStartOnly", bus_property_append_bool, "b", offsetof(Service, permissions_start_only) }, - { "RootDirectoryStartOnly", bus_property_append_bool, "b", offsetof(Service, root_directory_start_only) }, - { "RemainAfterExit", bus_property_append_bool, "b", offsetof(Service, remain_after_exit) }, - { "GuessMainPID", bus_property_append_bool, "b", offsetof(Service, guess_main_pid) }, - { "MainPID", bus_property_append_pid, "u", offsetof(Service, main_pid) }, - { "ControlPID", bus_property_append_pid, "u", offsetof(Service, control_pid) }, - { "BusName", bus_property_append_string, "s", offsetof(Service, bus_name), true }, - { "StatusText", bus_property_append_string, "s", offsetof(Service, status_text), true }, + { "PermissionsStartOnly", bus_property_append_bool, "b", offsetof(Service, permissions_start_only) }, + { "RootDirectoryStartOnly", bus_property_append_bool, "b", offsetof(Service, root_directory_start_only) }, + { "RemainAfterExit", bus_property_append_bool, "b", offsetof(Service, remain_after_exit) }, + { "GuessMainPID", bus_property_append_bool, "b", offsetof(Service, guess_main_pid) }, + { "MainPID", bus_property_append_pid, "u", offsetof(Service, main_pid) }, + { "ControlPID", bus_property_append_pid, "u", offsetof(Service, control_pid) }, + { "BusName", bus_property_append_string, "s", offsetof(Service, bus_name), true }, + { "StatusText", bus_property_append_string, "s", offsetof(Service, status_text), true }, #ifdef HAVE_SYSV_COMPAT - { "SysVRunLevels", bus_property_append_string, "s", offsetof(Service, sysv_runlevels), true }, - { "SysVStartPriority", bus_property_append_int, "i", offsetof(Service, sysv_start_priority) }, - { "SysVPath", bus_property_append_string, "s", offsetof(Service, sysv_path), true }, + { "SysVRunLevels", bus_property_append_string, "s", offsetof(Service, sysv_runlevels), true }, + { "SysVStartPriority", bus_property_append_int, "i", offsetof(Service, sysv_start_priority) }, + { "SysVPath", bus_property_append_string, "s", offsetof(Service, sysv_path), true }, #endif - { "FsckPassNo", bus_property_append_int, "i", offsetof(Service, fsck_passno) }, + { "FsckPassNo", bus_property_append_int, "i", offsetof(Service, fsck_passno) }, { NULL, } }; diff --git a/src/service.c b/src/service.c index 4dcd3060..d2a2dfc6 100644 --- a/src/service.c +++ b/src/service.c @@ -205,6 +205,19 @@ static void service_connection_unref(Service *s) { unit_ref_unset(&s->accept_socket); } +static void service_stop_watchdog(Service *s) { + assert(s); + + s->watchdog_timestamp.realtime = 0; + s->watchdog_timestamp.monotonic = 0; +} + +static void service_reset_watchdog(Service *s) { + assert(s); + + dual_timestamp_get(&s->watchdog_timestamp); +} + static void service_done(Unit *u) { Service *s = SERVICE(u); @@ -1476,6 +1489,9 @@ static void service_set_state(Service *s, ServiceState state) { service_connection_unref(s); } + if (state == SERVICE_STOP) + service_stop_watchdog(s); + /* For the inactive states unit_notify() will trim the cgroup, * but for exit we have to do that ourselves... */ if (state == SERVICE_EXITED && UNIT(s)->manager->n_reloading <= 0) @@ -2411,6 +2427,8 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) { unit_serialize_item_format(u, f, "main-exec-status-status", "%i", s->main_exec_status.status); } } + if (dual_timestamp_is_set(&s->watchdog_timestamp)) + dual_timestamp_serialize(f, "watchdog-timestamp", &s->watchdog_timestamp); return 0; } @@ -2511,6 +2529,8 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value, dual_timestamp_deserialize(value, &s->main_exec_status.start_timestamp); else if (streq(key, "main-exec-status-exit")) dual_timestamp_deserialize(value, &s->main_exec_status.exit_timestamp); + else if (streq(key, "watchdog-timestamp")) + dual_timestamp_deserialize(value, &s->watchdog_timestamp); else log_debug("Unknown serialization key '%s'", key); @@ -3069,6 +3089,10 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) { } } + if (strv_find(tags, "WATCHDOG=1")) { + log_debug("%s: got WATCHDOG=1", u->id); + service_reset_watchdog(s); + } /* Notify clients about changed status or main pid */ unit_add_to_dbus_queue(u); diff --git a/src/service.h b/src/service.h index 0b4f8be8..dbae68b8 100644 --- a/src/service.h +++ b/src/service.h @@ -100,6 +100,8 @@ struct Service { usec_t restart_usec; usec_t timeout_usec; + dual_timestamp watchdog_timestamp; + ExecCommand* exec_command[_SERVICE_EXEC_COMMAND_MAX]; ExecContext exec_context; diff --git a/src/systemd/sd-daemon.h b/src/systemd/sd-daemon.h index eb2a6065..7b664bf4 100644 --- a/src/systemd/sd-daemon.h +++ b/src/systemd/sd-daemon.h @@ -217,6 +217,11 @@ int sd_is_mq(int fd, const char *path); MAINPID=... The main pid of a daemon, in case systemd did not fork off the process itself. Example: "MAINPID=4711" + WATCHDOG=1 Tells systemd to update the watchdog timestamp. + Services using this feature should do this in + regular intervals. A watchdog framework can use the + timestamps to detect failed services. + Daemons can choose to send additional variables. However, it is recommended to prefix variable names not listed above with X_.