From: Lennart Poettering Date: Thu, 23 Sep 2010 13:01:41 +0000 (+0200) Subject: readahead: implement minimal readahead logic based on fanotify(), mincore() and reada... X-Git-Tag: v11~95 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=22be093ffb403a1c474037939ca9b88b1ee39f77;p=systemd readahead: implement minimal readahead logic based on fanotify(), mincore() and readahead() --- diff --git a/.gitignore b/.gitignore index 7cecf1c3..b5fc8d72 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +systemd-readahead-collect +systemd-readahead-replay systemd-reply-password systemd-ask-password-agent systemd-ask-password diff --git a/Makefile.am b/Makefile.am index 0ac6b1ac..2cd3debf 100644 --- a/Makefile.am +++ b/Makefile.am @@ -92,7 +92,9 @@ rootlibexec_PROGRAMS = \ systemd-remount-api-vfs \ systemd-kmsg-syslogd \ systemd-vconsole-setup \ - systemd-reply-password + systemd-reply-password \ + systemd-readahead-collect \ + systemd-readahead-replay noinst_PROGRAMS = \ test-engine \ @@ -699,6 +701,30 @@ systemd_reply_password_SOURCES = \ systemd_reply_password_LDADD = \ libsystemd-basic.la +systemd_readahead_collect_SOURCES = \ + src/readahead-collect.c \ + src/sd-daemon.c \ + src/readahead-common.c + +systemd_readahead_collect_CFLAGS = \ + $(UDEV_CFLAGS) + +systemd_readahead_collect_LDADD = \ + libsystemd-basic.la \ + $(UDEV_LIBS) + +systemd_readahead_replay_SOURCES = \ + src/readahead-replay.c \ + src/sd-daemon.c \ + src/readahead-common.c + +systemd_readahead_replay_CFLAGS = \ + $(UDEV_CFLAGS) + +systemd_readahead_replay_LDADD = \ + libsystemd-basic.la \ + $(UDEV_LIBS) + systemd_cgls_SOURCES = \ src/cgls.c \ src/cgroup-show.c \ diff --git a/configure.ac b/configure.ac index e3c1fdde..da627a00 100644 --- a/configure.ac +++ b/configure.ac @@ -27,6 +27,7 @@ AM_INIT_AUTOMAKE([foreign 1.11 -Wall -Wno-portability silent-rules tar-pax subdi AC_SUBST(PACKAGE_URL, [http://www.freedesktop.org/wiki/Software/systemd]) AC_CANONICAL_HOST 
+AC_DEFINE_UNQUOTED([CANONICAL_HOST], "$host", [Canonical host string.]) AM_SILENT_RULES([yes]) diff --git a/src/hashmap.c b/src/hashmap.c index 51f00131..4b187057 100644 --- a/src/hashmap.c +++ b/src/hashmap.c @@ -476,6 +476,21 @@ void* hashmap_steal_first(Hashmap *h) { return data; } +void* hashmap_steal_first_key(Hashmap *h) { /* Removes the entry at h->iterate_list_head and returns its key (not its value); returns NULL if h is NULL or has no entries. */ + void *key; + + if (!h) + return NULL; + + if (!h->iterate_list_head) + return NULL; + + key = (void*) h->iterate_list_head->key; /* read the key before the entry is removed */ + remove_entry(h, h->iterate_list_head); + + return key; +} + unsigned hashmap_size(Hashmap *h) { if (!h) diff --git a/src/hashmap.h b/src/hashmap.h index c48d6b31..ac5a8ae0 100644 --- a/src/hashmap.h +++ b/src/hashmap.h @@ -72,6 +72,7 @@ void *hashmap_iterate_skip(Hashmap *h, const void *key, Iterator *i); void hashmap_clear(Hashmap *h); void *hashmap_steal_first(Hashmap *h); +void *hashmap_steal_first_key(Hashmap *h); void* hashmap_first(Hashmap *h); void* hashmap_last(Hashmap *h); diff --git a/src/linux/fanotify.h b/src/linux/fanotify.h new file mode 100644 index 00000000..63531a6b --- /dev/null +++ b/src/linux/fanotify.h @@ -0,0 +1,98 @@ +#ifndef _LINUX_FANOTIFY_H +#define _LINUX_FANOTIFY_H + +#include + +/* the following events that user-space can register for */ +#define FAN_ACCESS 0x00000001 /* File was accessed */ +#define FAN_MODIFY 0x00000002 /* File was modified */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ +#define FAN_OPEN 0x00000020 /* File was opened */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* interested in child events */ + +/* FIXME currently Q's have no limit.... 
*/ +#define FAN_Q_OVERFLOW 0x00004000 /* Event queue overflowed */ + +#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ +#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ + +/* helper events */ +#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ + +/* flags used for fanotify_init() */ +#define FAN_CLOEXEC 0x00000001 +#define FAN_NONBLOCK 0x00000002 + +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK) + +/* flags used for fanotify_mark() */ +#define FAN_MARK_ADD 0x00000001 +#define FAN_MARK_REMOVE 0x00000002 +#define FAN_MARK_DONT_FOLLOW 0x00000004 +#define FAN_MARK_ONLYDIR 0x00000008 +#define FAN_MARK_MOUNT 0x00000010 +#define FAN_MARK_IGNORED_MASK 0x00000020 +#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 +#define FAN_MARK_FLUSH 0x00000080 + +#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ + FAN_MARK_REMOVE |\ + FAN_MARK_DONT_FOLLOW |\ + FAN_MARK_ONLYDIR |\ + FAN_MARK_MOUNT |\ + FAN_MARK_IGNORED_MASK |\ + FAN_MARK_IGNORED_SURV_MODIFY) /* NOTE(review): FAN_MARK_FLUSH is defined above but is not part of this mask - confirm that is intentional */ + +/* + * All of the events - we build the list by hand so that we can add flags in + * the future and not break backward compatibility. Apps will get only the + * events that they originally wanted. Be sure to add new events here! 
+ */ +#define FAN_ALL_EVENTS (FAN_ACCESS |\ + FAN_MODIFY |\ + FAN_CLOSE |\ + FAN_OPEN) + +/* + * All events which require a permission response from userspace + */ +#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ + FAN_ACCESS_PERM) + +#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ + FAN_ALL_PERM_EVENTS |\ + FAN_Q_OVERFLOW) + +#define FANOTIFY_METADATA_VERSION 2 + +struct fanotify_event_metadata { + __u32 event_len; + __u32 vers; + __u64 mask; + __s32 fd; + __s32 pid; +} __attribute__ ((packed)); + +struct fanotify_response { + __s32 fd; + __u32 response; +} __attribute__ ((packed)); + +/* Legit userspace responses to a _PERM event */ +#define FAN_ALLOW 0x01 +#define FAN_DENY 0x02 + +/* Helper functions to deal with fanotify_event_metadata buffers */ +#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) + +#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \ + (struct fanotify_event_metadata*)(((char *)(meta)) + \ + (meta)->event_len)) + +#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len <= (long)(len)) + +#endif /* _LINUX_FANOTIFY_H */ diff --git a/src/macro.h b/src/macro.h index 44b48177..12ccbb15 100644 --- a/src/macro.h +++ b/src/macro.h @@ -27,6 +27,8 @@ #include #include +#define PAGE_SIZE 4096 /* NOTE(review): fixed at 4096 bytes rather than queried at runtime - confirm this matches the page size of every target */ + #define _printf_attr_(a,b) __attribute__ ((format (printf, a, b))) #define _sentinel_ __attribute__ ((sentinel)) #define _noreturn_ __attribute__((noreturn)) @@ -49,6 +51,10 @@ static inline size_t ALIGN(size_t l) { return ((l + sizeof(void*) - 1) & ~(sizeof(void*) - 1)); } +static inline size_t PAGE_ALIGN(size_t l) { /* Rounds l up to the next multiple of PAGE_SIZE. */ + return ((l + PAGE_SIZE - 1) & ~(PAGE_SIZE -1)); +} + #define ELEMENTSOF(x) (sizeof(x)/sizeof((x)[0])) #define MAX(a,b) \ diff --git a/src/missing.h b/src/missing.h index 418dbb8c..defe885e 100644 --- a/src/missing.h +++ b/src/missing.h @@ -76,4 +76,29 @@ static inline int pivot_root(const char 
*new_root, const char *put_old) { return syscall(SYS_pivot_root, new_root, put_old); } +#ifdef __x86_64__ +#ifndef __NR_fanotify_init +#define __NR_fanotify_init 300 +#endif +#ifndef __NR_fanotify_mark +#define __NR_fanotify_mark 301 +#endif +#else /* NOTE(review): every architecture other than x86_64 falls through to 338/339 (presumably the i386 numbers) unless its headers already define them - verify for other ports */ +#ifndef __NR_fanotify_init +#define __NR_fanotify_init 338 +#endif +#ifndef __NR_fanotify_mark +#define __NR_fanotify_mark 339 +#endif +#endif + +static inline int fanotify_init(unsigned int flags, unsigned int event_f_flags) { /* Thin wrapper: forwards both arguments to syscall(__NR_fanotify_init, ...) and returns its result. */ + return syscall(__NR_fanotify_init, flags, event_f_flags); +} + +static inline int fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, + int dfd, const char *pathname) { /* Thin wrapper around syscall(__NR_fanotify_mark, ...). NOTE(review): mask is 64-bit and is handed to the variadic syscall() as one argument; 32-bit ABIs may need it split into two words - verify. */ + return syscall(__NR_fanotify_mark, fanotify_fd, flags, mask, dfd, pathname); +} + #endif diff --git a/src/readahead-collect.c b/src/readahead-collect.c new file mode 100644 index 00000000..93a04521 --- /dev/null +++ b/src/readahead-collect.c @@ -0,0 +1,437 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2010 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "missing.h" +#include "util.h" +#include "set.h" +#include "sd-daemon.h" +#include "ioprio.h" +#include "readahead-common.h" + +/* + fixme: + + - BTRFS_IOC_DEFRAG +*/ + +#define MINCORE_VEC_SIZE (READAHEAD_FILE_SIZE_MAX/PAGE_SIZE) + +static int pack_file(FILE *pack, const char *fn) { /* Appends one record for fn to pack: the path, the page ranges mincore() reports as resident, then a 0/0 end marker. Returns 0 when the file is written or skipped by file_verify(), a negative errno-style value when open/fstat/mmap/mincore fails. */ + struct stat st; + void *start = MAP_FAILED; + uint8_t vec[MINCORE_VEC_SIZE]; + uint32_t b, c; + size_t l, pages; + bool mapped; + int r = 0, fd = -1, k; + + assert(pack); + assert(fn); + + if ((fd = open(fn, O_RDONLY|O_CLOEXEC|O_NOATIME|O_NOCTTY|O_NOFOLLOW)) < 0) { + log_warning("open(%s) failed: %m", fn); + r = -errno; + goto finish; + } + + if ((k = file_verify(fd, fn, &st)) <= 0) { + r = k; + goto finish; + } + + l = PAGE_ALIGN(st.st_size); + if ((start = mmap(NULL, l, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) { + log_warning("mmap(%s) failed: %m", fn); + r = -errno; + goto finish; + } + + if (mincore(start, l, vec) < 0) { + log_warning("mincore(%s) failed: %m", fn); + r = -errno; + goto finish; + } + + fputs(fn, pack); + fputc('\n', pack); + + pages = l / PAGE_SIZE; + mapped = false; + for (c = 0; c < pages; c++) { + bool new_mapped = (vec[c] & 1); + + if (!mapped && new_mapped) + b = c; /* a resident run starts here */ + else if (mapped && !new_mapped) { + fwrite(&b, sizeof(b), 1, pack); + fwrite(&c, sizeof(c), 1, pack); + + log_debug("%s: page %u to %u", fn, b, c); + } + + mapped = new_mapped; + } + + /* We don't write any range data if we should read the entire file */ + if (mapped && b > 0) { + fwrite(&b, sizeof(b), 1, pack); + fwrite(&c, sizeof(c), 1, pack); + + log_debug("%s: page %u to %u", fn, b, c); + } + + /* End marker */ + b = 0; + fwrite(&b, sizeof(b), 1, pack); + fwrite(&b, sizeof(b), 1, pack); + +finish: + if (start != MAP_FAILED) + munmap(start, l); + + if (fd >= 0) + 
close_nointr_nofail(fd); + + return r; +} + +static unsigned long fd_first_block(int fd) { /* Returns fe_physical of the first extent reported by FS_IOC_FIEMAP, or 0 when the ioctl fails, no extent is mapped, or the extent is flagged FIEMAP_EXTENT_UNKNOWN. */ + struct { + struct fiemap fiemap; + struct fiemap_extent extent; + } data; + + zero(data); + data.fiemap.fm_length = ~0ULL; + data.fiemap.fm_extent_count = 1; + + if (ioctl(fd, FS_IOC_FIEMAP, &data) < 0) + return 0; + + if (data.fiemap.fm_mapped_extents <= 0) + return 0; + + if (data.fiemap.fm_extents[0].fe_flags & FIEMAP_EXTENT_UNKNOWN) + return 0; + + return (unsigned long) data.fiemap.fm_extents[0].fe_physical; +} + +struct item { + const char *path; + unsigned long block; +}; + +static int qsort_compare(const void *a, const void *b) { /* qsort() comparator for struct item: ascending block, ties broken by strcmp() on path. */ + const struct item *i, *j; + + i = a; + j = b; + + if (i->block < j->block) + return -1; + if (i->block > j->block) + return 1; + + return strcmp(i->path, j->path); +} + +static int collect(const char *root) { /* Watches FAN_OPEN events on the mount containing root until SIGINT/SIGTERM arrives, then writes one pack_file() record per distinct path to <root>/.readahead (via <root>/.readahead.new + rename). Returns 0 on success, negative errno-style value on failure. */ + enum { + FD_FANOTIFY, + FD_SIGNAL, + _FD_MAX + }; + struct pollfd pollfd[_FD_MAX]; + int fanotify_fd = -1, signal_fd = -1, r = 0; + pid_t my_pid; + Hashmap *files = NULL; + Iterator i; + char *p, *q; + sigset_t mask; + FILE *pack = NULL; + char *pack_fn_new = NULL, *pack_fn = NULL; + bool on_ssd; + + assert(root); + + if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(), IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) < 0) + log_warning("Failed to set IDLE IO priority class: %m"); + + assert_se(sigemptyset(&mask) == 0); + sigset_add_many(&mask, SIGINT, SIGTERM, -1); + assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); + + if ((signal_fd = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0) { + log_error("signalfd(): %m"); + r = -errno; + goto finish; + } + + if (!(files = hashmap_new(string_hash_func, string_compare_func))) { + log_error("Failed to allocate set."); + r = -ENOMEM; + goto finish; + } + + if ((fanotify_fd = fanotify_init(FAN_CLOEXEC, O_RDONLY|O_LARGEFILE|O_CLOEXEC|O_NOATIME)) < 0) { + log_error("Failed to create fanotify object: %m"); + r = -errno; + goto finish; + } + + if (fanotify_mark(fanotify_fd, FAN_MARK_ADD|FAN_MARK_MOUNT, 
FAN_OPEN, AT_FDCWD, root) < 0) { + log_error("Failed to mark %s: %m", root); + r = -errno; + goto finish; + } + + my_pid = getpid(); + + zero(pollfd); + pollfd[FD_FANOTIFY].fd = fanotify_fd; + pollfd[FD_FANOTIFY].events = POLLIN; + pollfd[FD_SIGNAL].fd = signal_fd; + pollfd[FD_SIGNAL].events = POLLIN; + + sd_notify(0, + "READY=1\n" + "STATUS=Collecting readahead data"); + + log_debug("Collecting..."); + + for (;;) { + union { + struct fanotify_event_metadata metadata; + char buffer[4096]; + } data; + ssize_t n; + struct fanotify_event_metadata *m; + + if (poll(pollfd, _FD_MAX, -1) < 0) { + + if (errno == EINTR) + continue; + + log_error("poll(): %m"); + r = -errno; + goto finish; + } + + if (pollfd[FD_SIGNAL].revents != 0) + break; + + if ((n = read(fanotify_fd, &data, sizeof(data))) < 0) { + + if (errno == EINTR || errno == EAGAIN) + continue; + + log_error("Failed to read event: %m"); + r = -errno; + goto finish; + } + + m = &data.metadata; + while (FAN_EVENT_OK(m, n)) { + + if (m->pid != my_pid && m->fd >= 0) { + char fn[PATH_MAX]; + int k; + + snprintf(fn, sizeof(fn), "/proc/self/fd/%i", m->fd); + char_array_0(fn); + + if ((k = readlink_malloc(fn, &p)) >= 0) { + + if (hashmap_get(files, p)) + /* Already read */ + free(p); + else { + unsigned long ul; + + ul = fd_first_block(m->fd); + + if ((k = hashmap_put(files, p, ULONG_TO_PTR(ul))) < 0) { + + if (k != -EEXIST) + log_warning("set_put() failed: %s", strerror(-k)); + + free(p); + } + } + + } else + log_warning("readlink(%s) failed: %s", fn, strerror(-k)); + } + + if (m->fd >= 0) /* fd 0 is a valid descriptor; only negative values mean there is nothing to close */ + close_nointr_nofail(m->fd); + + m = FAN_EVENT_NEXT(m, n); + } + + } + + if (fanotify_fd >= 0) { + close_nointr_nofail(fanotify_fd); + fanotify_fd = -1; + } + + log_debug("Writing Pack File..."); + + on_ssd = fs_on_ssd(root) > 0; /* fs_on_ssd() returns a negative errno on failure; that must not be read as "SSD" */ + log_debug("On SSD: %s", yes_no(on_ssd)); + + asprintf(&pack_fn, "%s/.readahead", root); + asprintf(&pack_fn_new, "%s/.readahead.new", root); + + if (!pack_fn || !pack_fn_new) { + log_error("Out of memory"); + r = 
-ENOMEM; + goto finish; + } + + if (!(pack = fopen(pack_fn_new, "we"))) { + log_error("Failed to open pack file: %m"); + r = -errno; + goto finish; + } + + fputs(CANONICAL_HOST "\n", pack); + putc(on_ssd ? 'S' : 'R', pack); + + if (on_ssd) { + + /* On SSD, just write things out in the order the + * files were accessed */ + + HASHMAP_FOREACH_KEY(q, p, files, i) + pack_file(pack, p); + } else { + struct item *ordered, *j; + unsigned k, n; + + /* On rotating media, order things by the block + * numbers */ + + log_debug("Ordering..."); + + n = hashmap_size(files); + if (!(ordered = new(struct item, n))) { + log_error("Out of memory"); + r = -ENOMEM; + goto finish; + } + + j = ordered; + HASHMAP_FOREACH_KEY(q, p, files, i) { + j->path = p; + j->block = PTR_TO_ULONG(q); + j++; + } + + assert(ordered + n == j); + + qsort(ordered, n, sizeof(struct item), qsort_compare); + + for (k = 0; k < n; k++) + pack_file(pack, ordered[k].path); + + free(ordered); + } + + log_debug("Finalizing..."); + + fflush(pack); + + if (ferror(pack)) { + log_error("Failed to write pack file."); + r = -EIO; + goto finish; + } + + if (rename(pack_fn_new, pack_fn) < 0) { + log_error("Failed to rename readahead file: %m"); + r = -errno; + goto finish; + } + + fclose(pack); + pack = NULL; + + log_debug("Done."); + +finish: + if (fanotify_fd >= 0) + close_nointr_nofail(fanotify_fd); + + if (signal_fd >= 0) + close_nointr_nofail(signal_fd); + + if (pack) { + fclose(pack); + unlink(pack_fn_new); + } + + free(pack_fn_new); + free(pack_fn); + + while ((p = hashmap_steal_first_key(files))) + free(p); /* release the key that was just stolen from the map, not the stale iterator value q */ + + hashmap_free(files); + + return r; +} + +int main(int argc, char *argv[]) { + /* log_set_target(LOG_TARGET_SYSLOG_OR_KMSG); */ + log_parse_environment(); + log_open(); + + log_set_max_level(LOG_DEBUG); + + if (collect("/") < 0) + return 1; + + return 0; +} diff --git a/src/readahead-common.c b/src/readahead-common.c new file mode 100644 index 00000000..8533717d --- /dev/null +++ b/src/readahead-common.c @@ 
-0,0 +1,107 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2010 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . +***/ + +#include +#include +#include +#include + +#include "log.h" +#include "readahead-common.h" +#include "util.h" + +int file_verify(int fd, const char *fn, struct stat *st) { /* Fills *st via fstat(fd). Returns 1 if fd is a regular file with 0 < size <= READAHEAD_FILE_SIZE_MAX, 0 if the file should be skipped, negative errno if fstat() fails. fn is used for log messages only. */ + assert(fd >= 0); + assert(fn); + assert(st); + + if (fstat(fd, st) < 0) { + log_warning("fstat(%s) failed: %m", fn); + return -errno; + } + + if (!S_ISREG(st->st_mode)) { + log_debug("Not preloading special file %s", fn); + return 0; + } + + if (st->st_size <= 0 || st->st_size > READAHEAD_FILE_SIZE_MAX) { + log_debug("Not preloading file %s with size out of bounds %lli", fn, (long long) st->st_size); /* st_size is off_t, not ssize_t: cast so the conversion specifier matches the argument */ + return 0; + } + + return 1; +} + +int fs_on_ssd(const char *p) { /* Returns 1 if the block device backing p looks like an SSD, 0 if it does not or cannot be determined through udev, and a negative errno if stat() or udev_new() fails. */ + struct stat st; + struct udev *udev = NULL; + struct udev_device *udev_device = NULL, *look_at = NULL; + bool b = false; + const char *devtype, *rotational, *model, *id; + + assert(p); + + if (stat(p, &st) < 0) + return -errno; + + if (!(udev = udev_new())) + return -ENOMEM; + + if (!(udev_device = udev_device_new_from_devnum(udev, 'b', st.st_dev))) + goto finish; + + if ((devtype = udev_device_get_property_value(udev_device, "DEVTYPE")) && + streq(devtype, "partition")) + look_at = udev_device_get_parent(udev_device); /* for a partition, inspect the parent device instead */ + else + look_at = udev_device; + + if (!look_at) + goto finish; + + /* First, try 
high-level property */ + if ((id = udev_device_get_property_value(look_at, "ID_SSD"))) { + b = streq(id, "1"); + goto finish; + } + + /* Second, try kernel attribute */ + if ((rotational = udev_device_get_sysattr_value(look_at, "queue/rotational"))) + if ((b = streq(rotational, "0"))) + goto finish; + + /* Finally, fallback to heuristics */ + if (!(look_at = udev_device_get_parent(look_at))) + goto finish; + + if ((model = udev_device_get_sysattr_value(look_at, "model"))) + b = !!strstr(model, "SSD"); + +finish: + if (udev_device) + udev_device_unref(udev_device); + + if (udev) + udev_unref(udev); + + return b; +} diff --git a/src/readahead-common.h b/src/readahead-common.h new file mode 100644 index 00000000..da6a74ea --- /dev/null +++ b/src/readahead-common.h @@ -0,0 +1,33 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#ifndef fooreadaheadcommonhfoo +#define fooreadaheadcommonhfoo + +/*** + This file is part of systemd. + + Copyright 2010 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include + +#define READAHEAD_FILE_SIZE_MAX (128*1024*1024) + +int file_verify(int fd, const char *fn, struct stat *st); + +int fs_on_ssd(const char *p); + +#endif diff --git a/src/readahead-replay.c b/src/readahead-replay.c new file mode 100644 index 00000000..b886857f --- /dev/null +++ b/src/readahead-replay.c @@ -0,0 +1,214 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2010 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with systemd; If not, see . 
+***/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "missing.h" +#include "util.h" +#include "set.h" +#include "sd-daemon.h" +#include "ioprio.h" +#include "readahead-common.h" + +static int unpack_file(FILE *pack) { /* Reads one record from pack (path line, page ranges, 0/0 end marker) and issues readahead() for it; a record with no ranges means the whole file. Returns 0 on success or when nothing could be read, -EIO on a truncated or malformed record. An unopenable or unsuitable file is skipped but its ranges are still consumed. */ + char fn[PATH_MAX]; + int r = 0, fd = -1; + bool any = false; + struct stat st; + + assert(pack); + + if (!fgets(fn, sizeof(fn), pack)) + return 0; + + char_array_0(fn); + truncate_nl(fn); + + if ((fd = open(fn, O_RDONLY|O_CLOEXEC|O_NOATIME|O_NOCTTY|O_NOFOLLOW)) < 0) + log_warning("open(%s) failed: %m", fn); + else if (file_verify(fd, fn, &st) <= 0) { + close_nointr_nofail(fd); + fd = -1; + } + + for (;;) { + uint32_t b, c; + + if (fread(&b, sizeof(b), 1, pack) != 1 || + fread(&c, sizeof(c), 1, pack) != 1) { + log_error("Premature end of pack file."); + r = -EIO; + goto finish; + } + + if (b == 0 && c == 0) + break; + + if (c <= b) { + log_error("Invalid pack file."); + r = -EIO; + goto finish; + } + + log_debug("%s: page %u to %u", fn, b, c); + + any = true; + + if (fd >= 0) + if (readahead(fd, b * PAGE_SIZE, (c - b) * PAGE_SIZE) < 0) { + log_warning("readahead() failed: %m"); + close_nointr_nofail(fd); fd = -1; /* give up on this file but keep consuming its ranges, so the stream stays aligned on the next record */ + } + } + + if (!any && fd >= 0) { + /* if no range is encoded in the pack file this is + * intended to mean that the whole file shall be + * read */ + + if (readahead(fd, 0, st.st_size) < 0) { + log_warning("readahead() failed: %m"); + goto finish; + } + } + +finish: + if (fd >= 0) + close_nointr_nofail(fd); + + return r; +} + +static int replay(const char *root) { /* Opens <root>/.readahead, checks its CANONICAL_HOST header line and media-type byte, sets the IO priority, then replays every record with unpack_file(). Returns 0 on success and also when the pack file is missing or was written for another host type; negative errno-style value otherwise. */ + FILE *pack = NULL; + char line[LINE_MAX]; + int r = 0; + char *pack_fn = NULL; int c; /* c must be int so the getc() result can be compared with EOF */ + bool on_ssd; + int prio; + + assert(root); + + if (asprintf(&pack_fn, "%s/.readahead", root) < 0) { + log_error("Out of memory"); + r = -ENOMEM; + goto finish; + } + + if ((!(pack = fopen(pack_fn, "re")))) { + if (errno == ENOENT) + log_debug("No pack file found."); + else { + log_error("Failed to open pack file: %m"); + r = 
-errno; + } + + goto finish; + } + + if (!(fgets(line, sizeof(line), pack))) { + log_error("Premature end of pack file."); + r = -EIO; + goto finish; + } + + char_array_0(line); + + if (!streq(line, CANONICAL_HOST "\n")) { + log_debug("Pack file host type mismatch."); + goto finish; + } + + if ((c = getc(pack)) == EOF) { + log_debug("Premature end of pack file."); + r = -EIO; + goto finish; + } + + /* We do not retest SSD here, so that we can start replaying + * before udev is up.*/ + on_ssd = c == 'S'; + log_debug("On SSD: %s", yes_no(on_ssd)); + + if (on_ssd) + prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0); + else + prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 7); + + if (ioprio_set(IOPRIO_WHO_PROCESS, getpid(), prio) < 0) + log_warning("Failed to set IDLE IO priority class: %m"); + + sd_notify(0, + "READY=1\n" + "STATUS=Replaying readahead data"); + + log_debug("Replaying..."); + + while (!feof(pack) && !ferror(pack)) { + int k; + + if ((k = unpack_file(pack)) < 0) { + r = k; + goto finish; + } + } + + if (ferror(pack)) { + log_error("Failed to read pack file."); + r = -EIO; + goto finish; + } + + log_debug("Done."); + +finish: + if (pack) + fclose(pack); + + free(pack_fn); + + return r; +} + +int main(int argc, char*argv[]) { + /* log_set_target(LOG_TARGET_SYSLOG_OR_KMSG); */ + log_parse_environment(); + log_open(); + + log_set_max_level(LOG_DEBUG); + + if (replay("/") < 0) + return 1; + + return 0; +}