From: Mathieu Desnoyers Date: Fri, 19 Oct 2007 06:41:06 +0000 (-0700) Subject: Linux Kernel Markers X-Git-Tag: v2.6.24-rc1~148 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8256e47cdc8923e9959eb1d7f95d80da538add80;p=linux-2.6 Linux Kernel Markers The marker activation functions sits in kernel/marker.c. A hash table is used to keep track of the registered probes and armed markers, so the markers within a newly loaded module that should be active can be activated at module load time. marker_query has been removed. marker_get_first, marker_get_next and marker_release should be used as iterators on the markers. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Mathieu Desnoyers Acked-by: "Frank Ch. Eigler" Cc: Christoph Hellwig Cc: Rusty Russell Cc: Mike Mason Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5615440027..9f584cc5c5 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -12,7 +12,11 @@ /* .data section */ #define DATA_DATA \ *(.data) \ - *(.data.init.refok) + *(.data.init.refok) \ + . = ALIGN(8); \ + VMLINUX_SYMBOL(__start___markers) = .; \ + *(__markers) \ + VMLINUX_SYMBOL(__stop___markers) = .; #define RO_DATA(align) \ . = ALIGN((align)); \ @@ -20,6 +24,7 @@ VMLINUX_SYMBOL(__start_rodata) = .; \ *(.rodata) *(.rodata.*) \ *(__vermagic) /* Kernel version magic */ \ + *(__markers_strings) /* Markers: strings */ \ } \ \ .rodata1 : AT(ADDR(.rodata1) - LOAD_OFFSET) { \ diff --git a/include/linux/marker.h b/include/linux/marker.h new file mode 100644 index 0000000000..8038d89b83 --- /dev/null +++ b/include/linux/marker.h @@ -0,0 +1,130 @@ +#ifndef _LINUX_MARKER_H +#define _LINUX_MARKER_H + +/* + * Code markup for dynamic and static tracing. + * + * See Documentation/marker.txt. + * + * (C) Copyright 2006 Mathieu Desnoyers + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#include + +struct module; +struct marker; + +/** + * marker_probe_func - Type of a marker probe function + * @mdata: pointer of type struct marker + * @private_data: caller site private data + * @fmt: format string + * @...: variable argument list + * + * Type of marker probe functions. They receive the mdata and need to parse the + * format string to recover the variable argument list. + */ +typedef void marker_probe_func(const struct marker *mdata, + void *private_data, const char *fmt, ...); + +struct marker { + const char *name; /* Marker name */ + const char *format; /* Marker format string, describing the + * variable argument list. + */ + char state; /* Marker state. */ + marker_probe_func *call;/* Probe handler function pointer */ + void *private; /* Private probe data */ +} __attribute__((aligned(8))); + +#ifdef CONFIG_MARKERS + +/* + * Note : the empty asm volatile with read constraint is used here instead of a + * "used" attribute to fix a gcc 4.1.x bug. + * Make sure the alignment of the structure in the __markers section will + * not add unwanted padding between the beginning of the section and the + * structure. Force alignment to the same alignment as the section start. + */ +#define __trace_mark(name, call_data, format, args...) \ + do { \ + static const char __mstrtab_name_##name[] \ + __attribute__((section("__markers_strings"))) \ + = #name; \ + static const char __mstrtab_format_##name[] \ + __attribute__((section("__markers_strings"))) \ + = format; \ + static struct marker __mark_##name \ + __attribute__((section("__markers"), aligned(8))) = \ + { __mstrtab_name_##name, __mstrtab_format_##name, \ + 0, __mark_empty_function, NULL }; \ + asm volatile("" : : "i" (&__mark_##name)); \ + __mark_check_format(format, ## args); \ + if (unlikely(__mark_##name.state)) { \ + preempt_disable(); \ + (*__mark_##name.call) \ + (&__mark_##name, call_data, \ + format, ## args); \ + preempt_enable(); \ + } \ + } while (0) + +extern void marker_update_probe_range(struct marker *begin, + struct marker *end, struct module *probe_module, int *refcount); +#else /* !CONFIG_MARKERS */ +#define __trace_mark(name, call_data, format, args...) \ + __mark_check_format(format, ## args) +static inline void marker_update_probe_range(struct marker *begin, + struct marker *end, struct module *probe_module, int *refcount) +{ } +#endif /* CONFIG_MARKERS */ + +/** + * trace_mark - Marker + * @name: marker name, not quoted. + * @format: format string + * @args...: variable argument list + * + * Places a marker. + */ +#define trace_mark(name, format, args...) \ + __trace_mark(name, NULL, format, ## args) + +#define MARK_MAX_FORMAT_LEN 1024 + +/** + * MARK_NOARGS - Format string for a marker with no argument. + */ +#define MARK_NOARGS " " + +/* To be used for string format validity checking with gcc */ +static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...) +{ +} + +extern marker_probe_func __mark_empty_function; + +/* + * Connect a probe to a marker. + * private data pointer must be a valid allocated memory address, or NULL. + */ +extern int marker_probe_register(const char *name, const char *format, + marker_probe_func *probe, void *private); + +/* + * Returns the private data given to marker_probe_register. + */ +extern void *marker_probe_unregister(const char *name); +/* + * Unregister a marker by providing the registered private data. + */ +extern void *marker_probe_unregister_private_data(void *private); + +extern int marker_arm(const char *name); +extern int marker_disarm(const char *name); +extern void *marker_get_private_data(const char *name); + +#endif diff --git a/include/linux/module.h b/include/linux/module.h index 642f325e49..2cbc0b87e3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -354,6 +355,10 @@ struct module /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; +#ifdef CONFIG_MARKERS + struct marker *markers; + unsigned int num_markers; +#endif }; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} @@ -457,6 +462,8 @@ int unregister_module_notifier(struct notifier_block * nb); extern void print_modules(void); +extern void module_update_markers(struct module *probe_module, int *refcount); + #else /* !CONFIG_MODULES... */ #define EXPORT_SYMBOL(sym) #define EXPORT_SYMBOL_GPL(sym) @@ -556,6 +563,11 @@ static inline void print_modules(void) { } +static inline void module_update_markers(struct module *probe_module, + int *refcount) +{ +} + #endif /* CONFIG_MODULES */ struct device_driver; diff --git a/kernel/Kconfig.instrumentation b/kernel/Kconfig.instrumentation index ba5c05ca2a..f5f2c769d9 100644 --- a/kernel/Kconfig.instrumentation +++ b/kernel/Kconfig.instrumentation @@ -40,4 +40,10 @@ config KPROBES for kernel debugging, non-intrusive instrumentation and testing. If in doubt, say "N". +config MARKERS + bool "Activate markers" + help + Place an empty function call at each marker site. Can be + dynamically changed for a probe function. + endif # INSTRUMENTATION diff --git a/kernel/Makefile b/kernel/Makefile index 32b2d8bdc9..05c3e6df85 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o +obj-$(CONFIG_MARKERS) += marker.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/marker.c b/kernel/marker.c new file mode 100644 index 0000000000..ccb48d9a36 --- /dev/null +++ b/kernel/marker.c @@ -0,0 +1,525 @@ +/* + * Copyright (C) 2007 Mathieu Desnoyers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct marker __start___markers[]; +extern struct marker __stop___markers[]; + +/* + * module_mutex nests inside markers_mutex. Markers mutex protects the builtin + * and module markers, the hash table and deferred_sync. + */ +static DEFINE_MUTEX(markers_mutex); + +/* + * Marker deferred synchronization. + * Upon marker probe_unregister, we delay call to synchronize_sched() to + * accelerate mass unregistration (only when there is no more reference to a + * given module do we call synchronize_sched()). However, we need to make sure + * every critical region has ended before we re-arm a marker that has been + * unregistered and then registered back with a different probe data. + */ +static int deferred_sync; + +/* + * Marker hash table, containing the active markers. + * Protected by module_mutex. + */ +#define MARKER_HASH_BITS 6 +#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) + +struct marker_entry { + struct hlist_node hlist; + char *format; + marker_probe_func *probe; + void *private; + int refcount; /* Number of times armed. 0 if disarmed. */ + char name[0]; /* Contains name'\0'format'\0' */ +}; + +static struct hlist_head marker_table[MARKER_TABLE_SIZE]; + +/** + * __mark_empty_function - Empty probe callback + * @mdata: pointer of type const struct marker + * @fmt: format string + * @...: variable argument list + * + * Empty callback provided as a probe to the markers. By providing this to a + * disabled marker, we make sure the execution flow is always valid even + * though the function pointer change and the marker enabling are two distinct + * operations that modifies the execution flow of preemptible code. + */ +void __mark_empty_function(const struct marker *mdata, void *private, + const char *fmt, ...) +{ +} +EXPORT_SYMBOL_GPL(__mark_empty_function); + +/* + * Get marker if the marker is present in the marker hash table. + * Must be called with markers_mutex held. + * Returns NULL if not present. + */ +static struct marker_entry *get_marker(const char *name) +{ + struct hlist_head *head; + struct hlist_node *node; + struct marker_entry *e; + u32 hash = jhash(name, strlen(name), 0); + + head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (!strcmp(name, e->name)) + return e; + } + return NULL; +} + +/* + * Add the marker to the marker hash table. Must be called with markers_mutex + * held. + */ +static int add_marker(const char *name, const char *format, + marker_probe_func *probe, void *private) +{ + struct hlist_head *head; + struct hlist_node *node; + struct marker_entry *e; + size_t name_len = strlen(name) + 1; + size_t format_len = 0; + u32 hash = jhash(name, name_len-1, 0); + + if (format) + format_len = strlen(format) + 1; + head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (!strcmp(name, e->name)) { + printk(KERN_NOTICE + "Marker %s busy, probe %p already installed\n", + name, e->probe); + return -EBUSY; /* Already there */ + } + } + /* + * Using kmalloc here to allocate a variable length element. Could + * cause some memory fragmentation if overused. + */ + e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, + GFP_KERNEL); + if (!e) + return -ENOMEM; + memcpy(&e->name[0], name, name_len); + if (format) { + e->format = &e->name[name_len]; + memcpy(e->format, format, format_len); + trace_mark(core_marker_format, "name %s format %s", + e->name, e->format); + } else + e->format = NULL; + e->probe = probe; + e->private = private; + e->refcount = 0; + hlist_add_head(&e->hlist, head); + return 0; +} + +/* + * Remove the marker from the marker hash table. Must be called with mutex_lock + * held. + */ +static void *remove_marker(const char *name) +{ + struct hlist_head *head; + struct hlist_node *node; + struct marker_entry *e; + int found = 0; + size_t len = strlen(name) + 1; + void *private = NULL; + u32 hash = jhash(name, len-1, 0); + + head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (!strcmp(name, e->name)) { + found = 1; + break; + } + } + if (found) { + private = e->private; + hlist_del(&e->hlist); + kfree(e); + } + return private; +} + +/* + * Set the mark_entry format to the format found in the element. + */ +static int marker_set_format(struct marker_entry **entry, const char *format) +{ + struct marker_entry *e; + size_t name_len = strlen((*entry)->name) + 1; + size_t format_len = strlen(format) + 1; + + e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, + GFP_KERNEL); + if (!e) + return -ENOMEM; + memcpy(&e->name[0], (*entry)->name, name_len); + e->format = &e->name[name_len]; + memcpy(e->format, format, format_len); + e->probe = (*entry)->probe; + e->private = (*entry)->private; + e->refcount = (*entry)->refcount; + hlist_add_before(&e->hlist, &(*entry)->hlist); + hlist_del(&(*entry)->hlist); + kfree(*entry); + *entry = e; + trace_mark(core_marker_format, "name %s format %s", + e->name, e->format); + return 0; +} + +/* + * Sets the probe callback corresponding to one marker. + */ +static int set_marker(struct marker_entry **entry, struct marker *elem) +{ + int ret; + WARN_ON(strcmp((*entry)->name, elem->name) != 0); + + if ((*entry)->format) { + if (strcmp((*entry)->format, elem->format) != 0) { + printk(KERN_NOTICE + "Format mismatch for probe %s " + "(%s), marker (%s)\n", + (*entry)->name, + (*entry)->format, + elem->format); + return -EPERM; + } + } else { + ret = marker_set_format(entry, elem->format); + if (ret) + return ret; + } + elem->call = (*entry)->probe; + elem->private = (*entry)->private; + elem->state = 1; + return 0; +} + +/* + * Disable a marker and its probe callback. + * Note: only after a synchronize_sched() issued after setting elem->call to the + * empty function insures that the original callback is not used anymore. This + * insured by preemption disabling around the call site. + */ +static void disable_marker(struct marker *elem) +{ + elem->state = 0; + elem->call = __mark_empty_function; + /* + * Leave the private data and id there, because removal is racy and + * should be done only after a synchronize_sched(). These are never used + * until the next initialization anyway. + */ +} + +/** + * marker_update_probe_range - Update a probe range + * @begin: beginning of the range + * @end: end of the range + * @probe_module: module address of the probe being updated + * @refcount: number of references left to the given probe_module (out) + * + * Updates the probe callback corresponding to a range of markers. + * Must be called with markers_mutex held. + */ +void marker_update_probe_range(struct marker *begin, + struct marker *end, struct module *probe_module, + int *refcount) +{ + struct marker *iter; + struct marker_entry *mark_entry; + + for (iter = begin; iter < end; iter++) { + mark_entry = get_marker(iter->name); + if (mark_entry && mark_entry->refcount) { + set_marker(&mark_entry, iter); + /* + * ignore error, continue + */ + if (probe_module) + if (probe_module == + __module_text_address((unsigned long)mark_entry->probe)) + (*refcount)++; + } else { + disable_marker(iter); + } + } +} + +/* + * Update probes, removing the faulty probes. + * Issues a synchronize_sched() when no reference to the module passed + * as parameter is found in the probes so the probe module can be + * safely unloaded from now on. + */ +static void marker_update_probes(struct module *probe_module) +{ + int refcount = 0; + + mutex_lock(&markers_mutex); + /* Core kernel markers */ + marker_update_probe_range(__start___markers, + __stop___markers, probe_module, &refcount); + /* Markers in modules. */ + module_update_markers(probe_module, &refcount); + if (probe_module && refcount == 0) { + synchronize_sched(); + deferred_sync = 0; + } + mutex_unlock(&markers_mutex); +} + +/** + * marker_probe_register - Connect a probe to a marker + * @name: marker name + * @format: format string + * @probe: probe handler + * @private: probe private data + * + * private data must be a valid allocated memory address, or NULL. + * Returns 0 if ok, error value on error. + */ +int marker_probe_register(const char *name, const char *format, + marker_probe_func *probe, void *private) +{ + struct marker_entry *entry; + int ret = 0, need_update = 0; + + mutex_lock(&markers_mutex); + entry = get_marker(name); + if (entry && entry->refcount) { + ret = -EBUSY; + goto end; + } + if (deferred_sync) { + synchronize_sched(); + deferred_sync = 0; + } + ret = add_marker(name, format, probe, private); + if (ret) + goto end; + need_update = 1; +end: + mutex_unlock(&markers_mutex); + if (need_update) + marker_update_probes(NULL); + return ret; +} +EXPORT_SYMBOL_GPL(marker_probe_register); + +/** + * marker_probe_unregister - Disconnect a probe from a marker + * @name: marker name + * + * Returns the private data given to marker_probe_register, or an ERR_PTR(). + */ +void *marker_probe_unregister(const char *name) +{ + struct module *probe_module; + struct marker_entry *entry; + void *private; + int need_update = 0; + + mutex_lock(&markers_mutex); + entry = get_marker(name); + if (!entry) { + private = ERR_PTR(-ENOENT); + goto end; + } + entry->refcount = 0; + /* In what module is the probe handler ? */ + probe_module = __module_text_address((unsigned long)entry->probe); + private = remove_marker(name); + deferred_sync = 1; + need_update = 1; +end: + mutex_unlock(&markers_mutex); + if (need_update) + marker_update_probes(probe_module); + return private; +} +EXPORT_SYMBOL_GPL(marker_probe_unregister); + +/** + * marker_probe_unregister_private_data - Disconnect a probe from a marker + * @private: probe private data + * + * Unregister a marker by providing the registered private data. + * Returns the private data given to marker_probe_register, or an ERR_PTR(). + */ +void *marker_probe_unregister_private_data(void *private) +{ + struct module *probe_module; + struct hlist_head *head; + struct hlist_node *node; + struct marker_entry *entry; + int found = 0; + unsigned int i; + int need_update = 0; + + mutex_lock(&markers_mutex); + for (i = 0; i < MARKER_TABLE_SIZE; i++) { + head = &marker_table[i]; + hlist_for_each_entry(entry, node, head, hlist) { + if (entry->private == private) { + found = 1; + goto iter_end; + } + } + } +iter_end: + if (!found) { + private = ERR_PTR(-ENOENT); + goto end; + } + entry->refcount = 0; + /* In what module is the probe handler ? */ + probe_module = __module_text_address((unsigned long)entry->probe); + private = remove_marker(entry->name); + deferred_sync = 1; + need_update = 1; +end: + mutex_unlock(&markers_mutex); + if (need_update) + marker_update_probes(probe_module); + return private; +} +EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); + +/** + * marker_arm - Arm a marker + * @name: marker name + * + * Activate a marker. It keeps a reference count of the number of + * arming/disarming done. + * Returns 0 if ok, error value on error. + */ +int marker_arm(const char *name) +{ + struct marker_entry *entry; + int ret = 0, need_update = 0; + + mutex_lock(&markers_mutex); + entry = get_marker(name); + if (!entry) { + ret = -ENOENT; + goto end; + } + /* + * Only need to update probes when refcount passes from 0 to 1. + */ + if (entry->refcount++) + goto end; + need_update = 1; +end: + mutex_unlock(&markers_mutex); + if (need_update) + marker_update_probes(NULL); + return ret; +} +EXPORT_SYMBOL_GPL(marker_arm); + +/** + * marker_disarm - Disarm a marker + * @name: marker name + * + * Disarm a marker. It keeps a reference count of the number of arming/disarming + * done. + * Returns 0 if ok, error value on error. + */ +int marker_disarm(const char *name) +{ + struct marker_entry *entry; + int ret = 0, need_update = 0; + + mutex_lock(&markers_mutex); + entry = get_marker(name); + if (!entry) { + ret = -ENOENT; + goto end; + } + /* + * Only permit decrement refcount if higher than 0. + * Do probe update only on 1 -> 0 transition. + */ + if (entry->refcount) { + if (--entry->refcount) + goto end; + } else { + ret = -EPERM; + goto end; + } + need_update = 1; +end: + mutex_unlock(&markers_mutex); + if (need_update) + marker_update_probes(NULL); + return ret; +} +EXPORT_SYMBOL_GPL(marker_disarm); + +/** + * marker_get_private_data - Get a marker's probe private data + * @name: marker name + * + * Returns the private data pointer, or an ERR_PTR. + * The private data pointer should _only_ be dereferenced if the caller is the + * owner of the data, or its content could vanish. This is mostly used to + * confirm that a caller is the owner of a registered probe. + */ +void *marker_get_private_data(const char *name) +{ + struct hlist_head *head; + struct hlist_node *node; + struct marker_entry *e; + size_t name_len = strlen(name) + 1; + u32 hash = jhash(name, name_len-1, 0); + int found = 0; + + head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (!strcmp(name, e->name)) { + found = 1; + return e->private; + } + } + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL_GPL(marker_get_private_data); diff --git a/kernel/module.c b/kernel/module.c index 7734595bd3..3202c99500 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1673,6 +1673,8 @@ static struct module *load_module(void __user *umod, unsigned int unusedcrcindex; unsigned int unusedgplindex; unsigned int unusedgplcrcindex; + unsigned int markersindex; + unsigned int markersstringsindex; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ @@ -1939,6 +1941,9 @@ static struct module *load_module(void __user *umod, add_taint_module(mod, TAINT_FORCED_MODULE); } #endif + markersindex = find_sec(hdr, sechdrs, secstrings, "__markers"); + markersstringsindex = find_sec(hdr, sechdrs, secstrings, + "__markers_strings"); /* Now do relocations. */ for (i = 1; i < hdr->e_shnum; i++) { @@ -1961,6 +1966,11 @@ static struct module *load_module(void __user *umod, if (err < 0) goto cleanup; } +#ifdef CONFIG_MARKERS + mod->markers = (void *)sechdrs[markersindex].sh_addr; + mod->num_markers = + sechdrs[markersindex].sh_size / sizeof(*mod->markers); +#endif /* Find duplicate symbols */ err = verify_export_symbols(mod); @@ -1979,6 +1989,11 @@ static struct module *load_module(void __user *umod, add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); +#ifdef CONFIG_MARKERS + if (!mod->taints) + marker_update_probe_range(mod->markers, + mod->markers + mod->num_markers, NULL, NULL); +#endif err = module_finalize(hdr, sechdrs, mod); if (err < 0) goto cleanup; @@ -2570,3 +2585,18 @@ EXPORT_SYMBOL(module_remove_driver); void struct_module(struct module *mod) { return; } EXPORT_SYMBOL(struct_module); #endif + +#ifdef CONFIG_MARKERS +void module_update_markers(struct module *probe_module, int *refcount) +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry(mod, &modules, list) + if (!mod->taints) + marker_update_probe_range(mod->markers, + mod->markers + mod->num_markers, + probe_module, refcount); + mutex_unlock(&module_mutex); +} +#endif