Git blob: linux-2.6 — arch/x86/kernel/mmiotrace/mmio-mod.c
Commit: x86: mmiotrace, preview 2
[linux-2.6] / arch / x86 / kernel / mmiotrace / mmio-mod.c
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 2 of the License, or
5  * (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
15  *
16  * Copyright (C) IBM Corporation, 2005
17  *               Jeff Muizelaar, 2006, 2007
18  *               Pekka Paalanen, 2008 <pq@iki.fi>
19  *
20  * Derived from the read-mod example from relay-examples by Tom Zanussi.
21  */
22 #define DEBUG 1
23
24 #include <linux/module.h>
25 #include <linux/relay.h>
26 #include <linux/debugfs.h>
27 #include <linux/proc_fs.h>
28 #include <asm/io.h>
29 #include <linux/version.h>
30 #include <linux/kallsyms.h>
31 #include <asm/pgtable.h>
32 #include <linux/mmiotrace.h>
33 #include <asm/e820.h> /* for ISA_START_ADDRESS */
34 #include <asm/atomic.h>
35 #include <linux/percpu.h>
36
37 #include "pf_in.h"
38
#define NAME "mmiotrace: "	/* prefix for all log messages from this module */

/* This app's relay channel files will appear in /debug/mmio-trace */
static const char APP_DIR[] = "mmio-trace";
/* the marker injection file in /debug/APP_DIR */
static const char MARKER_FILE[] = "mmio-marker";
45
/*
 * Per-cpu record of the most recent kmmio fault, used to pair the pre()
 * and post() callbacks and to detect unmatched (nested) faults.
 */
struct trap_reason {
	unsigned long addr;	/* faulting MMIO address */
	unsigned long ip;	/* faulting instruction pointer */
	enum reason_type type;	/* decoded access type (REG_READ, ...) */
	int active_traces;	/* nonzero while a pre() awaits its post() */
};

/* Bookkeeping for one traced ioremap mapping; linked on trace_list. */
struct remap_trace {
	struct list_head list;
	struct kmmio_probe probe;
	unsigned long phys;	/* physical address that was ioremapped */
	unsigned long id;	/* unique id, from next_id in ioremap_trace_core */
};

/* relay sub-buffer size; there are n_subbufs of these per cpu */
static const size_t subbuf_size = 256*1024;
61
/* Accessed per-cpu. */
static DEFINE_PER_CPU(struct trap_reason, pf_reason);
static DEFINE_PER_CPU(struct mm_io_header_rw, cpu_trace);

/*
 * Per-cpu drop counters, but not necessarily touched only by their own
 * cpu: subbuf_start_handler() updates the counter for buf->cpu, which
 * presumably may differ from the executing cpu — hence atomic_t.
 * (NOTE(review): original comment said "not per-cpu"; confirm intent.)
 */
static DEFINE_PER_CPU(atomic_t, dropped);

static struct dentry *dir;		/* /debug/mmio-trace directory */
static struct dentry *enabled_file;	/* /debug/mmio-trace/enabled */
static struct dentry *marker_file;	/* /debug/mmio-trace/marker */

static DEFINE_MUTEX(mmiotrace_mutex);	/* serializes enable/disable */
static DEFINE_SPINLOCK(trace_lock);	/* see locking rules below */
static atomic_t mmiotrace_enabled;	/* read through is_enabled() */
static LIST_HEAD(trace_list);		/* struct remap_trace */
static struct rchan *chan;		/* relay channel; valid iff enabled */
78
79 /*
80  * Locking in this file:
81  * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections.
82  * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex
83  *   and trace_lock.
84  * - Routines depending on is_enabled() must take trace_lock.
85  * - trace_list users must hold trace_lock.
86  * - is_enabled() guarantees that chan is valid.
87  * - pre/post callbacks assume the effect of is_enabled() being true.
88  */
89
/* module parameters */
static unsigned int	n_subbufs = 32*4;	/* relay sub-buffers per cpu */
static unsigned long	filter_offset;		/* if set, trace only this offset */
static int		nommiotrace;		/* log mappings only, no accesses */
static int		ISA_trace;		/* also trace the low ISA range */
static int		trace_pc;		/* record faulting IP in events */
static int		enable_now;		/* start tracing at module load */

module_param(n_subbufs, uint, 0);
module_param(filter_offset, ulong, 0);
module_param(nommiotrace, bool, 0);
module_param(ISA_trace, bool, 0);
module_param(trace_pc, bool, 0);
module_param(enable_now, bool, 0);

MODULE_PARM_DESC(n_subbufs, "Number of 256kB buffers, default 128.");
MODULE_PARM_DESC(filter_offset, "Start address of traced mappings.");
MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing.");
MODULE_PARM_DESC(ISA_trace, "Do not exclude the low ISA range.");
MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions.");
MODULE_PARM_DESC(enable_now, "Start mmiotracing immediately on module load.");
111
112 static bool is_enabled(void)
113 {
114         return atomic_read(&mmiotrace_enabled);
115 }
116
117 static void record_timestamp(struct mm_io_header *header)
118 {
119         struct timespec now;
120
121         getnstimeofday(&now);
122         header->sec = now.tv_sec;
123         header->nsec = now.tv_nsec;
124 }
125
/*
 * Write callback for the debugfs entry:
 * Read a marker and write it to the mmio trace log.
 * Returns the number of payload bytes consumed (capped at 65535),
 * -ENOMEM on allocation failure, -EFAULT on a bad user buffer, or
 * -EINVAL when tracing is not currently enabled.
 */
static ssize_t write_marker(struct file *file, const char __user *buffer,
						size_t count, loff_t *ppos)
{
	char *event = NULL;
	struct mm_io_header *headp;
	/* cap the marker payload; a short write is reported to the caller */
	ssize_t len = (count > 65535) ? 65535 : count;

	event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
	if (!event)
		return -ENOMEM;

	/* header and payload share one allocation: header first */
	headp = (struct mm_io_header *)event;
	headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
	headp->data_len = len;
	record_timestamp(headp);

	if (copy_from_user(event + sizeof(*headp), buffer, len)) {
		kfree(event);
		return -EFAULT;
	}

	/* chan is only guaranteed valid while is_enabled() under trace_lock */
	spin_lock_irq(&trace_lock);
	if (is_enabled())
		relay_write(chan, event, sizeof(*headp) + len);
	else
		len = -EINVAL;
	spin_unlock_irq(&trace_lock);
	kfree(event);
	return len;
}
160
/*
 * Dump the pte for @address to the kernel log (value and present bit),
 * or BUG() if the address is backed by a large page, which mmiotrace
 * does not support.
 */
static void print_pte(unsigned long address)
{
	int level;
	pte_t *pte = lookup_address(address, &level);

	if (!pte) {
		pr_err(NAME "Error in %s: no pte for page 0x%08lx\n",
							__func__, address);
		return;
	}

	if (level == PG_LEVEL_2M) {
		/*
		 * NOTE(review): PG_LEVEL_2M denotes a 2MB page on PAE/64-bit;
		 * "4MB" only matches non-PAE 32-bit — message may mislead.
		 */
		pr_emerg(NAME "4MB pages are not currently supported: "
							"0x%08lx\n", address);
		BUG();
	}
	pr_info(NAME "pte for 0x%lx: 0x%lx 0x%lx\n", address, pte_val(*pte),
						pte_val(*pte) & _PAGE_PRESENT);
}
180
/*
 * For some reason the pre/post pairs have been called in an
 * unmatched order. Report and die.
 *
 * Dumps the current and previous faulting address/IP plus the register
 * file for the architecture at hand, then BUG()s — this situation is
 * unrecoverable.
 */
static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
{
	const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
	pr_emerg(NAME "unexpected fault for address: 0x%08lx, "
					"last fault for address: 0x%08lx\n",
					addr, my_reason->addr);
	print_pte(addr);
	print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
	print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
#ifdef __i386__
	pr_emerg("eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
			regs->ax, regs->bx, regs->cx, regs->dx);
	pr_emerg("esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
			regs->si, regs->di, regs->bp, regs->sp);
#else
	pr_emerg("rax: %016lx   rcx: %016lx   rdx: %016lx\n",
					regs->ax, regs->cx, regs->dx);
	pr_emerg("rsi: %016lx   rdi: %016lx   rbp: %016lx   rsp: %016lx\n",
				regs->si, regs->di, regs->bp, regs->sp);
#endif
	/* balance get_cpu_var() for form's sake; BUG() never returns */
	put_cpu_var(pf_reason);
	BUG();
}
208
/*
 * kmmio pre-fault callback: runs before the faulting MMIO access is
 * re-executed. Decodes the instruction at the fault site and fills the
 * per-cpu trace record; post() completes the record and emits it.
 */
static void pre(struct kmmio_probe *p, struct pt_regs *regs,
						unsigned long addr)
{
	struct trap_reason *my_reason = &get_cpu_var(pf_reason);
	struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace);
	const unsigned long instptr = instruction_pointer(regs);
	const enum reason_type type = get_ins_type(instptr);

	/* it doesn't make sense to have more than one active trace per cpu */
	if (my_reason->active_traces)
		die_kmmio_nesting_error(regs, addr);
	else
		my_reason->active_traces++;

	/* remember the fault so post() can finish decoding it */
	my_reason->type = type;
	my_reason->addr = addr;
	my_reason->ip = instptr;

	my_trace->header.type = MMIO_MAGIC;
	my_trace->header.pid = 0;
	my_trace->header.data_len = sizeof(struct mm_io_rw);
	my_trace->rw.address = addr;
	/*
	 * struct remap_trace *trace = p->user_data;
	 * phys = addr - trace->probe.addr + trace->phys;
	 */

	/*
	 * Only record the program counter when requested.
	 * It may taint clean-room reverse engineering.
	 */
	if (trace_pc)
		my_trace->rw.pc = instptr;
	else
		my_trace->rw.pc = 0;

	record_timestamp(&my_trace->header);

	switch (type) {
	case REG_READ:
		/* the read value is only known after the access; see post() */
		my_trace->header.type |=
			(MMIO_READ << MMIO_OPCODE_SHIFT) |
			(get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT);
		break;
	case REG_WRITE:
		/* register-sourced write: value is available right now */
		my_trace->header.type |=
			(MMIO_WRITE << MMIO_OPCODE_SHIFT) |
			(get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT);
		my_trace->rw.value = get_ins_reg_val(instptr, regs);
		break;
	case IMM_WRITE:
		/* immediate-sourced write: value comes from the instruction */
		my_trace->header.type |=
			(MMIO_WRITE << MMIO_OPCODE_SHIFT) |
			(get_ins_mem_width(instptr) << MMIO_WIDTH_SHIFT);
		my_trace->rw.value = get_ins_imm_val(instptr);
		break;
	default:
		{
			/* undecodable: record the first three opcode bytes */
			unsigned char *ip = (unsigned char *)instptr;
			my_trace->header.type |=
					(MMIO_UNKNOWN_OP << MMIO_OPCODE_SHIFT);
			my_trace->rw.value = (*ip) << 16 | *(ip + 1) << 8 |
								*(ip + 2);
		}
	}
	put_cpu_var(cpu_trace);
	put_cpu_var(pf_reason);
}
277
/*
 * kmmio post-fault callback: runs after the faulting instruction has
 * executed. Completes the per-cpu record started in pre() — for REG_READ
 * the destination register only now holds the value — and emits it.
 */
static void post(struct kmmio_probe *p, unsigned long condition,
							struct pt_regs *regs)
{
	struct trap_reason *my_reason = &get_cpu_var(pf_reason);
	struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace);

	/* this should always return the active_trace count to 0 */
	my_reason->active_traces--;
	if (my_reason->active_traces) {
		pr_emerg(NAME "unexpected post handler");
		BUG();
	}

	switch (my_reason->type) {
	case REG_READ:
		/* read the value the MMIO access placed in the register */
		my_trace->rw.value = get_ins_reg_val(my_reason->ip, regs);
		break;
	default:
		break;
	}
	relay_write(chan, my_trace, sizeof(*my_trace));
	put_cpu_var(cpu_trace);
	put_cpu_var(pf_reason);
}
302
303 /*
304  * subbuf_start() relay callback.
305  *
306  * Defined so that we know when events are dropped due to the buffer-full
307  * condition.
308  */
309 static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf,
310                                         void *prev_subbuf, size_t prev_padding)
311 {
312         unsigned int cpu = buf->cpu;
313         atomic_t *drop = &per_cpu(dropped, cpu);
314         int count;
315         if (relay_buf_full(buf)) {
316                 if (atomic_inc_return(drop) == 1)
317                         pr_err(NAME "cpu %d buffer full!\n", cpu);
318                 return 0;
319         }
320         count = atomic_read(drop);
321         if (count) {
322                 pr_err(NAME "cpu %d buffer no longer full, missed %d events.\n",
323                                                                 cpu, count);
324                 atomic_sub(count, drop);
325         }
326
327         return 1;
328 }
329
/*
 * fops for the per-cpu relay files; the read/open/poll/mmap/release/
 * splice_read members are copied from relay_file_operations at file
 * creation time (see create_buf_file_handler).
 */
static struct file_operations mmio_fops = {
	.owner = THIS_MODULE,
};
333
/* file_create() callback.  Creates relay file in debugfs. */
static struct dentry *create_buf_file_handler(const char *filename,
						struct dentry *parent,
						int mode,
						struct rchan_buf *buf,
						int *is_global)
{
	struct dentry *buf_file;

	/*
	 * Borrow the stock relay file operations. Assigned at runtime
	 * (and redundantly re-assigned for every per-cpu file) because
	 * the members of the extern relay_file_operations are not
	 * compile-time constants usable in a static initializer.
	 */
	mmio_fops.read = relay_file_operations.read;
	mmio_fops.open = relay_file_operations.open;
	mmio_fops.poll = relay_file_operations.poll;
	mmio_fops.mmap = relay_file_operations.mmap;
	mmio_fops.release = relay_file_operations.release;
	mmio_fops.splice_read = relay_file_operations.splice_read;

	buf_file = debugfs_create_file(filename, mode, parent, buf,
								&mmio_fops);

	return buf_file;
}
355
/* file_remove() default callback.  Removes relay file in debugfs. */
static int remove_buf_file_handler(struct dentry *dentry)
{
	debugfs_remove(dentry);
	return 0;
}
362
/* Relay channel callbacks: drop accounting + debugfs file lifecycle. */
static struct rchan_callbacks relay_callbacks = {
	.subbuf_start = subbuf_start_handler,
	.create_buf_file = create_buf_file_handler,
	.remove_buf_file = remove_buf_file_handler,
};
368
369 static void ioremap_trace_core(unsigned long offset, unsigned long size,
370                                                         void __iomem *addr)
371 {
372         static atomic_t next_id;
373         struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
374         struct mm_io_header_map event = {
375                 .header = {
376                         .type = MMIO_MAGIC |
377                                         (MMIO_PROBE << MMIO_OPCODE_SHIFT),
378                         .sec = 0,
379                         .nsec = 0,
380                         .pid = 0,
381                         .data_len = sizeof(struct mm_io_map)
382                 },
383                 .map = {
384                         .phys = offset,
385                         .addr = (unsigned long)addr,
386                         .len  = size,
387                         .pc   = 0
388                 }
389         };
390         record_timestamp(&event.header);
391
392         if (!trace) {
393                 pr_err(NAME "kmalloc failed in ioremap\n");
394                 return;
395         }
396
397         *trace = (struct remap_trace) {
398                 .probe = {
399                         .addr = (unsigned long)addr,
400                         .len = size,
401                         .pre_handler = pre,
402                         .post_handler = post,
403                         .user_data = trace
404                 },
405                 .phys = offset,
406                 .id = atomic_inc_return(&next_id)
407         };
408
409         spin_lock_irq(&trace_lock);
410         if (!is_enabled())
411                 goto not_enabled;
412
413         relay_write(chan, &event, sizeof(event));
414         list_add_tail(&trace->list, &trace_list);
415         if (!nommiotrace)
416                 register_kmmio_probe(&trace->probe);
417
418 not_enabled:
419         spin_unlock_irq(&trace_lock);
420 }
421
422 void
423 mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr)
424 {
425         if (!is_enabled()) /* recheck and proper locking in *_core() */
426                 return;
427
428         pr_debug(NAME "ioremap_*(0x%lx, 0x%lx) = %p\n", offset, size, addr);
429         if ((filter_offset) && (offset != filter_offset))
430                 return;
431         ioremap_trace_core(offset, size, addr);
432 }
433
/*
 * Undo the tracing of a mapping: unregister its probe (if any), unlink
 * it from trace_list and log a MMIO_UNPROBE event. The entry is freed
 * only after synchronize_rcu(), per unregister_kmmio_probe()'s contract.
 */
static void iounmap_trace_core(volatile void __iomem *addr)
{
	struct mm_io_header_map event = {
		.header = {
			.type = MMIO_MAGIC |
				(MMIO_UNPROBE << MMIO_OPCODE_SHIFT),
			.sec = 0,
			.nsec = 0,
			.pid = 0,
			.data_len = sizeof(struct mm_io_map)
		},
		.map = {
			.phys = 0,
			.addr = (unsigned long)addr,
			.len  = 0,
			.pc   = 0
		}
	};
	struct remap_trace *trace;
	struct remap_trace *tmp;
	struct remap_trace *found_trace = NULL;

	pr_debug(NAME "Unmapping %p.\n", addr);
	record_timestamp(&event.header);

	spin_lock_irq(&trace_lock);
	if (!is_enabled())
		goto not_enabled;

	/* find the entry ioremap_trace_core() registered for this address */
	list_for_each_entry_safe(trace, tmp, &trace_list, list) {
		if ((unsigned long)addr == trace->probe.addr) {
			if (!nommiotrace)
				unregister_kmmio_probe(&trace->probe);
			list_del(&trace->list);
			found_trace = trace;
			break;
		}
	}
	/* log the unmap even when no matching probe was found */
	relay_write(chan, &event, sizeof(event));

not_enabled:
	spin_unlock_irq(&trace_lock);
	if (found_trace) {
		synchronize_rcu(); /* unregister_kmmio_probe() requirement */
		kfree(found_trace);
	}
}
481
482 void mmiotrace_iounmap(volatile void __iomem *addr)
483 {
484         might_sleep();
485         if (is_enabled()) /* recheck and proper locking in *_core() */
486                 iounmap_trace_core(addr);
487 }
488
/*
 * Tear down all probes that were never iounmapped and free their trace
 * entries. Called only from disable_mmiotrace().
 */
static void clear_trace_list(void)
{
	struct remap_trace *trace;
	struct remap_trace *tmp;

	/*
	 * No locking required, because the caller ensures we are in a
	 * critical section via mutex, and is_enabled() is false,
	 * i.e. nothing can traverse or modify this list.
	 * Caller also ensures is_enabled() cannot change.
	 */
	list_for_each_entry(trace, &trace_list, list) {
		pr_notice(NAME "purging non-iounmapped "
					"trace @0x%08lx, size 0x%lx.\n",
					trace->probe.addr, trace->probe.len);
		if (!nommiotrace)
			unregister_kmmio_probe(&trace->probe);
	}
	synchronize_rcu(); /* unregister_kmmio_probe() requirement */

	/* second pass: entries may only be freed after the grace period */
	list_for_each_entry_safe(trace, tmp, &trace_list, list) {
		list_del(&trace->list);
		kfree(trace);
	}
}
514
515 static ssize_t read_enabled_file_bool(struct file *file,
516                 char __user *user_buf, size_t count, loff_t *ppos)
517 {
518         char buf[3];
519
520         if (is_enabled())
521                 buf[0] = '1';
522         else
523                 buf[0] = '0';
524         buf[1] = '\n';
525         buf[2] = '\0';
526         return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
527 }
528
529 static void enable_mmiotrace(void);
530 static void disable_mmiotrace(void);
531
532 static ssize_t write_enabled_file_bool(struct file *file,
533                 const char __user *user_buf, size_t count, loff_t *ppos)
534 {
535         char buf[32];
536         int buf_size = min(count, (sizeof(buf)-1));
537
538         if (copy_from_user(buf, user_buf, buf_size))
539                 return -EFAULT;
540
541         switch (buf[0]) {
542         case 'y':
543         case 'Y':
544         case '1':
545                 enable_mmiotrace();
546                 break;
547         case 'n':
548         case 'N':
549         case '0':
550                 disable_mmiotrace();
551                 break;
552         }
553
554         return count;
555 }
556
/* this ripped from kernel/kprobes.c */
/* fops for /debug/mmio-trace/enabled (read/write the enable flag) */
static struct file_operations fops_enabled = {
	.owner =	THIS_MODULE,
	.read =		read_enabled_file_bool,
	.write =	write_enabled_file_bool
};

/* fops for /debug/mmio-trace/marker (write-only marker injection) */
static struct file_operations fops_marker = {
	.owner =	THIS_MODULE,
	.write =	write_marker
};
568
/*
 * Turn tracing on: open the relay channel, take a reference on the
 * kmmio infrastructure, create the marker file, and finally flip
 * mmiotrace_enabled under trace_lock. Serialized against
 * disable_mmiotrace() by mmiotrace_mutex. No-op if already enabled.
 */
static void enable_mmiotrace(void)
{
	mutex_lock(&mmiotrace_mutex);
	if (is_enabled())
		goto out;

	chan = relay_open("cpu", dir, subbuf_size, n_subbufs,
						&relay_callbacks, NULL);
	if (!chan) {
		pr_err(NAME "relay app channel creation failed.\n");
		goto out;
	}

	reference_kmmio();

	/* the marker file is optional: tracing proceeds without it */
	marker_file = debugfs_create_file("marker", 0660, dir, NULL,
								&fops_marker);
	if (!marker_file)
		pr_err(NAME "marker file creation failed.\n");

	if (nommiotrace)
		pr_info(NAME "MMIO tracing disabled.\n");
	if (ISA_trace)
		pr_warning(NAME "Warning! low ISA range will be traced.\n");
	/* flip the flag last, under trace_lock, per the locking rules */
	spin_lock_irq(&trace_lock);
	atomic_inc(&mmiotrace_enabled);
	spin_unlock_irq(&trace_lock);
	pr_info(NAME "enabled.\n");
out:
	mutex_unlock(&mmiotrace_mutex);
}
600
/*
 * Turn tracing off: clear the enabled flag first (under trace_lock, so
 * no new trace activity can start), then tear down remaining probes,
 * drop the kmmio reference and release the marker file and relay
 * channel. Serialized by mmiotrace_mutex. No-op if already disabled.
 */
static void disable_mmiotrace(void)
{
	mutex_lock(&mmiotrace_mutex);
	if (!is_enabled())
		goto out;

	spin_lock_irq(&trace_lock);
	atomic_dec(&mmiotrace_enabled);
	BUG_ON(is_enabled());
	spin_unlock_irq(&trace_lock);

	clear_trace_list(); /* guarantees: no more kmmio callbacks */
	unreference_kmmio();
	if (marker_file) {
		debugfs_remove(marker_file);
		marker_file = NULL;
	}
	if (chan) {
		relay_close(chan);
		chan = NULL;
	}

	pr_info(NAME "disabled.\n");
out:
	mutex_unlock(&mmiotrace_mutex);
}
627
628 static int __init init(void)
629 {
630         pr_debug(NAME "load...\n");
631         if (n_subbufs < 2)
632                 return -EINVAL;
633
634         dir = debugfs_create_dir(APP_DIR, NULL);
635         if (!dir) {
636                 pr_err(NAME "Couldn't create relay app directory.\n");
637                 return -ENOMEM;
638         }
639
640         enabled_file = debugfs_create_file("enabled", 0600, dir, NULL,
641                                                                 &fops_enabled);
642         if (!enabled_file) {
643                 pr_err(NAME "Couldn't create enabled file.\n");
644                 debugfs_remove(dir);
645                 return -ENOMEM;
646         }
647
648         if (enable_now)
649                 enable_mmiotrace();
650
651         return 0;
652 }
653
/*
 * Module unload: remove the "enabled" control first (so nobody can
 * re-enable mid-teardown), stop tracing, then remove the directory.
 */
static void __exit cleanup(void)
{
	pr_debug(NAME "unload...\n");
	if (enabled_file)
		debugfs_remove(enabled_file);
	disable_mmiotrace();
	if (dir)
		debugfs_remove(dir);
}
663
/* Module entry points and license declaration. */
module_init(init);
module_exit(cleanup);
MODULE_LICENSE("GPL");