1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/utsrelease.h>
15 #include <linux/kallsyms.h>
16 #include <linux/seq_file.h>
17 #include <linux/debugfs.h>
18 #include <linux/pagemap.h>
19 #include <linux/hardirq.h>
20 #include <linux/linkage.h>
21 #include <linux/uaccess.h>
22 #include <linux/ftrace.h>
23 #include <linux/module.h>
24 #include <linux/percpu.h>
25 #include <linux/ctype.h>
26 #include <linux/init.h>
27 #include <linux/poll.h>
28 #include <linux/gfp.h>
29 #include <linux/fs.h>
30
31 #include <linux/stacktrace.h>
32
33 #include "trace.h"
34
35 unsigned long __read_mostly     tracing_max_latency = (cycle_t)ULONG_MAX;
36 unsigned long __read_mostly     tracing_thresh;
37
38 static int tracing_disabled = 1;
39
40 static long
41 ns2usecs(cycle_t nsec)
42 {
43         nsec += 500;
44         do_div(nsec, 1000);
45         return nsec;
46 }
47
48 cycle_t ftrace_now(int cpu)
49 {
50         return cpu_clock(cpu);
51 }
52
53 static struct trace_array       global_trace;
54
55 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
56
57 static struct trace_array       max_tr;
58
59 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
60
61 static int                      tracer_enabled = 1;
62 static unsigned long            trace_nr_entries = 65536UL;
63
64 static struct tracer            *trace_types __read_mostly;
65 static struct tracer            *current_trace __read_mostly;
66 static int                      max_tracer_type_len;
67
68 static DEFINE_MUTEX(trace_types_lock);
69 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
70
71 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
72
73 /*
74  * Only trace on a CPU if the bitmask is set:
75  */
76 static cpumask_t tracing_cpumask __read_mostly = CPU_MASK_ALL;
77
78 /*
79  * The tracer itself will not take this lock, but we still want
80  * to provide a consistent cpumask to user-space:
81  */
82 static DEFINE_MUTEX(tracing_cpumask_update_lock);
83
84 /*
85  * Temporary storage for the character representation of the
86  * CPU bitmask:
87  */
88 static char mask_str[NR_CPUS];
89
90 void trace_wake_up(void)
91 {
92         /*
93          * The runqueue_is_locked() check can fail, but this is the best
94          * we have for now:
95          */
96         if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
97                 wake_up(&trace_wait);
98 }
99
100 #define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
101
102 static int __init set_nr_entries(char *str)
103 {
104         if (!str)
105                 return 0;
106         trace_nr_entries = simple_strtoul(str, &str, 0);
107         return 1;
108 }
109 __setup("trace_entries=", set_nr_entries);
110
111 unsigned long nsecs_to_usecs(unsigned long nsecs)
112 {
113         return nsecs / 1000;
114 }
115
116 enum trace_type {
117         __TRACE_FIRST_TYPE = 0,
118
119         TRACE_FN,
120         TRACE_CTX,
121         TRACE_WAKE,
122         TRACE_STACK,
123         TRACE_SPECIAL,
124
125         __TRACE_LAST_TYPE
126 };
127
128 enum trace_flag_type {
129         TRACE_FLAG_IRQS_OFF             = 0x01,
130         TRACE_FLAG_NEED_RESCHED         = 0x02,
131         TRACE_FLAG_HARDIRQ              = 0x04,
132         TRACE_FLAG_SOFTIRQ              = 0x08,
133 };
134
135 #define TRACE_ITER_SYM_MASK \
136         (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
137
138 /* These must match the bit positions above */
139 static const char *trace_options[] = {
140         "print-parent",
141         "sym-offset",
142         "sym-addr",
143         "verbose",
144         "raw",
145         "hex",
146         "bin",
147         "block",
148         "stacktrace",
149         "sched-tree",
150         NULL
151 };
152
153 static DEFINE_SPINLOCK(ftrace_max_lock);
154
155 /*
156  * Copy the new maximum trace into the separate maximum-trace
157  * structure. (this way the maximum trace is permanently saved,
158  * for later retrieval via /debugfs/tracing/latency_trace)
159  */
160 static void
161 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
162 {
163         struct trace_array_cpu *data = tr->data[cpu];
164
165         max_tr.cpu = cpu;
166         max_tr.time_start = data->preempt_timestamp;
167
168         data = max_tr.data[cpu];
169         data->saved_latency = tracing_max_latency;
170
171         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
172         data->pid = tsk->pid;
173         data->uid = tsk->uid;
174         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
175         data->policy = tsk->policy;
176         data->rt_priority = tsk->rt_priority;
177
178         /* record this task's comm */
179         tracing_record_cmdline(current);
180 }
181
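/*
 * Sanity-check the linkage of a per-CPU list of trace pages and of
 * each page's lru list entry.
 */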
182 void check_pages(struct trace_array_cpu *data)
183 {
184         struct page *page, *tmp;
185
186         BUG_ON(data->trace_pages.next->prev != &data->trace_pages);
187         BUG_ON(data->trace_pages.prev->next != &data->trace_pages);
188
189         list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
190                 BUG_ON(page->lru.next->prev != &page->lru);
191                 BUG_ON(page->lru.prev->next != &page->lru);
192         }
193 }
194
195 void *head_page(struct trace_array_cpu *data)
196 {
197         struct page *page;
198
199         check_pages(data);
200         if (list_empty(&data->trace_pages))
201                 return NULL;
202
203         page = list_entry(data->trace_pages.next, struct page, lru);
204         BUG_ON(&page->lru == &data->trace_pages);
205
206         return page_address(page);
207 }
208
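/*
 * The trace_seq_* helpers below format output into a page-sized buffer
 * (struct trace_seq) that is later flushed to the seq_file.
 */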
209 static int
210 trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
211 {
212         int len = (PAGE_SIZE - 1) - s->len;
213         va_list ap;
214         int ret;
215
216         if (!len)
217                 return 0;
218
219         va_start(ap, fmt);
220         ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
221         va_end(ap);
222
223         /* If we can't write it all, don't bother writing anything */
224         if (ret > len)
225                 return 0;
226
227         s->len += ret;
228
229         return len;
230 }
231
232 static int
233 trace_seq_puts(struct trace_seq *s, const char *str)
234 {
235         int len = strlen(str);
236
237         if (len > ((PAGE_SIZE - 1) - s->len))
238                 return 0;
239
240         memcpy(s->buffer + s->len, str, len);
241         s->len += len;
242
243         return len;
244 }
245
246 static int
247 trace_seq_putc(struct trace_seq *s, unsigned char c)
248 {
249         if (s->len >= (PAGE_SIZE - 1))
250                 return 0;
251
252         s->buffer[s->len++] = c;
253
254         return 1;
255 }
256
257 static int
258 trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
259 {
260         if (len > ((PAGE_SIZE - 1) - s->len))
261                 return 0;
262
263         memcpy(s->buffer + s->len, mem, len);
264         s->len += len;
265
266         return len;
267 }
268
269 #define HEX_CHARS 17
270
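/*
 * Append the bytes of @mem to the trace_seq as hex digits followed by
 * a trailing space.
 */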
271 static int
272 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
273 {
274         unsigned char hex[HEX_CHARS];
275         unsigned char *data;
276         unsigned char byte;
277         int i, j;
278
279         BUG_ON(len >= HEX_CHARS);
280
281         data = mem;
282
283 #ifdef __BIG_ENDIAN
284         for (i = 0, j = 0; i < len; i++) {
285 #else
286         for (i = len-1, j = 0; i >= 0; i--) {
287 #endif
288                 byte = data[i];
289
290                 hex[j]   = byte & 0x0f;
291                 if (hex[j] >= 10)
292                         hex[j] += 'a' - 10;
293                 else
294                         hex[j] += '0';
295                 j++;
296
297                 hex[j] = byte >> 4;
298                 if (hex[j] >= 10)
299                         hex[j] += 'a' - 10;
300                 else
301                         hex[j] += '0';
302                 j++;
303         }
304         hex[j] = ' ';
305         j++;
306
307         return trace_seq_putmem(s, hex, j);
308 }
309
310 static void
311 trace_seq_reset(struct trace_seq *s)
312 {
313         s->len = 0;
314 }
315
316 static void
317 trace_print_seq(struct seq_file *m, struct trace_seq *s)
318 {
319         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
320
321         s->buffer[len] = 0;
322         seq_puts(m, s->buffer);
323
324         trace_seq_reset(s);
325 }
326
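/*
 * Exchange the trace pages of two per-CPU buffers and copy tr2's
 * buffer bookkeeping into tr1.  Used to move the current trace into
 * max_tr; callers reset tr2 afterwards.
 */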
327 static void
328 flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
329 {
330         struct list_head flip_pages;
331
332         INIT_LIST_HEAD(&flip_pages);
333
334         memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
335                 sizeof(struct trace_array_cpu) -
336                 offsetof(struct trace_array_cpu, trace_head_idx));
337
338         check_pages(tr1);
339         check_pages(tr2);
340         list_splice_init(&tr1->trace_pages, &flip_pages);
341         list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
342         list_splice_init(&flip_pages, &tr2->trace_pages);
343         BUG_ON(!list_empty(&flip_pages));
344         check_pages(tr1);
345         check_pages(tr2);
346 }
347
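/*
 * update_max_tr - snapshot every CPU buffer into max_tr and record the
 * task that caused the new maximum latency.
 */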
348 void
349 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
350 {
351         struct trace_array_cpu *data;
352         int i;
353
354         WARN_ON_ONCE(!irqs_disabled());
355         spin_lock(&ftrace_max_lock);
356         /* clear out all the previous traces */
357         for_each_possible_cpu(i) {
358                 data = tr->data[i];
359                 flip_trace(max_tr.data[i], data);
360                 tracing_reset(data);
361         }
362
363         __update_max_tr(tr, tsk, cpu);
364         spin_unlock(&ftrace_max_lock);
365 }
366
367 /**
368  * update_max_tr_single - only copy one trace over, and reset the rest
369  * @tr: tracer
370  * @tsk: task with the latency
371  * @cpu: the cpu of the buffer to copy.
372  */
373 void
374 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
375 {
376         struct trace_array_cpu *data = tr->data[cpu];
377         int i;
378
379         WARN_ON_ONCE(!irqs_disabled());
380         spin_lock(&ftrace_max_lock);
381         for_each_possible_cpu(i)
382                 tracing_reset(max_tr.data[i]);
383
384         flip_trace(max_tr.data[cpu], data);
385         tracing_reset(data);
386
387         __update_max_tr(tr, tsk, cpu);
388         spin_unlock(&ftrace_max_lock);
389 }
390
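/*
 * register_tracer - add a tracer to the list of available tracers.
 *
 * With CONFIG_FTRACE_STARTUP_TEST the tracer's selftest is run first;
 * the tracer is not registered if the selftest fails.
 */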
391 int register_tracer(struct tracer *type)
392 {
393         struct tracer *t;
394         int len;
395         int ret = 0;
396
397         if (!type->name) {
398                 pr_info("Tracer must have a name\n");
399                 return -1;
400         }
401
402         mutex_lock(&trace_types_lock);
403         for (t = trace_types; t; t = t->next) {
404                 if (strcmp(type->name, t->name) == 0) {
405                         /* already found */
406                         pr_info("Trace %s already registered\n",
407                                 type->name);
408                         ret = -1;
409                         goto out;
410                 }
411         }
412
413 #ifdef CONFIG_FTRACE_STARTUP_TEST
414         if (type->selftest) {
415                 struct tracer *saved_tracer = current_trace;
416                 struct trace_array_cpu *data;
417                 struct trace_array *tr = &global_trace;
418                 int saved_ctrl = tr->ctrl;
419                 int i;
420                 /*
421                  * Run a selftest on this tracer.
422                  * Here we reset the trace buffer, and set the current
423                  * tracer to be this tracer. The tracer can then run some
424                  * internal tracing to verify that everything is in order.
425                  * If we fail, we do not register this tracer.
426                  */
427                 for_each_possible_cpu(i) {
428                         data = tr->data[i];
429                         if (!head_page(data))
430                                 continue;
431                         tracing_reset(data);
432                 }
433                 current_trace = type;
434                 tr->ctrl = 0;
435                 /* the test is responsible for initializing and enabling */
436                 pr_info("Testing tracer %s: ", type->name);
437                 ret = type->selftest(type, tr);
438                 /* the test is responsible for resetting too */
439                 current_trace = saved_tracer;
440                 tr->ctrl = saved_ctrl;
441                 if (ret) {
442                         printk(KERN_CONT "FAILED!\n");
443                         goto out;
444                 }
445                 /* Only reset on passing, to avoid touching corrupted buffers */
446                 for_each_possible_cpu(i) {
447                         data = tr->data[i];
448                         if (!head_page(data))
449                                 continue;
450                         tracing_reset(data);
451                 }
452                 printk(KERN_CONT "PASSED\n");
453         }
454 #endif
455
456         type->next = trace_types;
457         trace_types = type;
458         len = strlen(type->name);
459         if (len > max_tracer_type_len)
460                 max_tracer_type_len = len;
461
462  out:
463         mutex_unlock(&trace_types_lock);
464
465         return ret;
466 }
467
468 void unregister_tracer(struct tracer *type)
469 {
470         struct tracer **t;
471         int len;
472
473         mutex_lock(&trace_types_lock);
474         for (t = &trace_types; *t; t = &(*t)->next) {
475                 if (*t == type)
476                         goto found;
477         }
478         pr_info("Trace %s not registered\n", type->name);
479         goto out;
480
481  found:
482         *t = (*t)->next;
483         if (strlen(type->name) != max_tracer_type_len)
484                 goto out;
485
486         max_tracer_type_len = 0;
487         for (t = &trace_types; *t; t = &(*t)->next) {
488                 len = strlen((*t)->name);
489                 if (len > max_tracer_type_len)
490                         max_tracer_type_len = len;
491         }
492  out:
493         mutex_unlock(&trace_types_lock);
494 }
495
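/*
 * Reset a per-CPU buffer: drop all entries and rewind the head and
 * tail to the first trace page.
 */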
496 void tracing_reset(struct trace_array_cpu *data)
497 {
498         data->trace_idx = 0;
499         data->trace_head = data->trace_tail = head_page(data);
500         data->trace_head_idx = 0;
501         data->trace_tail_idx = 0;
502 }
503
504 #define SAVED_CMDLINES 128
505 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
506 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
507 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
508 static int cmdline_idx;
509 static DEFINE_SPINLOCK(trace_cmdline_lock);
510 atomic_t trace_record_cmdline_disabled;
511
512 static void trace_init_cmdlines(void)
513 {
514         memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
515         memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
516         cmdline_idx = 0;
517 }
518
519 void trace_stop_cmdline_recording(void);
520
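/*
 * Remember tsk->comm in a small fixed-size table so that trace output
 * can later map a pid back to a command name.
 */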
521 static void trace_save_cmdline(struct task_struct *tsk)
522 {
523         unsigned map;
524         unsigned idx;
525
526         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
527                 return;
528
529         /*
530          * It's not the end of the world if we don't get
531          * the lock, but we also don't want to spin
532          * nor do we want to disable interrupts,
533          * so if we miss here, then better luck next time.
534          */
535         if (!spin_trylock(&trace_cmdline_lock))
536                 return;
537
538         idx = map_pid_to_cmdline[tsk->pid];
539         if (idx >= SAVED_CMDLINES) {
540                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
541
542                 map = map_cmdline_to_pid[idx];
543                 if (map <= PID_MAX_DEFAULT)
544                         map_pid_to_cmdline[map] = (unsigned)-1;
545
546                 map_pid_to_cmdline[tsk->pid] = idx;
547
548                 cmdline_idx = idx;
549         }
550
551         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
552
553         spin_unlock(&trace_cmdline_lock);
554 }
555
556 static char *trace_find_cmdline(int pid)
557 {
558         char *cmdline = "<...>";
559         unsigned map;
560
561         if (!pid)
562                 return "<idle>";
563
564         if (pid > PID_MAX_DEFAULT)
565                 goto out;
566
567         map = map_pid_to_cmdline[pid];
568         if (map >= SAVED_CMDLINES)
569                 goto out;
570
571         cmdline = saved_cmdlines[map];
572
573  out:
574         return cmdline;
575 }
576
577 void tracing_record_cmdline(struct task_struct *tsk)
578 {
579         if (atomic_read(&trace_record_cmdline_disabled))
580                 return;
581
582         trace_save_cmdline(tsk);
583 }
584
585 static inline struct list_head *
586 trace_next_list(struct trace_array_cpu *data, struct list_head *next)
587 {
588         /*
589          * Round-robin - but skip the head (which is not a real page):
590          */
591         next = next->next;
592         if (unlikely(next == &data->trace_pages))
593                 next = next->next;
594         BUG_ON(next == &data->trace_pages);
595
596         return next;
597 }
598
599 static inline void *
600 trace_next_page(struct trace_array_cpu *data, void *addr)
601 {
602         struct list_head *next;
603         struct page *page;
604
605         page = virt_to_page(addr);
606
607         next = trace_next_list(data, &page->lru);
608         page = list_entry(next, struct page, lru);
609
610         return page_address(page);
611 }
612
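/*
 * Reserve the next entry slot in the per-CPU ring buffer, advancing the
 * head.  On overrun the tail is pushed forward, overwriting the oldest
 * entry.
 */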
613 static inline struct trace_entry *
614 tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
615 {
616         unsigned long idx, idx_next;
617         struct trace_entry *entry;
618
619         data->trace_idx++;
620         idx = data->trace_head_idx;
621         idx_next = idx + 1;
622
623         BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
624
625         entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
626
627         if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
628                 data->trace_head = trace_next_page(data, data->trace_head);
629                 idx_next = 0;
630         }
631
632         if (data->trace_head == data->trace_tail &&
633             idx_next == data->trace_tail_idx) {
634                 /* overrun */
635                 data->trace_tail_idx++;
636                 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
637                         data->trace_tail =
638                                 trace_next_page(data, data->trace_tail);
639                         data->trace_tail_idx = 0;
640                 }
641         }
642
643         data->trace_head_idx = idx_next;
644
645         return entry;
646 }
647
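/*
 * Fill in the fields common to every trace entry: pid, timestamp and
 * the irq/softirq/preempt/resched state of the current context.
 */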
648 static inline void
649 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
650 {
651         struct task_struct *tsk = current;
652         unsigned long pc;
653
654         pc = preempt_count();
655
656         entry->preempt_count    = pc & 0xff;
657         entry->pid              = tsk->pid;
658         entry->t                = ftrace_now(raw_smp_processor_id());
659         entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
660                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
661                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
662                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
663 }
664
665 void
666 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
667                unsigned long ip, unsigned long parent_ip, unsigned long flags)
668 {
669         struct trace_entry *entry;
670         unsigned long irq_flags;
671
672         spin_lock_irqsave(&data->lock, irq_flags);
673         entry                   = tracing_get_trace_entry(tr, data);
674         tracing_generic_entry_update(entry, flags);
675         entry->type             = TRACE_FN;
676         entry->fn.ip            = ip;
677         entry->fn.parent_ip     = parent_ip;
678         spin_unlock_irqrestore(&data->lock, irq_flags);
679
680         trace_wake_up();
681 }
682
683 void
684 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
685        unsigned long ip, unsigned long parent_ip, unsigned long flags)
686 {
687         if (likely(!atomic_read(&data->disabled)))
688                 trace_function(tr, data, ip, parent_ip, flags);
689 }
690
691 void
692 __trace_special(void *__tr, void *__data,
693                 unsigned long arg1, unsigned long arg2, unsigned long arg3)
694 {
695         struct trace_array_cpu *data = __data;
696         struct trace_array *tr = __tr;
697         struct trace_entry *entry;
698         unsigned long irq_flags;
699
700         spin_lock_irqsave(&data->lock, irq_flags);
701         entry                   = tracing_get_trace_entry(tr, data);
702         tracing_generic_entry_update(entry, 0);
703         entry->type             = TRACE_SPECIAL;
704         entry->special.arg1     = arg1;
705         entry->special.arg2     = arg2;
706         entry->special.arg3     = arg3;
707         spin_unlock_irqrestore(&data->lock, irq_flags);
708
709         trace_wake_up();
710 }
711
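/*
 * Record a TRACE_STACK entry with the current call chain (up to
 * FTRACE_STACK_ENTRIES frames, skipping the first @skip) if the
 * stacktrace option is enabled.
 */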
712 void __trace_stack(struct trace_array *tr,
713                    struct trace_array_cpu *data,
714                    unsigned long flags,
715                    int skip)
716 {
717         struct trace_entry *entry;
718         struct stack_trace trace;
719
720         if (!(trace_flags & TRACE_ITER_STACKTRACE))
721                 return;
722
723         entry                   = tracing_get_trace_entry(tr, data);
724         tracing_generic_entry_update(entry, flags);
725         entry->type             = TRACE_STACK;
726
727         memset(&entry->stack, 0, sizeof(entry->stack));
728
729         trace.nr_entries        = 0;
730         trace.max_entries       = FTRACE_STACK_ENTRIES;
731         trace.skip              = skip;
732         trace.entries           = entry->stack.caller;
733
734         save_stack_trace(&trace);
735 }
736
737 void
738 tracing_sched_switch_trace(struct trace_array *tr,
739                            struct trace_array_cpu *data,
740                            struct task_struct *prev,
741                            struct task_struct *next,
742                            unsigned long flags)
743 {
744         struct trace_entry *entry;
745         unsigned long irq_flags;
746
747         spin_lock_irqsave(&data->lock, irq_flags);
748         entry                   = tracing_get_trace_entry(tr, data);
749         tracing_generic_entry_update(entry, flags);
750         entry->type             = TRACE_CTX;
751         entry->ctx.prev_pid     = prev->pid;
752         entry->ctx.prev_prio    = prev->prio;
753         entry->ctx.prev_state   = prev->state;
754         entry->ctx.next_pid     = next->pid;
755         entry->ctx.next_prio    = next->prio;
756         __trace_stack(tr, data, flags, 4);
757         spin_unlock_irqrestore(&data->lock, irq_flags);
758 }
759
760 void
761 tracing_sched_wakeup_trace(struct trace_array *tr,
762                            struct trace_array_cpu *data,
763                            struct task_struct *wakee,
764                            struct task_struct *curr,
765                            unsigned long flags)
766 {
767         struct trace_entry *entry;
768         unsigned long irq_flags;
769
770         spin_lock_irqsave(&data->lock, irq_flags);
771         entry                   = tracing_get_trace_entry(tr, data);
772         tracing_generic_entry_update(entry, flags);
773         entry->type             = TRACE_WAKE;
774         entry->ctx.prev_pid     = curr->pid;
775         entry->ctx.prev_prio    = curr->prio;
776         entry->ctx.prev_state   = curr->state;
777         entry->ctx.next_pid     = wakee->pid;
778         entry->ctx.next_prio    = wakee->prio;
779         __trace_stack(tr, data, flags, 5);
780         spin_unlock_irqrestore(&data->lock, irq_flags);
781
782         trace_wake_up();
783 }
784
785 #ifdef CONFIG_FTRACE
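/*
 * ftrace function-entry callback: record a TRACE_FN entry for every
 * traced function, unless tracing is disabled or we recursed on this CPU.
 */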
786 static void
787 function_trace_call(unsigned long ip, unsigned long parent_ip)
788 {
789         struct trace_array *tr = &global_trace;
790         struct trace_array_cpu *data;
791         unsigned long flags;
792         long disabled;
793         int cpu;
794
795         if (unlikely(!tracer_enabled))
796                 return;
797
798         local_irq_save(flags);
799         cpu = raw_smp_processor_id();
800         data = tr->data[cpu];
801         disabled = atomic_inc_return(&data->disabled);
802
803         if (likely(disabled == 1))
804                 trace_function(tr, data, ip, parent_ip, flags);
805
806         atomic_dec(&data->disabled);
807         local_irq_restore(flags);
808 }
809
810 static struct ftrace_ops trace_ops __read_mostly =
811 {
812         .func = function_trace_call,
813 };
814
815 void tracing_start_function_trace(void)
816 {
817         register_ftrace_function(&trace_ops);
818 }
819
820 void tracing_stop_function_trace(void)
821 {
822         unregister_ftrace_function(&trace_ops);
823 }
824 #endif
825
826 enum trace_file_type {
827         TRACE_FILE_LAT_FMT      = 1,
828 };
829
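/*
 * Return the entry at the iterator's current read position in @cpu's
 * buffer, or NULL if that buffer has no more entries to read.
 */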
830 static struct trace_entry *
831 trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
832                 struct trace_iterator *iter, int cpu)
833 {
834         struct page *page;
835         struct trace_entry *array;
836
837         if (iter->next_idx[cpu] >= tr->entries ||
838             iter->next_idx[cpu] >= data->trace_idx ||
839             (data->trace_head == data->trace_tail &&
840              data->trace_head_idx == data->trace_tail_idx))
841                 return NULL;
842
843         if (!iter->next_page[cpu]) {
844                 /* Initialize the iterator for this cpu trace buffer */
845                 WARN_ON(!data->trace_tail);
846                 page = virt_to_page(data->trace_tail);
847                 iter->next_page[cpu] = &page->lru;
848                 iter->next_page_idx[cpu] = data->trace_tail_idx;
849         }
850
851         page = list_entry(iter->next_page[cpu], struct page, lru);
852         BUG_ON(&data->trace_pages == &page->lru);
853
854         array = page_address(page);
855
856         WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
857         return &array[iter->next_page_idx[cpu]];
858 }
859
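/*
 * Look at the next unread entry of every CPU buffer and return the one
 * with the earliest timestamp, so the per-CPU buffers are merged in
 * time order.
 */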
860 static struct trace_entry *
861 find_next_entry(struct trace_iterator *iter, int *ent_cpu)
862 {
863         struct trace_array *tr = iter->tr;
864         struct trace_entry *ent, *next = NULL;
865         int next_cpu = -1;
866         int cpu;
867
868         for_each_possible_cpu(cpu) {
869                 if (!head_page(tr->data[cpu]))
870                         continue;
871                 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
872                 /*
873                  * Pick the entry with the smallest timestamp:
874                  */
875                 if (ent && (!next || ent->t < next->t)) {
876                         next = ent;
877                         next_cpu = cpu;
878                 }
879         }
880
881         if (ent_cpu)
882                 *ent_cpu = next_cpu;
883
884         return next;
885 }
886
887 static void trace_iterator_increment(struct trace_iterator *iter)
888 {
889         iter->idx++;
890         iter->next_idx[iter->cpu]++;
891         iter->next_page_idx[iter->cpu]++;
892
893         if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
894                 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
895
896                 iter->next_page_idx[iter->cpu] = 0;
897                 iter->next_page[iter->cpu] =
898                         trace_next_list(data, iter->next_page[iter->cpu]);
899         }
900 }
901
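/*
 * Discard the entry at the tail of the current CPU's buffer (a
 * consuming read) and reset the index counter once the buffer is empty.
 */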
902 static void trace_consume(struct trace_iterator *iter)
903 {
904         struct trace_array_cpu *data = iter->tr->data[iter->cpu];
905
906         data->trace_tail_idx++;
907         if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
908                 data->trace_tail = trace_next_page(data, data->trace_tail);
909                 data->trace_tail_idx = 0;
910         }
911
912         /* Check if we emptied it, then reset the index */
913         if (data->trace_head == data->trace_tail &&
914             data->trace_head_idx == data->trace_tail_idx)
915                 data->trace_idx = 0;
916 }
917
918 static void *find_next_entry_inc(struct trace_iterator *iter)
919 {
920         struct trace_entry *next;
921         int next_cpu = -1;
922
923         next = find_next_entry(iter, &next_cpu);
924
925         iter->prev_ent = iter->ent;
926         iter->prev_cpu = iter->cpu;
927
928         iter->ent = next;
929         iter->cpu = next_cpu;
930
931         if (next)
932                 trace_iterator_increment(iter);
933
934         return next ? iter : NULL;
935 }
936
937 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
938 {
939         struct trace_iterator *iter = m->private;
940         void *last_ent = iter->ent;
941         int i = (int)*pos;
942         void *ent;
943
944         (*pos)++;
945
946         /* can't go backwards */
947         if (iter->idx > i)
948                 return NULL;
949
950         if (iter->idx < 0)
951                 ent = find_next_entry_inc(iter);
952         else
953                 ent = iter;
954
955         while (ent && iter->idx < i)
956                 ent = find_next_entry_inc(iter);
957
958         iter->pos = *pos;
959
960         if (last_ent && !ent)
961                 seq_puts(m, "\n\nvim:ft=help\n");
962
963         return ent;
964 }
965
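/*
 * seq_file ->start(): position the iterator at *pos.  When reading
 * from the beginning the iterator state is rebuilt and walked forward;
 * otherwise continue from where the previous read stopped.
 */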
966 static void *s_start(struct seq_file *m, loff_t *pos)
967 {
968         struct trace_iterator *iter = m->private;
969         void *p = NULL;
970         loff_t l = 0;
971         int i;
972
973         mutex_lock(&trace_types_lock);
974
975         if (!current_trace || current_trace != iter->trace)
976                 return NULL;
977
978         atomic_inc(&trace_record_cmdline_disabled);
979
980         /* let the tracer grab locks here if needed */
981         if (current_trace->start)
982                 current_trace->start(iter);
983
984         if (*pos != iter->pos) {
985                 iter->ent = NULL;
986                 iter->cpu = 0;
987                 iter->idx = -1;
988                 iter->prev_ent = NULL;
989                 iter->prev_cpu = -1;
990
991                 for_each_possible_cpu(i) {
992                         iter->next_idx[i] = 0;
993                         iter->next_page[i] = NULL;
994                 }
995
996                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
997                         ;
998
999         } else {
1000                 l = *pos - 1;
1001                 p = s_next(m, p, &l);
1002         }
1003
1004         return p;
1005 }
1006
1007 static void s_stop(struct seq_file *m, void *p)
1008 {
1009         struct trace_iterator *iter = m->private;
1010
1011         atomic_dec(&trace_record_cmdline_disabled);
1012
1013         /* let the tracer release locks here if needed */
1014         if (current_trace && current_trace == iter->trace && iter->trace->stop)
1015                 iter->trace->stop(iter);
1016
1017         mutex_unlock(&trace_types_lock);
1018 }
1019
1020 static int
1021 seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1022 {
1023 #ifdef CONFIG_KALLSYMS
1024         char str[KSYM_SYMBOL_LEN];
1025
1026         kallsyms_lookup(address, NULL, NULL, NULL, str);
1027
1028         return trace_seq_printf(s, fmt, str);
1029 #endif
1030         return 1;
1031 }
1032
1033 static int
1034 seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1035                      unsigned long address)
1036 {
1037 #ifdef CONFIG_KALLSYMS
1038         char str[KSYM_SYMBOL_LEN];
1039
1040         sprint_symbol(str, address);
1041         return trace_seq_printf(s, fmt, str);
1042 #endif
1043         return 1;
1044 }
1045
1046 #ifndef CONFIG_64BIT
1047 # define IP_FMT "%08lx"
1048 #else
1049 # define IP_FMT "%016lx"
1050 #endif
1051
1052 static int
1053 seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1054 {
1055         int ret;
1056
1057         if (!ip)
1058                 return trace_seq_printf(s, "0");
1059
1060         if (sym_flags & TRACE_ITER_SYM_OFFSET)
1061                 ret = seq_print_sym_offset(s, "%s", ip);
1062         else
1063                 ret = seq_print_sym_short(s, "%s", ip);
1064
1065         if (!ret)
1066                 return 0;
1067
1068         if (sym_flags & TRACE_ITER_SYM_ADDR)
1069                 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1070         return ret;
1071 }
1072
1073 static void print_lat_help_header(struct seq_file *m)
1074 {
1075         seq_puts(m, "#                _------=> CPU#            \n");
1076         seq_puts(m, "#               / _-----=> irqs-off        \n");
1077         seq_puts(m, "#              | / _----=> need-resched    \n");
1078         seq_puts(m, "#              || / _---=> hardirq/softirq \n");
1079         seq_puts(m, "#              ||| / _--=> preempt-depth   \n");
1080         seq_puts(m, "#              |||| /                      \n");
1081         seq_puts(m, "#              |||||     delay             \n");
1082         seq_puts(m, "#  cmd     pid ||||| time  |   caller      \n");
1083         seq_puts(m, "#     \\   /    |||||   \\   |   /           \n");
1084 }
1085
1086 static void print_func_help_header(struct seq_file *m)
1087 {
1088         seq_puts(m, "#           TASK-PID   CPU#    TIMESTAMP  FUNCTION\n");
1089         seq_puts(m, "#              | |      |          |         |\n");
1090 }
1091
1092
1093 static void
1094 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1095 {
1096         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1097         struct trace_array *tr = iter->tr;
1098         struct trace_array_cpu *data = tr->data[tr->cpu];
1099         struct tracer *type = current_trace;
1100         unsigned long total   = 0;
1101         unsigned long entries = 0;
1102         int cpu;
1103         const char *name = "preemption";
1104
1105         if (type)
1106                 name = type->name;
1107
1108         for_each_possible_cpu(cpu) {
1109                 if (head_page(tr->data[cpu])) {
1110                         total += tr->data[cpu]->trace_idx;
1111                         if (tr->data[cpu]->trace_idx > tr->entries)
1112                                 entries += tr->entries;
1113                         else
1114                                 entries += tr->data[cpu]->trace_idx;
1115                 }
1116         }
1117
1118         seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1119                    name, UTS_RELEASE);
1120         seq_puts(m, "-----------------------------------"
1121                  "---------------------------------\n");
1122         seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1123                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1124                    nsecs_to_usecs(data->saved_latency),
1125                    entries,
1126                    total,
1127                    tr->cpu,
1128 #if defined(CONFIG_PREEMPT_NONE)
1129                    "server",
1130 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1131                    "desktop",
1132 #elif defined(CONFIG_PREEMPT_DESKTOP)
1133                    "preempt",
1134 #else
1135                    "unknown",
1136 #endif
1137                    /* These are reserved for later use */
1138                    0, 0, 0, 0);
1139 #ifdef CONFIG_SMP
1140         seq_printf(m, " #P:%d)\n", num_online_cpus());
1141 #else
1142         seq_puts(m, ")\n");
1143 #endif
1144         seq_puts(m, "    -----------------\n");
1145         seq_printf(m, "    | task: %.16s-%d "
1146                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1147                    data->comm, data->pid, data->uid, data->nice,
1148                    data->policy, data->rt_priority);
1149         seq_puts(m, "    -----------------\n");
1150
1151         if (data->critical_start) {
1152                 seq_puts(m, " => started at: ");
1153                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1154                 trace_print_seq(m, &iter->seq);
1155                 seq_puts(m, "\n => ended at:   ");
1156                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1157                 trace_print_seq(m, &iter->seq);
1158                 seq_puts(m, "\n");
1159         }
1160
1161         seq_puts(m, "\n");
1162 }
1163
1164 static void
1165 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1166 {
1167         int hardirq, softirq;
1168         char *comm;
1169
1170         comm = trace_find_cmdline(entry->pid);
1171
1172         trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1173         trace_seq_printf(s, "%d", cpu);
1174         trace_seq_printf(s, "%c%c",
1175                         (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1176                         ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1177
1178         hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1179         softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1180         if (hardirq && softirq)
1181                 trace_seq_putc(s, 'H');
1182         else {
1183                 if (hardirq)
1184                         trace_seq_putc(s, 'h');
1185                 else {
1186                         if (softirq)
1187                                 trace_seq_putc(s, 's');
1188                         else
1189                                 trace_seq_putc(s, '.');
1190                 }
1191         }
1192
1193         if (entry->preempt_count)
1194                 trace_seq_printf(s, "%x", entry->preempt_count);
1195         else
1196                 trace_seq_puts(s, ".");
1197 }
1198
1199 unsigned long preempt_mark_thresh = 100;
1200
1201 static void
1202 lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1203                     unsigned long rel_usecs)
1204 {
1205         trace_seq_printf(s, " %4lldus", abs_usecs);
1206         if (rel_usecs > preempt_mark_thresh)
1207                 trace_seq_puts(s, "!: ");
1208         else if (rel_usecs > 1)
1209                 trace_seq_puts(s, "+: ");
1210         else
1211                 trace_seq_puts(s, " : ");
1212 }
1213
1214 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1215
1216 static int
1217 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1218 {
1219         struct trace_seq *s = &iter->seq;
1220         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1221         struct trace_entry *next_entry = find_next_entry(iter, NULL);
1222         unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1223         struct trace_entry *entry = iter->ent;
1224         unsigned long abs_usecs;
1225         unsigned long rel_usecs;
1226         char *comm;
1227         int S;
1228         int i;
1229
1230         if (!next_entry)
1231                 next_entry = entry;
1232         rel_usecs = ns2usecs(next_entry->t - entry->t);
1233         abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
1234
1235         if (verbose) {
1236                 comm = trace_find_cmdline(entry->pid);
1237                 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
1238                                  " %ld.%03ldms (+%ld.%03ldms): ",
1239                                  comm,
1240                                  entry->pid, cpu, entry->flags,
1241                                  entry->preempt_count, trace_idx,
1242                                  ns2usecs(entry->t),
1243                                  abs_usecs/1000,
1244                                  abs_usecs % 1000, rel_usecs/1000,
1245                                  rel_usecs % 1000);
1246         } else {
1247                 if (entry->type != TRACE_STACK) {
1248                         lat_print_generic(s, entry, cpu);
1249                         lat_print_timestamp(s, abs_usecs, rel_usecs);
1250                 }
1251         }
1252         switch (entry->type) {
1253         case TRACE_FN:
1254                 seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1255                 trace_seq_puts(s, " (");
1256                 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1257                 trace_seq_puts(s, ")\n");
1258                 break;
1259         case TRACE_CTX:
1260         case TRACE_WAKE:
1261                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1262                         state_to_char[entry->ctx.prev_state] : 'X';
1263                 comm = trace_find_cmdline(entry->ctx.next_pid);
1264                 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d %s\n",
1265                                  entry->ctx.prev_pid,
1266                                  entry->ctx.prev_prio,
1267                                  S, entry->type == TRACE_CTX ? "==>" : "  +",
1268                                  entry->ctx.next_pid,
1269                                  entry->ctx.next_prio,
1270                                  comm);
1271                 break;
1272         case TRACE_SPECIAL:
1273                 trace_seq_printf(s, " %ld %ld %ld\n",
1274                                  entry->special.arg1,
1275                                  entry->special.arg2,
1276                                  entry->special.arg3);
1277                 break;
1278         case TRACE_STACK:
1279                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1280                         if (i)
1281                                 trace_seq_puts(s, " <= ");
1282                         seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
1283                 }
1284                 trace_seq_puts(s, "\n");
1285                 break;
1286         default:
1287                 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1288         }
1289         return 1;
1290 }
1291
1292 static int print_trace_fmt(struct trace_iterator *iter)
1293 {
1294         struct trace_seq *s = &iter->seq;
1295         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1296         struct trace_entry *entry;
1297         unsigned long usec_rem;
1298         unsigned long long t;
1299         unsigned long secs;
1300         char *comm;
1301         int ret;
1302         int S;
1303         int i;
1304
1305         entry = iter->ent;
1306
1307         comm = trace_find_cmdline(iter->ent->pid);
1308
1309         t = ns2usecs(entry->t);
1310         usec_rem = do_div(t, 1000000ULL);
1311         secs = (unsigned long)t;
1312
1313         if (entry->type != TRACE_STACK) {
1314                 ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1315                 if (!ret)
1316                         return 0;
1317                 ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
1318                 if (!ret)
1319                         return 0;
1320                 ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1321                 if (!ret)
1322                         return 0;
1323         }
1324
1325         switch (entry->type) {
1326         case TRACE_FN:
1327                 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1328                 if (!ret)
1329                         return 0;
1330                 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1331                                                 entry->fn.parent_ip) {
1332                         ret = trace_seq_printf(s, " <-");
1333                         if (!ret)
1334                                 return 0;
1335                         ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1336                                                sym_flags);
1337                         if (!ret)
1338                                 return 0;
1339                 }
1340                 ret = trace_seq_printf(s, "\n");
1341                 if (!ret)
1342                         return 0;
1343                 break;
1344         case TRACE_CTX:
1345         case TRACE_WAKE:
1346                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1347                         state_to_char[entry->ctx.prev_state] : 'X';
1348                 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d\n",
1349                                        entry->ctx.prev_pid,
1350                                        entry->ctx.prev_prio,
1351                                        S,
1352                                        entry->type == TRACE_CTX ? "==>" : "  +",
1353                                        entry->ctx.next_pid,
1354                                        entry->ctx.next_prio);
1355                 if (!ret)
1356                         return 0;
1357                 break;
1358         case TRACE_SPECIAL:
1359                 ret = trace_seq_printf(s, " %ld %ld %ld\n",
1360                                  entry->special.arg1,
1361                                  entry->special.arg2,
1362                                  entry->special.arg3);
1363                 if (!ret)
1364                         return 0;
1365                 break;
1366         case TRACE_STACK:
1367                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1368                         if (i) {
1369                                 ret = trace_seq_puts(s, " <= ");
1370                                 if (!ret)
1371                                         return 0;
1372                         }
1373                         ret = seq_print_ip_sym(s, entry->stack.caller[i],
1374                                                sym_flags);
1375                         if (!ret)
1376                                 return 0;
1377                 }
1378                 ret = trace_seq_puts(s, "\n");
1379                 if (!ret)
1380                         return 0;
1381                 break;
1382         }
1383         return 1;
1384 }
1385
1386 static int print_raw_fmt(struct trace_iterator *iter)
1387 {
1388         struct trace_seq *s = &iter->seq;
1389         struct trace_entry *entry;
1390         int ret;
1391         int S;
1392
1393         entry = iter->ent;
1394
1395         ret = trace_seq_printf(s, "%d %d %llu ",
1396                 entry->pid, iter->cpu, entry->t);
1397         if (!ret)
1398                 return 0;
1399
1400         switch (entry->type) {
1401         case TRACE_FN:
1402                 ret = trace_seq_printf(s, "%x %x\n",
1403                                         entry->fn.ip, entry->fn.parent_ip);
1404                 if (!ret)
1405                         return 0;
1406                 break;
1407         case TRACE_CTX:
1408         case TRACE_WAKE:
1409                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1410                         state_to_char[entry->ctx.prev_state] : 'X';
1411                 if (entry->type == TRACE_WAKE)
1412                         S = '+';
1413                 ret = trace_seq_printf(s, "%d %d %c %d %d\n",
1414                                        entry->ctx.prev_pid,
1415                                        entry->ctx.prev_prio,
1416                                        S,
1417                                        entry->ctx.next_pid,
1418                                        entry->ctx.next_prio);
1419                 if (!ret)
1420                         return 0;
1421                 break;
1422         case TRACE_SPECIAL:
1423         case TRACE_STACK:
1424                 ret = trace_seq_printf(s, " %ld %ld %ld\n",
1425                                  entry->special.arg1,
1426                                  entry->special.arg2,
1427                                  entry->special.arg3);
1428                 if (!ret)
1429                         return 0;
1430                 break;
1431         }
1432         return 1;
1433 }
1434
1435 #define SEQ_PUT_FIELD_RET(s, x)                         \
1436 do {                                                    \
1437         if (!trace_seq_putmem(s, &(x), sizeof(x)))      \
1438                 return 0;                               \
1439 } while (0)
1440
1441 #define SEQ_PUT_HEX_FIELD_RET(s, x)                     \
1442 do {                                                    \
1443         if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))  \
1444                 return 0;                               \
1445 } while (0)
1446
1447 static int print_hex_fmt(struct trace_iterator *iter)
1448 {
1449         struct trace_seq *s = &iter->seq;
1450         unsigned char newline = '\n';
1451         struct trace_entry *entry;
1452         int S;
1453
1454         entry = iter->ent;
1455
1456         SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1457         SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1458         SEQ_PUT_HEX_FIELD_RET(s, entry->t);
1459
1460         switch (entry->type) {
1461         case TRACE_FN:
1462                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
1463                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1464                 break;
1465         case TRACE_CTX:
1466         case TRACE_WAKE:
1467                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1468                         state_to_char[entry->ctx.prev_state] : 'X';
1469                 if (entry->type == TRACE_WAKE)
1470                         S = '+';
1471                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
1472                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
1473                 SEQ_PUT_HEX_FIELD_RET(s, S);
1474                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
1475                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
1476                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1477                 break;
1478         case TRACE_SPECIAL:
1479         case TRACE_STACK:
1480                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
1481                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
1482                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
1483                 break;
1484         }
1485         SEQ_PUT_FIELD_RET(s, newline);
1486
1487         return 1;
1488 }
1489
1490 static int print_bin_fmt(struct trace_iterator *iter)
1491 {
1492         struct trace_seq *s = &iter->seq;
1493         struct trace_entry *entry;
1494
1495         entry = iter->ent;
1496
1497         SEQ_PUT_FIELD_RET(s, entry->pid);
1498         SEQ_PUT_FIELD_RET(s, entry->cpu);
1499         SEQ_PUT_FIELD_RET(s, entry->t);
1500
1501         switch (entry->type) {
1502         case TRACE_FN:
1503                 SEQ_PUT_FIELD_RET(s, entry->fn.ip);
1504                 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
1505                 break;
1506         case TRACE_CTX:
1507                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
1508                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
1509                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
1510                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
1511                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
1512                 break;
1513         case TRACE_SPECIAL:
1514         case TRACE_STACK:
1515                 SEQ_PUT_FIELD_RET(s, entry->special.arg1);
1516                 SEQ_PUT_FIELD_RET(s, entry->special.arg2);
1517                 SEQ_PUT_FIELD_RET(s, entry->special.arg3);
1518                 break;
1519         }
1520         return 1;
1521 }
1522
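/* Return 1 if every per-CPU buffer is empty, 0 otherwise. */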
1523 static int trace_empty(struct trace_iterator *iter)
1524 {
1525         struct trace_array_cpu *data;
1526         int cpu;
1527
1528         for_each_possible_cpu(cpu) {
1529                 data = iter->tr->data[cpu];
1530
1531                 if (head_page(data) && data->trace_idx &&
1532                     (data->trace_tail != data->trace_head ||
1533                      data->trace_tail_idx != data->trace_head_idx))
1534                         return 0;
1535         }
1536         return 1;
1537 }
1538
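/*
 * Emit one trace entry using whichever output format (bin, hex, raw,
 * latency or default) is selected by the trace options.
 */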
1539 static int print_trace_line(struct trace_iterator *iter)
1540 {
1541         if (trace_flags & TRACE_ITER_BIN)
1542                 return print_bin_fmt(iter);
1543
1544         if (trace_flags & TRACE_ITER_HEX)
1545                 return print_hex_fmt(iter);
1546
1547         if (trace_flags & TRACE_ITER_RAW)
1548                 return print_raw_fmt(iter);
1549
1550         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1551                 return print_lat_fmt(iter, iter->idx, iter->cpu);
1552
1553         return print_trace_fmt(iter);
1554 }
1555
1556 static int s_show(struct seq_file *m, void *v)
1557 {
1558         struct trace_iterator *iter = v;
1559
1560         if (iter->ent == NULL) {
1561                 if (iter->tr) {
1562                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
1563                         seq_puts(m, "#\n");
1564                 }
1565                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1566                         /* print nothing if the buffers are empty */
1567                         if (trace_empty(iter))
1568                                 return 0;
1569                         print_trace_header(m, iter);
1570                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1571                                 print_lat_help_header(m);
1572                 } else {
1573                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1574                                 print_func_help_header(m);
1575                 }
1576         } else {
1577                 print_trace_line(iter);
1578                 trace_print_seq(m, &iter->seq);
1579         }
1580
1581         return 0;
1582 }
1583
1584 static struct seq_operations tracer_seq_ops = {
1585         .start          = s_start,
1586         .next           = s_next,
1587         .stop           = s_stop,
1588         .show           = s_show,
1589 };
1590
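/*
 * Set up a trace iterator for reading a trace file: use max_tr for
 * tracers that report a maximum, hook the iterator into the seq_file
 * and pause tracing while the buffer is dumped.
 */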
1591 static struct trace_iterator *
1592 __tracing_open(struct inode *inode, struct file *file, int *ret)
1593 {
1594         struct trace_iterator *iter;
1595
1596         if (tracing_disabled) {
1597                 *ret = -ENODEV;
1598                 return NULL;
1599         }
1600
1601         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1602         if (!iter) {
1603                 *ret = -ENOMEM;
1604                 goto out;
1605         }
1606
1607         mutex_lock(&trace_types_lock);
1608         if (current_trace && current_trace->print_max)
1609                 iter->tr = &max_tr;
1610         else
1611                 iter->tr = inode->i_private;
1612         iter->trace = current_trace;
1613         iter->pos = -1;
1614
1615         /* TODO stop tracer */
1616         *ret = seq_open(file, &tracer_seq_ops);
1617         if (!*ret) {
1618                 struct seq_file *m = file->private_data;
1619                 m->private = iter;
1620
1621                 /* stop the trace while dumping */
1622                 if (iter->tr->ctrl)
1623                         tracer_enabled = 0;
1624
1625                 if (iter->trace && iter->trace->open)
1626                         iter->trace->open(iter);
1627         } else {
1628                 kfree(iter);
1629                 iter = NULL;
1630         }
1631         mutex_unlock(&trace_types_lock);
1632
1633  out:
1634         return iter;
1635 }
1636
1637 int tracing_open_generic(struct inode *inode, struct file *filp)
1638 {
1639         if (tracing_disabled)
1640                 return -ENODEV;
1641
1642         filp->private_data = inode->i_private;
1643         return 0;
1644 }
1645
1646 int tracing_release(struct inode *inode, struct file *file)
1647 {
1648         struct seq_file *m = (struct seq_file *)file->private_data;
1649         struct trace_iterator *iter = m->private;
1650
1651         mutex_lock(&trace_types_lock);
1652         if (iter->trace && iter->trace->close)
1653                 iter->trace->close(iter);
1654
1655         /* reenable tracing if it was previously enabled */
1656         if (iter->tr->ctrl)
1657                 tracer_enabled = 1;
1658         mutex_unlock(&trace_types_lock);
1659
1660         seq_release(inode, file);
1661         kfree(iter);
1662         return 0;
1663 }
1664
1665 static int tracing_open(struct inode *inode, struct file *file)
1666 {
1667         int ret;
1668
1669         __tracing_open(inode, file, &ret);
1670
1671         return ret;
1672 }
1673
1674 static int tracing_lt_open(struct inode *inode, struct file *file)
1675 {
1676         struct trace_iterator *iter;
1677         int ret;
1678
1679         iter = __tracing_open(inode, file, &ret);
1680
1681         if (!ret)
1682                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1683
1684         return ret;
1685 }
1686
1687
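/*
 * seq_file iteration over the list of registered tracers; this backs
 * the "available_tracers" file, printed as a space-separated list.
 */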
1688 static void *
1689 t_next(struct seq_file *m, void *v, loff_t *pos)
1690 {
1691         struct tracer *t = m->private;
1692
1693         (*pos)++;
1694
1695         if (t)
1696                 t = t->next;
1697
1698         m->private = t;
1699
1700         return t;
1701 }
1702
1703 static void *t_start(struct seq_file *m, loff_t *pos)
1704 {
1705         struct tracer *t = m->private;
1706         loff_t l = 0;
1707
1708         mutex_lock(&trace_types_lock);
1709         for (; t && l < *pos; t = t_next(m, t, &l))
1710                 ;
1711
1712         return t;
1713 }
1714
1715 static void t_stop(struct seq_file *m, void *p)
1716 {
1717         mutex_unlock(&trace_types_lock);
1718 }
1719
1720 static int t_show(struct seq_file *m, void *v)
1721 {
1722         struct tracer *t = v;
1723
1724         if (!t)
1725                 return 0;
1726
1727         seq_printf(m, "%s", t->name);
1728         if (t->next)
1729                 seq_putc(m, ' ');
1730         else
1731                 seq_putc(m, '\n');
1732
1733         return 0;
1734 }
1735
1736 static struct seq_operations show_traces_seq_ops = {
1737         .start          = t_start,
1738         .next           = t_next,
1739         .stop           = t_stop,
1740         .show           = t_show,
1741 };
1742
1743 static int show_traces_open(struct inode *inode, struct file *file)
1744 {
1745         int ret;
1746
1747         if (tracing_disabled)
1748                 return -ENODEV;
1749
1750         ret = seq_open(file, &show_traces_seq_ops);
1751         if (!ret) {
1752                 struct seq_file *m = file->private_data;
1753                 m->private = trace_types;
1754         }
1755
1756         return ret;
1757 }
1758
1759 static struct file_operations tracing_fops = {
1760         .open           = tracing_open,
1761         .read           = seq_read,
1762         .llseek         = seq_lseek,
1763         .release        = tracing_release,
1764 };
1765
1766 static struct file_operations tracing_lt_fops = {
1767         .open           = tracing_lt_open,
1768         .read           = seq_read,
1769         .llseek         = seq_lseek,
1770         .release        = tracing_release,
1771 };
1772
1773 static struct file_operations show_traces_fops = {
1774         .open           = show_traces_open,
1775         .read           = seq_read,
1776         .release        = seq_release,
1777 };
1778
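/*
 * tracing_cpumask is read and written as a hex CPU mask string
 * (cpumask_scnprintf / cpumask_parse_user). Illustrative usage, with
 * an example mask value:
 *   # echo 3 > /debug/tracing/tracing_cpumask   (trace CPUs 0 and 1 only)
 */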
1779 static ssize_t
1780 tracing_cpumask_read(struct file *filp, char __user *ubuf,
1781                      size_t count, loff_t *ppos)
1782 {
1783         int err;
1784
1785         count = min(count, (size_t)NR_CPUS);
1786
1787         mutex_lock(&tracing_cpumask_update_lock);
1788         cpumask_scnprintf(mask_str, NR_CPUS, tracing_cpumask);
1789         err = copy_to_user(ubuf, mask_str, count);
1790         if (err)
1791                 count = -EFAULT;
1792         mutex_unlock(&tracing_cpumask_update_lock);
1793
1794         return count;
1795 }
1796
1797 static ssize_t
1798 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
1799                       size_t count, loff_t *ppos)
1800 {
1801         int err;
1802
1803         mutex_lock(&tracing_cpumask_update_lock);
1804         err = cpumask_parse_user(ubuf, count, tracing_cpumask);
1805         mutex_unlock(&tracing_cpumask_update_lock);
1806
1807         if (err)
1808                 return err;
1809
1810         return count;
1811 }
1812
1813 static struct file_operations tracing_cpumask_fops = {
1814         .open           = tracing_open_generic,
1815         .read           = tracing_cpumask_read,
1816         .write          = tracing_cpumask_write,
1817 };
1818
1819 static ssize_t
1820 tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
1821                        size_t cnt, loff_t *ppos)
1822 {
1823         char *buf;
1824         int r = 0;
1825         int len = 0;
1826         int i;
1827
1828         /* calculate max size */
1829         for (i = 0; trace_options[i]; i++) {
1830                 len += strlen(trace_options[i]);
1831                 len += 3; /* "no" and space */
1832         }
1833
1834         /* +2 for \n and \0 */
1835         buf = kmalloc(len + 2, GFP_KERNEL);
1836         if (!buf)
1837                 return -ENOMEM;
1838
1839         for (i = 0; trace_options[i]; i++) {
1840                 if (trace_flags & (1 << i))
1841                         r += sprintf(buf + r, "%s ", trace_options[i]);
1842                 else
1843                         r += sprintf(buf + r, "no%s ", trace_options[i]);
1844         }
1845
1846         r += sprintf(buf + r, "\n");
1847         WARN_ON(r >= len + 2);
1848
1849         r = simple_read_from_buffer(ubuf, cnt, ppos,
1850                                     buf, r);
1851
1852         kfree(buf);
1853
1854         return r;
1855 }
1856
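/*
 * Writes accept a single option name from trace_options[]; a "no"
 * prefix clears the corresponding bit in trace_flags instead of
 * setting it, e.g. "print-parent" vs. "noprint-parent".
 */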
1857 static ssize_t
1858 tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
1859                         size_t cnt, loff_t *ppos)
1860 {
1861         char buf[64];
1862         char *cmp = buf;
1863         int neg = 0;
1864         int i;
1865
1866         if (cnt > 63)
1867                 cnt = 63;
1868
1869         if (copy_from_user(&buf, ubuf, cnt))
1870                 return -EFAULT;
1871
1872         buf[cnt] = 0;
1873
1874         if (strncmp(buf, "no", 2) == 0) {
1875                 neg = 1;
1876                 cmp += 2;
1877         }
1878
1879         for (i = 0; trace_options[i]; i++) {
1880                 int len = strlen(trace_options[i]);
1881
1882                 if (strncmp(cmp, trace_options[i], len) == 0) {
1883                         if (neg)
1884                                 trace_flags &= ~(1 << i);
1885                         else
1886                                 trace_flags |= (1 << i);
1887                         break;
1888                 }
1889         }
1890
1891         filp->f_pos += cnt;
1892
1893         return cnt;
1894 }
1895
1896 static struct file_operations tracing_iter_fops = {
1897         .open           = tracing_open_generic,
1898         .read           = tracing_iter_ctrl_read,
1899         .write          = tracing_iter_ctrl_write,
1900 };
1901
1902 static const char readme_msg[] =
1903         "tracing mini-HOWTO:\n\n"
1904         "# mkdir /debug\n"
1905         "# mount -t debugfs nodev /debug\n\n"
1906         "# cat /debug/tracing/available_tracers\n"
1907         "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
1908         "# cat /debug/tracing/current_tracer\n"
1909         "none\n"
1910         "# echo sched_switch > /debug/tracing/current_tracer\n"
1911         "# cat /debug/tracing/current_tracer\n"
1912         "sched_switch\n"
1913         "# cat /debug/tracing/iter_ctrl\n"
1914         "noprint-parent nosym-offset nosym-addr noverbose\n"
1915         "# echo print-parent > /debug/tracing/iter_ctrl\n"
1916         "# echo 1 > /debug/tracing/tracing_enabled\n"
1917         "# cat /debug/tracing/trace > /tmp/trace.txt\n"
1918         "# echo 0 > /debug/tracing/tracing_enabled\n"
1919 ;
1920
1921 static ssize_t
1922 tracing_readme_read(struct file *filp, char __user *ubuf,
1923                        size_t cnt, loff_t *ppos)
1924 {
1925         return simple_read_from_buffer(ubuf, cnt, ppos,
1926                                         readme_msg, strlen(readme_msg));
1927 }
1928
1929 static struct file_operations tracing_readme_fops = {
1930         .open           = tracing_open_generic,
1931         .read           = tracing_readme_read,
1932 };
1933
1934 static ssize_t
1935 tracing_ctrl_read(struct file *filp, char __user *ubuf,
1936                   size_t cnt, loff_t *ppos)
1937 {
1938         struct trace_array *tr = filp->private_data;
1939         char buf[64];
1940         int r;
1941
1942         r = sprintf(buf, "%ld\n", tr->ctrl);
1943         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1944 }
1945
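/*
 * Writing a non-zero value enables tracing and zero disables it; when
 * the state actually changes, the current tracer's ctrl_update()
 * callback is invoked with the trace_array.
 */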
1946 static ssize_t
1947 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
1948                    size_t cnt, loff_t *ppos)
1949 {
1950         struct trace_array *tr = filp->private_data;
1951         long val;
1952         char buf[64];
1953
1954         if (cnt > 63)
1955                 cnt = 63;
1956
1957         if (copy_from_user(&buf, ubuf, cnt))
1958                 return -EFAULT;
1959
1960         buf[cnt] = 0;
1961
1962         val = simple_strtoul(buf, NULL, 10);
1963
1964         val = !!val;
1965
1966         mutex_lock(&trace_types_lock);
1967         if (tr->ctrl ^ val) {
1968                 if (val)
1969                         tracer_enabled = 1;
1970                 else
1971                         tracer_enabled = 0;
1972
1973                 tr->ctrl = val;
1974
1975                 if (current_trace && current_trace->ctrl_update)
1976                         current_trace->ctrl_update(tr);
1977         }
1978         mutex_unlock(&trace_types_lock);
1979
1980         filp->f_pos += cnt;
1981
1982         return cnt;
1983 }
1984
1985 static ssize_t
1986 tracing_set_trace_read(struct file *filp, char __user *ubuf,
1987                        size_t cnt, loff_t *ppos)
1988 {
1989         char buf[max_tracer_type_len+2];
1990         int r;
1991
1992         mutex_lock(&trace_types_lock);
1993         if (current_trace)
1994                 r = sprintf(buf, "%s\n", current_trace->name);
1995         else
1996                 r = sprintf(buf, "\n");
1997         mutex_unlock(&trace_types_lock);
1998
1999         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2000 }
2001
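/*
 * Select the current tracer by name: the old tracer's reset() is
 * called before the new tracer's init(), both under trace_types_lock.
 */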
2002 static ssize_t
2003 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2004                         size_t cnt, loff_t *ppos)
2005 {
2006         struct trace_array *tr = &global_trace;
2007         struct tracer *t;
2008         char buf[max_tracer_type_len+1];
2009         int i;
2010
2011         if (cnt > max_tracer_type_len)
2012                 cnt = max_tracer_type_len;
2013
2014         if (copy_from_user(&buf, ubuf, cnt))
2015                 return -EFAULT;
2016
2017         buf[cnt] = 0;
2018
2019         /* strip trailing whitespace. */
2020         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2021                 buf[i] = 0;
2022
2023         mutex_lock(&trace_types_lock);
2024         for (t = trace_types; t; t = t->next) {
2025                 if (strcmp(t->name, buf) == 0)
2026                         break;
2027         }
2028         if (!t || t == current_trace)
2029                 goto out;
2030
2031         if (current_trace && current_trace->reset)
2032                 current_trace->reset(tr);
2033
2034         current_trace = t;
2035         if (t->init)
2036                 t->init(tr);
2037
2038  out:
2039         mutex_unlock(&trace_types_lock);
2040
2041         filp->f_pos += cnt;
2042
2043         return cnt;
2044 }
2045
2046 static ssize_t
2047 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2048                      size_t cnt, loff_t *ppos)
2049 {
2050         unsigned long *ptr = filp->private_data;
2051         char buf[64];
2052         int r;
2053
2054         r = snprintf(buf, 64, "%ld\n",
2055                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2056         if (r > 64)
2057                 r = 64;
2058         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2059 }
2060
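/*
 * Values written here are taken to be in microseconds and stored in
 * nanoseconds (val * 1000); reads convert back via nsecs_to_usecs().
 */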
2061 static ssize_t
2062 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2063                       size_t cnt, loff_t *ppos)
2064 {
2065         long *ptr = filp->private_data;
2066         long val;
2067         char buf[64];
2068
2069         if (cnt > 63)
2070                 cnt = 63;
2071
2072         if (copy_from_user(&buf, ubuf, cnt))
2073                 return -EFAULT;
2074
2075         buf[cnt] = 0;
2076
2077         val = simple_strtoul(buf, NULL, 10);
2078
2079         *ptr = val * 1000;
2080
2081         return cnt;
2082 }
2083
2084 static atomic_t tracing_reader;
2085
2086 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2087 {
2088         struct trace_iterator *iter;
2089
2090         if (tracing_disabled)
2091                 return -ENODEV;
2092
2093         /* We only allow one reader of the pipe */
2094         if (atomic_inc_return(&tracing_reader) != 1) {
2095                 atomic_dec(&tracing_reader);
2096                 return -EBUSY;
2097         }
2098
2099         /* create a buffer to store the information to pass to userspace */
2100         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2101         if (!iter)
2102                 return -ENOMEM;
2103
2104         iter->tr = &global_trace;
2105
2106         filp->private_data = iter;
2107
2108         return 0;
2109 }
2110
2111 static int tracing_release_pipe(struct inode *inode, struct file *file)
2112 {
2113         struct trace_iterator *iter = file->private_data;
2114
2115         kfree(iter);
2116         atomic_dec(&tracing_reader);
2117
2118         return 0;
2119 }
2120
2121 static unsigned int
2122 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2123 {
2124         struct trace_iterator *iter = filp->private_data;
2125
2126         if (trace_flags & TRACE_ITER_BLOCK) {
2127                 /*
2128                  * Always select as readable when in blocking mode
2129                  */
2130                 return POLLIN | POLLRDNORM;
2131         }
2132         else {
2133                 if (!trace_empty(iter))
2134                         return POLLIN | POLLRDNORM;
2135                 poll_wait(filp, &trace_wait, poll_table);
2136                 if (!trace_empty(iter))
2137                         return POLLIN | POLLRDNORM;
2138
2139                 return 0;
2140         }
2141 }
2142
2143 /*
2144  * Consumer reader.
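 * Unlike the "trace" file, entries returned here are consumed from
 * the ring buffer as they are read (see trace_consume() below).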
2145  */
2146 static ssize_t
2147 tracing_read_pipe(struct file *filp, char __user *ubuf,
2148                   size_t cnt, loff_t *ppos)
2149 {
2150         struct trace_iterator *iter = filp->private_data;
2151         struct trace_array_cpu *data;
2152         static cpumask_t mask;
2153         static int start;
2154         unsigned long flags;
2155 #ifdef CONFIG_FTRACE
2156         int ftrace_save;
2157 #endif
2158         int read = 0;
2159         int cpu;
2160         int len;
2161         int ret;
2162
2163         /* return any leftover data */
2164         if (iter->seq.len > start) {
2165                 len = iter->seq.len - start;
2166                 if (cnt > len)
2167                         cnt = len;
2168                 ret = copy_to_user(ubuf, iter->seq.buffer + start, cnt);
2169                 if (ret)
2170                         cnt = -EFAULT;
2171
2172                 start += len;
2173
2174                 return cnt;
2175         }
2176
2177         trace_seq_reset(&iter->seq);
2178         start = 0;
2179
2180         while (trace_empty(iter)) {
2181                 if (!(trace_flags & TRACE_ITER_BLOCK))
2182                         return -EWOULDBLOCK;
2183                 /*
2184                  * This is a makeshift waitqueue. The reasons we don't use
2185                  * an actual wait queue are:
2186                  *  1) we only ever have one waiter
2187                  *  2) the tracer traces all functions, and we don't want
2188                  *     the overhead of calling wake_up and friends
2189                  *     (and of tracing them too).
2190                  *     Anyway, this really is a very primitive wakeup.
2191                  */
2192                 set_current_state(TASK_INTERRUPTIBLE);
2193                 iter->tr->waiter = current;
2194
2195                 /* sleep for one second, and try again. */
2196                 schedule_timeout(HZ);
2197
2198                 iter->tr->waiter = NULL;
2199
2200                 if (signal_pending(current))
2201                         return -EINTR;
2202
2203                 /*
2204                  * We stop blocking once tracing is disabled and we have
2205                  * read something; we keep blocking while tracing is disabled
2206                  * but nothing has been read yet. This allows a user to cat
2207                  * this file, then enable tracing. After we have read
2208                  * something, we give an EOF when tracing is again disabled.
2209                  *
2210                  * iter->pos will be 0 if we haven't read anything.
2211                  */
2212                 if (!tracer_enabled && iter->pos)
2213                         break;
2214
2215                 continue;
2216         }
2217
2218         /* stop when tracing is finished */
2219         if (trace_empty(iter))
2220                 return 0;
2221
2222         if (cnt >= PAGE_SIZE)
2223                 cnt = PAGE_SIZE - 1;
2224
2225         memset(iter, 0, sizeof(*iter));
2226         iter->tr = &global_trace;
2227         iter->pos = -1;
2228
2229         /*
2230          * We need to stop all tracing on all CPUs to read
2231          * the next buffer. This is a bit expensive, but is
2232          * not done often. We fill in all that we can read,
2233          * and then release the locks again.
2234          */
2235
2236         cpus_clear(mask);
2237         local_irq_save(flags);
2238 #ifdef CONFIG_FTRACE
2239         ftrace_save = ftrace_enabled;
2240         ftrace_enabled = 0;
2241 #endif
2242         smp_wmb();
2243         for_each_possible_cpu(cpu) {
2244                 data = iter->tr->data[cpu];
2245
2246                 if (!head_page(data) || !data->trace_idx)
2247                         continue;
2248
2249                 atomic_inc(&data->disabled);
2250                 cpu_set(cpu, mask);
2251         }
2252
2253         for_each_cpu_mask(cpu, mask) {
2254                 data = iter->tr->data[cpu];
2255                 spin_lock(&data->lock);
2256         }
2257
2258         while (find_next_entry_inc(iter) != NULL) {
2259                 int len = iter->seq.len;
2260
2261                 ret = print_trace_line(iter);
2262                 if (!ret) {
2263                         /* don't print partial lines */
2264                         iter->seq.len = len;
2265                         break;
2266                 }
2267
2268                 trace_consume(iter);
2269
2270                 if (iter->seq.len >= cnt)
2271                         break;
2272         }
2273
2274         for_each_cpu_mask(cpu, mask) {
2275                 data = iter->tr->data[cpu];
2276                 spin_unlock(&data->lock);
2277         }
2278
2279         for_each_cpu_mask(cpu, mask) {
2280                 data = iter->tr->data[cpu];
2281                 atomic_dec(&data->disabled);
2282         }
2283 #ifdef CONFIG_FTRACE
2284         ftrace_enabled = ftrace_save;
2285 #endif
2286         local_irq_restore(flags);
2287
2288         /* Now copy what we have to the user */
2289         read = iter->seq.len;
2290         if (read > cnt)
2291                 read = cnt;
2292
2293         ret = copy_to_user(ubuf, iter->seq.buffer, read);
2294
2295         if (read < iter->seq.len)
2296                 start = read;
2297         else
2298                 trace_seq_reset(&iter->seq);
2299
2300         if (ret)
2301                 read = -EFAULT;
2302
2303         return read;
2304 }
2305
2306 static struct file_operations tracing_max_lat_fops = {
2307         .open           = tracing_open_generic,
2308         .read           = tracing_max_lat_read,
2309         .write          = tracing_max_lat_write,
2310 };
2311
2312 static struct file_operations tracing_ctrl_fops = {
2313         .open           = tracing_open_generic,
2314         .read           = tracing_ctrl_read,
2315         .write          = tracing_ctrl_write,
2316 };
2317
2318 static struct file_operations set_tracer_fops = {
2319         .open           = tracing_open_generic,
2320         .read           = tracing_set_trace_read,
2321         .write          = tracing_set_trace_write,
2322 };
2323
2324 static struct file_operations tracing_pipe_fops = {
2325         .open           = tracing_open_pipe,
2326         .poll           = tracing_poll_pipe,
2327         .read           = tracing_read_pipe,
2328         .release        = tracing_release_pipe,
2329 };
2330
2331 #ifdef CONFIG_DYNAMIC_FTRACE
2332
2333 static ssize_t
2334 tracing_read_long(struct file *filp, char __user *ubuf,
2335                   size_t cnt, loff_t *ppos)
2336 {
2337         unsigned long *p = filp->private_data;
2338         char buf[64];
2339         int r;
2340
2341         r = sprintf(buf, "%ld\n", *p);
2342
2343         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2344 }
2345
2346 static struct file_operations tracing_read_long_fops = {
2347         .open           = tracing_open_generic,
2348         .read           = tracing_read_long,
2349 };
2350 #endif
2351
2352 static struct dentry *d_tracer;
2353
2354 struct dentry *tracing_init_dentry(void)
2355 {
2356         static int once;
2357
2358         if (d_tracer)
2359                 return d_tracer;
2360
2361         d_tracer = debugfs_create_dir("tracing", NULL);
2362
2363         if (!d_tracer && !once) {
2364                 once = 1;
2365                 pr_warning("Could not create debugfs directory 'tracing'\n");
2366                 return NULL;
2367         }
2368
2369         return d_tracer;
2370 }
2371
2372 #ifdef CONFIG_FTRACE_SELFTEST
2373 /* Let selftest have access to static functions in this file */
2374 #include "trace_selftest.c"
2375 #endif
2376
2377 static __init void tracer_init_debugfs(void)
2378 {
2379         struct dentry *d_tracer;
2380         struct dentry *entry;
2381
2382         d_tracer = tracing_init_dentry();
2383
2384         entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2385                                     &global_trace, &tracing_ctrl_fops);
2386         if (!entry)
2387                 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2388
2389         entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
2390                                     NULL, &tracing_iter_fops);
2391         if (!entry)
2392                 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
2393
2394         entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2395                                     NULL, &tracing_cpumask_fops);
2396         if (!entry)
2397                 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2398
2399         entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2400                                     &global_trace, &tracing_lt_fops);
2401         if (!entry)
2402                 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2403
2404         entry = debugfs_create_file("trace", 0444, d_tracer,
2405                                     &global_trace, &tracing_fops);
2406         if (!entry)
2407                 pr_warning("Could not create debugfs 'trace' entry\n");
2408
2409         entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2410                                     &global_trace, &show_traces_fops);
2411         if (!entry)
2412                 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2413
2414         entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2415                                     &global_trace, &set_tracer_fops);
2416         if (!entry)
2417                 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2418
2419         entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2420                                     &tracing_max_latency,
2421                                     &tracing_max_lat_fops);
2422         if (!entry)
2423                 pr_warning("Could not create debugfs "
2424                            "'tracing_max_latency' entry\n");
2425
2426         entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2427                                     &tracing_thresh, &tracing_max_lat_fops);
2428         if (!entry)
2429                 pr_warning("Could not create debugfs "
2430                            "'tracing_thresh' entry\n");
2431         entry = debugfs_create_file("README", 0644, d_tracer,
2432                                     NULL, &tracing_readme_fops);
2433         if (!entry)
2434                 pr_warning("Could not create debugfs 'README' entry\n");
2435
2436         entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2437                                     NULL, &tracing_pipe_fops);
2438         if (!entry)
2439                 pr_warning("Could not create debugfs "
2440                            "'trace_pipe' entry\n");
2441
2442 #ifdef CONFIG_DYNAMIC_FTRACE
2443         entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2444                                     &ftrace_update_tot_cnt,
2445                                     &tracing_read_long_fops);
2446         if (!entry)
2447                 pr_warning("Could not create debugfs "
2448                            "'dyn_ftrace_total_info' entry\n");
2449 #endif
2450 }
2451
2452 /* dummy tracer used to disable tracing */
2453 static struct tracer no_tracer __read_mostly =
2454 {
2455         .name           = "none",
2456 };
2457
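/*
 * Grow every per-CPU buffer (and the max_tr buffer when
 * CONFIG_TRACER_MAX_TRACE is set) by one page; on success
 * global_trace.entries grows by ENTRIES_PER_PAGE.
 */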
2458 static int trace_alloc_page(void)
2459 {
2460         struct trace_array_cpu *data;
2461         struct page *page, *tmp;
2462         LIST_HEAD(pages);
2463         void *array;
2464         int i;
2465
2466         /* first allocate a page for each CPU */
2467         for_each_possible_cpu(i) {
2468                 array = (void *)__get_free_page(GFP_KERNEL);
2469                 if (array == NULL) {
2470                         printk(KERN_ERR "tracer: failed to allocate page "
2471                                "for trace buffer!\n");
2472                         goto free_pages;
2473                 }
2474
2475                 page = virt_to_page(array);
2476                 list_add(&page->lru, &pages);
2477
2478 /* Only allocate if we are actually using the max trace */
2479 #ifdef CONFIG_TRACER_MAX_TRACE
2480                 array = (void *)__get_free_page(GFP_KERNEL);
2481                 if (array == NULL) {
2482                         printk(KERN_ERR "tracer: failed to allocate page "
2483                                "for trace buffer!\n");
2484                         goto free_pages;
2485                 }
2486                 page = virt_to_page(array);
2487                 list_add(&page->lru, &pages);
2488 #endif
2489         }
2490
2491         /* Now that we have successfully allocated a page per CPU, add them */
2492         for_each_possible_cpu(i) {
2493                 data = global_trace.data[i];
2494                 spin_lock_init(&data->lock);
2495                 lockdep_set_class(&data->lock, &data->lock_key);
2496                 page = list_entry(pages.next, struct page, lru);
2497                 list_del_init(&page->lru);
2498                 list_add_tail(&page->lru, &data->trace_pages);
2499                 ClearPageLRU(page);
2500
2501 #ifdef CONFIG_TRACER_MAX_TRACE
2502                 data = max_tr.data[i];
2503                 spin_lock_init(&data->lock);
2504                 lockdep_set_class(&data->lock, &data->lock_key);
2505                 page = list_entry(pages.next, struct page, lru);
2506                 list_del_init(&page->lru);
2507                 list_add_tail(&page->lru, &data->trace_pages);
2508                 SetPageLRU(page);
2509 #endif
2510         }
2511         global_trace.entries += ENTRIES_PER_PAGE;
2512
2513         return 0;
2514
2515  free_pages:
2516         list_for_each_entry_safe(page, tmp, &pages, lru) {
2517                 list_del_init(&page->lru);
2518                 __free_page(page);
2519         }
2520         return -ENOMEM;
2521 }
2522
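/*
 * Boot-time setup (fs_initcall): allocate the first page of every
 * per-CPU buffer, grow the buffers toward trace_nr_entries, create
 * the debugfs files, register the "none" tracer and enable tracing.
 */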
2523 __init static int tracer_alloc_buffers(void)
2524 {
2525         struct trace_array_cpu *data;
2526         void *array;
2527         struct page *page;
2528         int pages = 0;
2529         int ret = -ENOMEM;
2530         int i;
2531
2532         global_trace.ctrl = tracer_enabled;
2533
2534         /* Allocate the first page for all buffers */
2535         for_each_possible_cpu(i) {
2536                 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
2537                 max_tr.data[i] = &per_cpu(max_data, i);
2538
2539                 array = (void *)__get_free_page(GFP_KERNEL);
2540                 if (array == NULL) {
2541                         printk(KERN_ERR "tracer: failed to allocate page "
2542                                "for trace buffer!\n");
2543                         goto free_buffers;
2544                 }
2545
2546                 /* add the page backing the array to the buffer's page list */
2547                 INIT_LIST_HEAD(&data->trace_pages);
2548                 page = virt_to_page(array);
2549                 list_add(&page->lru, &data->trace_pages);
2550                 /* use the LRU flag to differentiate the two buffers */
2551                 ClearPageLRU(page);
2552
2553 /* Only allocate if we are actually using the max trace */
2554 #ifdef CONFIG_TRACER_MAX_TRACE
2555                 array = (void *)__get_free_page(GFP_KERNEL);
2556                 if (array == NULL) {
2557                         printk(KERN_ERR "tracer: failed to allocate page "
2558                                "for trace buffer!\n");
2559                         goto free_buffers;
2560                 }
2561
2562                 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
2563                 page = virt_to_page(array);
2564                 list_add(&page->lru, &max_tr.data[i]->trace_pages);
2565                 SetPageLRU(page);
2566 #endif
2567         }
2568
2569         /*
2570          * Since we allocate by orders of pages, we may be able to
2571          * round up a bit.
2572          */
2573         global_trace.entries = ENTRIES_PER_PAGE;
2574         pages++;
2575
2576         while (global_trace.entries < trace_nr_entries) {
2577                 if (trace_alloc_page())
2578                         break;
2579                 pages++;
2580         }
2581         max_tr.entries = global_trace.entries;
2582
2583         pr_info("tracer: %d pages allocated for %ld",
2584                 pages, trace_nr_entries);
2585         pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE);
2586         pr_info("   actual entries %ld\n", global_trace.entries);
2587
2588         tracer_init_debugfs();
2589
2590         trace_init_cmdlines();
2591
2592         register_tracer(&no_tracer);
2593         current_trace = &no_tracer;
2594
2595         /* All seems OK, enable tracing */
2596         tracing_disabled = 0;
2597
2598         return 0;
2599
2600  free_buffers:
2601         for (i-- ; i >= 0; i--) {
2602                 struct page *page, *tmp;
2603                 struct trace_array_cpu *data = global_trace.data[i];
2604
2605                 if (data) {
2606                         list_for_each_entry_safe(page, tmp,
2607                                                  &data->trace_pages, lru) {
2608                                 list_del_init(&page->lru);
2609                                 __free_page(page);
2610                         }
2611                 }
2612
2613 #ifdef CONFIG_TRACER_MAX_TRACE
2614                 data = max_tr.data[i];
2615                 if (data) {
2616                         list_for_each_entry_safe(page, tmp,
2617                                                  &data->trace_pages, lru) {
2618                                 list_del_init(&page->lru);
2619                                 __free_page(page);
2620                         }
2621                 }
2622 #endif
2623         }
2624         return ret;
2625 }
2626 fs_initcall(tracer_alloc_buffers);