[POWERPC] Prevent decrementer clockevents from firing early
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e71a0d8c597ab38fd27cfc21362ea7ff13592ed6..64b503c82a31a4d6848a873b0b08aa28d90795ab 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -73,6 +73,7 @@
 
 /* powerpc clocksource/clockevent code */
 
+#include <linux/clockchips.h>
 #include <linux/clocksource.h>
 
 static cycle_t rtc_read(void);
@@ -97,6 +98,28 @@ static struct clocksource clocksource_timebase = {
        .read         = timebase_read,
 };
 
+#define DECREMENTER_MAX        0x7fffffff
+
+static int decrementer_set_next_event(unsigned long evt,
+                                     struct clock_event_device *dev);
+static void decrementer_set_mode(enum clock_event_mode mode,
+                                struct clock_event_device *dev);
+
+static struct clock_event_device decrementer_clockevent = {
+       .name           = "decrementer",
+       .rating         = 200,
+       .shift          = 32,
+       .mult           = 0,    /* To be filled in */
+       .irq            = 0,
+       .set_next_event = decrementer_set_next_event,
+       .set_mode       = decrementer_set_mode,
+       .features       = CLOCK_EVT_FEAT_ONESHOT,
+};
+
+static DEFINE_PER_CPU(struct clock_event_device, decrementers);
+void init_decrementer_clockevent(void);
+static DEFINE_PER_CPU(u64, decrementer_next_tb);
+
 #ifdef CONFIG_PPC_ISERIES
 static unsigned long __initdata iSeries_recal_titan;
 static signed long __initdata iSeries_recal_tb;
@@ -517,20 +540,30 @@ void __init iSeries_time_init_early(void)
 void timer_interrupt(struct pt_regs * regs)
 {
        struct pt_regs *old_regs;
-       int next_dec;
        int cpu = smp_processor_id();
-       unsigned long ticks;
-       u64 tb_next_jiffy;
+       struct clock_event_device *evt = &per_cpu(decrementers, cpu);
+       u64 now;
+
+       /* Ensure a positive value is written to the decrementer, or else
+        * some CPUs will continue to take decrementer exceptions */
+       set_dec(DECREMENTER_MAX);
 
 #ifdef CONFIG_PPC32
        if (atomic_read(&ppc_n_lost_interrupts) != 0)
                do_IRQ(regs);
 #endif
 
+       now = get_tb_or_rtc();
+       if (now < per_cpu(decrementer_next_tb, cpu)) {
+               /* not time for this event yet */
+               now = per_cpu(decrementer_next_tb, cpu) - now;
+               if (now <= DECREMENTER_MAX)
+                       set_dec((unsigned int)now - 1);
+               return;
+       }
        old_regs = set_irq_regs(regs);
        irq_enter();
 
-       profile_tick(CPU_PROFILING);
        calculate_steal_time();
 
 #ifdef CONFIG_PPC_ISERIES
@@ -538,44 +571,20 @@ void timer_interrupt(struct pt_regs * regs)
                get_lppaca()->int_dword.fields.decr_int = 0;
 #endif
 
-       while ((ticks = tb_ticks_since(per_cpu(last_jiffy, cpu)))
-              >= tb_ticks_per_jiffy) {
-               /* Update last_jiffy */
-               per_cpu(last_jiffy, cpu) += tb_ticks_per_jiffy;
-               /* Handle RTCL overflow on 601 */
-               if (__USE_RTC() && per_cpu(last_jiffy, cpu) >= 1000000000)
-                       per_cpu(last_jiffy, cpu) -= 1000000000;
-
-               /*
-                * We cannot disable the decrementer, so in the period
-                * between this cpu's being marked offline in cpu_online_map
-                * and calling stop-self, it is taking timer interrupts.
-                * Avoid calling into the scheduler rebalancing code if this
-                * is the case.
-                */
-               if (!cpu_is_offline(cpu))
-                       account_process_time(regs);
-
-               /*
-                * No need to check whether cpu is offline here; boot_cpuid
-                * should have been fixed up by now.
-                */
-               if (cpu != boot_cpuid)
-                       continue;
+       /*
+        * We cannot disable the decrementer, so in the period
+        * between this cpu's being marked offline in cpu_online_map
+        * and calling stop-self, it is taking timer interrupts.
+        * Avoid calling into the scheduler rebalancing code if this
+        * is the case.
+        */
+       if (!cpu_is_offline(cpu))
+               account_process_time(regs);
 
-               write_seqlock(&xtime_lock);
-               tb_next_jiffy = tb_last_jiffy + tb_ticks_per_jiffy;
-               if (__USE_RTC() && tb_next_jiffy >= 1000000000)
-                       tb_next_jiffy -= 1000000000;
-               if (per_cpu(last_jiffy, cpu) >= tb_next_jiffy) {
-                       tb_last_jiffy = tb_next_jiffy;
-                       do_timer(1);
-               }
-               write_sequnlock(&xtime_lock);
-       }
-       
-       next_dec = tb_ticks_per_jiffy - ticks;
-       set_dec(next_dec);
+       if (evt->event_handler)
+               evt->event_handler(evt);
+       else
+               evt->set_next_event(DECREMENTER_MAX, evt);
 
 #ifdef CONFIG_PPC_ISERIES
        if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending())
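The early-expiry guard added in the two hunks above is the heart of this change and is easier to follow in isolation. The user-space sketch below mirrors the same arithmetic; the function name and sample values are invented for illustration, and in the kernel the result is written with set_dec() against the per-CPU decrementer_next_tb rather than returned.

/*
 * Standalone illustration (not kernel code) of the early-expiry guard
 * in timer_interrupt() above.  Given the current timebase value and the
 * programmed expiry, it returns the value to reload into the decrementer,
 * or -1 to signal "event is due: dispatch the handler".
 */
#include <stdint.h>
#include <stdio.h>

#define DECREMENTER_MAX 0x7fffffff

static long long reload_for_early_tick(uint64_t now, uint64_t next_tb)
{
	if (now >= next_tb)
		return -1;			/* event is due */

	uint64_t remaining = next_tb - now;
	if (remaining > DECREMENTER_MAX)
		/* too far away: keep the DECREMENTER_MAX loaded at entry */
		return DECREMENTER_MAX;

	/* the decrementer fires on the 0 -> -1 transition, hence the -1 */
	return (long long)remaining - 1;
}

int main(void)
{
	/* interrupt arrived 1000 timebase ticks early */
	printf("%lld\n", reload_for_early_tick(5000, 6000));	/* 999 */
	/* interrupt arrived on (or after) the programmed expiry */
	printf("%lld\n", reload_for_early_tick(6000, 6000));	/* -1  */
	return 0;
}

When the event really is due, the interrupt handler above goes on to call evt->event_handler(), which is what drives the tick from here on.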
@@ -795,6 +804,57 @@ void __init clocksource_init(void)
               clock->name, clock->mult, clock->shift);
 }
 
+static int decrementer_set_next_event(unsigned long evt,
+                                     struct clock_event_device *dev)
+{
+       __get_cpu_var(decrementer_next_tb) = get_tb_or_rtc() + evt;
+       /* The decrementer interrupts on the 0 -> -1 transition */
+       if (evt)
+               --evt;
+       set_dec(evt);
+       return 0;
+}
+
+static void decrementer_set_mode(enum clock_event_mode mode,
+                                struct clock_event_device *dev)
+{
+       if (mode != CLOCK_EVT_MODE_ONESHOT)
+               decrementer_set_next_event(DECREMENTER_MAX, dev);
+}
+
+static void register_decrementer_clockevent(int cpu)
+{
+       struct clock_event_device *dec = &per_cpu(decrementers, cpu);
+
+       *dec = decrementer_clockevent;
+       dec->cpumask = cpumask_of_cpu(cpu);
+
+       printk(KERN_ERR "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n",
+              dec->name, dec->mult, dec->shift, cpu);
+
+       clockevents_register_device(dec);
+}
+
+void init_decrementer_clockevent(void)
+{
+       int cpu = smp_processor_id();
+
+       decrementer_clockevent.mult = div_sc(ppc_tb_freq, NSEC_PER_SEC,
+                                            decrementer_clockevent.shift);
+       decrementer_clockevent.max_delta_ns =
+               clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
+       decrementer_clockevent.min_delta_ns = 1000;
+
+       register_decrementer_clockevent(cpu);
+}
+
+void secondary_cpu_time_init(void)
+{
+       /* FIXME: Should make unrelated change to move snapshot_timebase
+        * call here! */
+       register_decrementer_clockevent(smp_processor_id());
+}
+
 /* This function is only called on the boot processor */
 void __init time_init(void)
 {
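The mult/shift pair programmed in init_decrementer_clockevent() above is ordinary fixed-point scaling: div_sc() computes mult so that nanosecond deltas convert to timebase ticks as (ns * mult) >> shift, and clockevent_delta2ns() essentially runs the same relation backwards to express DECREMENTER_MAX in nanoseconds. A minimal user-space sketch of that arithmetic, assuming a 512 MHz timebase (the real value comes from ppc_tb_freq via the device tree), looks like this:

/* Standalone illustration (not kernel code) of the math behind
 * div_sc() and clockevent_delta2ns() with shift = 32. */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL
#define DECREMENTER_MAX	0x7fffffff
#define SHIFT		32

int main(void)
{
	uint64_t tb_freq = 512000000;	/* assumed 512 MHz timebase */

	/* div_sc(): mult such that ticks = (ns * mult) >> shift */
	uint64_t mult = (tb_freq << SHIFT) / NSEC_PER_SEC;

	/* clockevent_delta2ns(): largest programmable delta, back in ns */
	uint64_t max_delta_ns = ((uint64_t)DECREMENTER_MAX << SHIFT) / mult;

	/* round-trip check: convert 1 ms into decrementer ticks */
	uint64_t ticks_per_ms = (1000000ULL * mult) >> SHIFT;

	printf("mult         = 0x%llx\n", (unsigned long long)mult);
	printf("max_delta_ns = %llu ns (~%llu s)\n",
	       (unsigned long long)max_delta_ns,
	       (unsigned long long)(max_delta_ns / NSEC_PER_SEC));
	printf("1 ms         = %llu timebase ticks\n",
	       (unsigned long long)ticks_per_ms);
	return 0;
}

With the assumed 512 MHz timebase the longest programmable one-shot interval (2^31 - 1 ticks) comes out to roughly 4.2 seconds, which is the max_delta_ns the clockevents core is told about when the device is registered.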
@@ -908,8 +968,7 @@ void __init time_init(void)
        if (!firmware_has_feature(FW_FEATURE_ISERIES))
                clocksource_init();
 
-       /* Not exact, but the timer interrupt takes care of this */
-       set_dec(tb_ticks_per_jiffy);
+       init_decrementer_clockevent();
 }