X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=kernel%2Fhrtimer.c;h=421be5fe5cc78f5d2be0eabd3ba2367fe1b15b3e;hb=9bf8a943ad79a3bb15597fe0275f8b1cf26e2010;hp=bd5d6b5060bcd5704c278ea38954862fb463889f;hpb=50d9a126240f9961cfdd063336bbeb91f77a7dce;p=linux-2.6 diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index bd5d6b5060..421be5fe5c 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -43,6 +43,7 @@ #include #include #include +#include #include @@ -152,15 +153,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) ktime_add(xtim, tomono); } -/* - * Helper function to check, whether the timer is running the callback - * function - */ -static inline int hrtimer_callback_running(struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_CALLBACK; -} - /* * Functions and macros which are different for UP/SMP systems are kept in a * single place @@ -306,7 +298,7 @@ EXPORT_SYMBOL_GPL(ktime_sub_ns); /* * Divide a ktime value by a nanosecond value */ -unsigned long ktime_divns(const ktime_t kt, s64 div) +u64 ktime_divns(const ktime_t kt, s64 div) { u64 dclc, inc, dns; int sft = 0; @@ -321,10 +313,136 @@ unsigned long ktime_divns(const ktime_t kt, s64 div) dclc >>= sft; do_div(dclc, (unsigned long) div); - return (unsigned long) dclc; + return dclc; } #endif /* BITS_PER_LONG >= 64 */ +/* + * Add two ktime values and do a safety check for overflow: + */ +ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) +{ + ktime_t res = ktime_add(lhs, rhs); + + /* + * We use KTIME_SEC_MAX here, the maximum timeout which we can + * return to user space in a timespec: + */ + if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64) + res = ktime_set(KTIME_SEC_MAX, 0); + + return res; +} + +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS + +static struct debug_obj_descr hrtimer_debug_descr; + +/* + * fixup_init is called when: + * - an active object is initialized + */ +static int hrtimer_fixup_init(void *addr, enum debug_obj_state state) +{ + struct hrtimer *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + hrtimer_cancel(timer); + debug_object_init(timer, &hrtimer_debug_descr); + return 1; + default: + return 0; + } +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + */ +static int hrtimer_fixup_activate(void *addr, enum debug_obj_state state) +{ + switch (state) { + + case ODEBUG_STATE_NOTAVAILABLE: + WARN_ON_ONCE(1); + return 0; + + case ODEBUG_STATE_ACTIVE: + WARN_ON(1); + + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int hrtimer_fixup_free(void *addr, enum debug_obj_state state) +{ + struct hrtimer *timer = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + hrtimer_cancel(timer); + debug_object_free(timer, &hrtimer_debug_descr); + return 1; + default: + return 0; + } +} + +static struct debug_obj_descr hrtimer_debug_descr = { + .name = "hrtimer", + .fixup_init = hrtimer_fixup_init, + .fixup_activate = hrtimer_fixup_activate, + .fixup_free = hrtimer_fixup_free, +}; + +static inline void debug_hrtimer_init(struct hrtimer *timer) +{ + debug_object_init(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_activate(struct hrtimer *timer) +{ + debug_object_activate(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_deactivate(struct hrtimer *timer) +{ + debug_object_deactivate(timer, &hrtimer_debug_descr); +} + +static inline void debug_hrtimer_free(struct hrtimer *timer) +{ + debug_object_free(timer, &hrtimer_debug_descr); +} + +static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode); + +void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) +{ + debug_object_init_on_stack(timer, &hrtimer_debug_descr); + __hrtimer_init(timer, clock_id, mode); +} + +void destroy_hrtimer_on_stack(struct hrtimer *timer) +{ + debug_object_free(timer, &hrtimer_debug_descr); +} + +#else +static inline void debug_hrtimer_init(struct hrtimer *timer) { } +static inline void debug_hrtimer_activate(struct hrtimer *timer) { } +static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } +#endif + /* * Check, whether the timer is on the callback pending list */ @@ -425,6 +543,8 @@ static int hrtimer_reprogram(struct hrtimer *timer, ktime_t expires = ktime_sub(timer->expires, base->offset); int res; + WARN_ON_ONCE(timer->expires.tv64 < 0); + /* * When the callback is running, we do not reprogram the clock event * device. The timer callback is either running on a different CPU or @@ -435,6 +555,15 @@ static int hrtimer_reprogram(struct hrtimer *timer, if (hrtimer_callback_running(timer)) return 0; + /* + * CLOCK_REALTIME timer might be requested with an absolute + * expiry time which is less than base->offset. Nothing wrong + * about that, just avoid to call into the tick code, which + * has now objections against negative expiry values. + */ + if (expires.tv64 < 0) + return -ETIME; + if (expires.tv64 >= expires_next->tv64) return 0; @@ -539,6 +668,7 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, /* Timer is expired, act upon the callback mode */ switch(timer->cb_mode) { case HRTIMER_CB_IRQSAFE_NO_RESTART: + debug_hrtimer_deactivate(timer); /* * We can call the callback from here. No restart * happens, so no danger of recursion @@ -553,6 +683,7 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, * the tick timer in the softirq ! The calling site * takes care of this. */ + debug_hrtimer_deactivate(timer); return 1; case HRTIMER_CB_IRQSAFE: case HRTIMER_CB_SOFTIRQ: @@ -562,7 +693,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, list_add_tail(&timer->cb_entry, &base->cpu_base->cb_pending); timer->state = HRTIMER_STATE_PENDING; - raise_softirq(HRTIMER_SOFTIRQ); return 1; default: BUG(); @@ -605,6 +735,11 @@ static int hrtimer_switch_to_hres(void) return 1; } +static inline void hrtimer_raise_softirq(void) +{ + raise_softirq(HRTIMER_SOFTIRQ); +} + #else static inline int hrtimer_hres_active(void) { return 0; } @@ -623,6 +758,7 @@ static inline int hrtimer_reprogram(struct hrtimer *timer, { return 0; } +static inline void hrtimer_raise_softirq(void) { } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -656,10 +792,9 @@ void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) * Forward the timer expiry so it will expire in the future. * Returns the number of overruns. */ -unsigned long -hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) +u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) { - unsigned long orun = 1; + u64 orun = 1; ktime_t delta; delta = ktime_sub(now, timer->expires); @@ -683,13 +818,7 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) */ orun++; } - timer->expires = ktime_add(timer->expires, interval); - /* - * Make sure, that the result did not wrap with a very large - * interval. - */ - if (timer->expires.tv64 < 0) - timer->expires = ktime_set(KTIME_SEC_MAX, 0); + timer->expires = ktime_add_safe(timer->expires, interval); return orun; } @@ -709,6 +838,8 @@ static void enqueue_hrtimer(struct hrtimer *timer, struct hrtimer *entry; int leftmost = 1; + debug_hrtimer_activate(timer); + /* * Find the right place in the rbtree: */ @@ -805,6 +936,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * reprogramming happens in the interrupt handler. This is a * rare case and less expensive than a smp call. */ + debug_hrtimer_deactivate(timer); timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, @@ -829,7 +961,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; - int ret; + int ret, raise; base = lock_hrtimer_base(timer, &flags); @@ -840,7 +972,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) new_base = switch_hrtimer_base(timer, base); if (mode == HRTIMER_MODE_REL) { - tim = ktime_add(tim, new_base->get_time()); + tim = ktime_add_safe(tim, new_base->get_time()); /* * CONFIG_TIME_LOW_RES is a temporary way for architectures * to signal that they simply return xtime in @@ -849,17 +981,10 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) * timeouts. This will go away with the GTOD framework. */ #ifdef CONFIG_TIME_LOW_RES - tim = ktime_add(tim, base->resolution); + tim = ktime_add_safe(tim, base->resolution); #endif - /* - * Careful here: User space might have asked for a - * very long sleep, so the add above might result in a - * negative number, which enqueues the timer in front - * of the queue. - */ - if (tim.tv64 < 0) - tim.tv64 = KTIME_MAX; } + timer->expires = tim; timer_stats_hrtimer_set_start_info(timer); @@ -871,8 +996,18 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) enqueue_hrtimer(timer, new_base, new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); + /* + * The timer may be expired and moved to the cb_pending + * list. We can not raise the softirq with base lock held due + * to a possible deadlock with runqueue lock. + */ + raise = timer->state == HRTIMER_STATE_PENDING; + unlock_hrtimer_base(timer, &flags); + if (raise) + hrtimer_raise_softirq(); + return ret; } EXPORT_SYMBOL_GPL(hrtimer_start); @@ -983,14 +1118,8 @@ ktime_t hrtimer_get_next_event(void) } #endif -/** - * hrtimer_init - initialize a timer to the given clock - * @timer: the timer to be initialized - * @clock_id: the clock to be used - * @mode: timer mode abs/rel - */ -void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, - enum hrtimer_mode mode) +static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) { struct hrtimer_cpu_base *cpu_base; @@ -1011,6 +1140,19 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, memset(timer->start_comm, 0, TASK_COMM_LEN); #endif } + +/** + * hrtimer_init - initialize a timer to the given clock + * @timer: the timer to be initialized + * @clock_id: the clock to be used + * @mode: timer mode abs/rel + */ +void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, + enum hrtimer_mode mode) +{ + debug_hrtimer_init(timer); + __hrtimer_init(timer, clock_id, mode); +} EXPORT_SYMBOL_GPL(hrtimer_init); /** @@ -1044,6 +1186,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) timer = list_entry(cpu_base->cb_pending.next, struct hrtimer, cb_entry); + debug_hrtimer_deactivate(timer); timer_stats_account_hrtimer(timer); fn = timer->function; @@ -1067,8 +1210,19 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) * If the timer was rearmed on another CPU, reprogram * the event device. */ - if (timer->base->first == &timer->node) - hrtimer_reprogram(timer, timer->base); + struct hrtimer_clock_base *base = timer->base; + + if (base->first == &timer->node && + hrtimer_reprogram(timer, base)) { + /* + * Timer is expired. Thus move it from tree to + * pending list again. + */ + __remove_hrtimer(timer, base, + HRTIMER_STATE_PENDING, 0); + list_add_tail(&timer->cb_entry, + &base->cpu_base->cb_pending); + } } } spin_unlock_irq(&cpu_base->lock); @@ -1081,6 +1235,7 @@ static void __run_hrtimer(struct hrtimer *timer) enum hrtimer_restart (*fn)(struct hrtimer *); int restart; + debug_hrtimer_deactivate(timer); __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); timer_stats_account_hrtimer(timer); @@ -1225,51 +1380,50 @@ void hrtimer_run_pending(void) /* * Called from hardirq context every jiffy */ -static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, - int index) +void hrtimer_run_queues(void) { struct rb_node *node; - struct hrtimer_clock_base *base = &cpu_base->clock_base[index]; + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + struct hrtimer_clock_base *base; + int index, gettime = 1; - if (!base->first) + if (hrtimer_hres_active()) return; - if (base->get_softirq_time) - base->softirq_time = base->get_softirq_time(); - - spin_lock(&cpu_base->lock); - - while ((node = base->first)) { - struct hrtimer *timer; - - timer = rb_entry(node, struct hrtimer, node); - if (base->softirq_time.tv64 <= timer->expires.tv64) - break; + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { + base = &cpu_base->clock_base[index]; - if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { - __remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); + if (!base->first) continue; + + if (base->get_softirq_time) + base->softirq_time = base->get_softirq_time(); + else if (gettime) { + hrtimer_get_softirq_time(cpu_base); + gettime = 0; } - __run_hrtimer(timer); - } - spin_unlock(&cpu_base->lock); -} + spin_lock(&cpu_base->lock); -void hrtimer_run_queues(void) -{ - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - int i; + while ((node = base->first)) { + struct hrtimer *timer; - if (hrtimer_hres_active()) - return; + timer = rb_entry(node, struct hrtimer, node); + if (base->softirq_time.tv64 <= timer->expires.tv64) + break; - hrtimer_get_softirq_time(cpu_base); + if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { + __remove_hrtimer(timer, base, + HRTIMER_STATE_PENDING, 0); + list_add_tail(&timer->cb_entry, + &base->cpu_base->cb_pending); + continue; + } - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) - run_hrtimer_queue(cpu_base, i); + __run_hrtimer(timer); + } + spin_unlock(&cpu_base->lock); + } } /* @@ -1315,75 +1469,94 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod } while (t->task && !signal_pending(current)); + __set_current_state(TASK_RUNNING); + return t->task == NULL; } +static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp) +{ + struct timespec rmt; + ktime_t rem; + + rem = ktime_sub(timer->expires, timer->base->get_time()); + if (rem.tv64 <= 0) + return 0; + rmt = ktime_to_timespec(rem); + + if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) + return -EFAULT; + + return 1; +} + long __sched hrtimer_nanosleep_restart(struct restart_block *restart) { struct hrtimer_sleeper t; - struct timespec *rmtp; - ktime_t time; + struct timespec __user *rmtp; + int ret = 0; - restart->fn = do_no_restart_syscall; - - hrtimer_init(&t.timer, restart->arg0, HRTIMER_MODE_ABS); - t.timer.expires.tv64 = ((u64)restart->arg3 << 32) | (u64) restart->arg2; + hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, + HRTIMER_MODE_ABS); + t.timer.expires.tv64 = restart->nanosleep.expires; if (do_nanosleep(&t, HRTIMER_MODE_ABS)) - return 0; + goto out; - rmtp = (struct timespec *)restart->arg1; + rmtp = restart->nanosleep.rmtp; if (rmtp) { - time = ktime_sub(t.timer.expires, t.timer.base->get_time()); - if (time.tv64 <= 0) - return 0; - *rmtp = ktime_to_timespec(time); + ret = update_rmtp(&t.timer, rmtp); + if (ret <= 0) + goto out; } - restart->fn = hrtimer_nanosleep_restart; - /* The other values in restart are already filled in */ - return -ERESTART_RESTARTBLOCK; + ret = -ERESTART_RESTARTBLOCK; +out: + destroy_hrtimer_on_stack(&t.timer); + return ret; } -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec *rmtp, +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, const enum hrtimer_mode mode, const clockid_t clockid) { struct restart_block *restart; struct hrtimer_sleeper t; - ktime_t rem; + int ret = 0; - hrtimer_init(&t.timer, clockid, mode); + hrtimer_init_on_stack(&t.timer, clockid, mode); t.timer.expires = timespec_to_ktime(*rqtp); if (do_nanosleep(&t, mode)) - return 0; + goto out; /* Absolute timers do not update the rmtp value and restart: */ - if (mode == HRTIMER_MODE_ABS) - return -ERESTARTNOHAND; + if (mode == HRTIMER_MODE_ABS) { + ret = -ERESTARTNOHAND; + goto out; + } if (rmtp) { - rem = ktime_sub(t.timer.expires, t.timer.base->get_time()); - if (rem.tv64 <= 0) - return 0; - *rmtp = ktime_to_timespec(rem); + ret = update_rmtp(&t.timer, rmtp); + if (ret <= 0) + goto out; } restart = ¤t_thread_info()->restart_block; restart->fn = hrtimer_nanosleep_restart; - restart->arg0 = (unsigned long) t.timer.base->index; - restart->arg1 = (unsigned long) rmtp; - restart->arg2 = t.timer.expires.tv64 & 0xFFFFFFFF; - restart->arg3 = t.timer.expires.tv64 >> 32; + restart->nanosleep.index = t.timer.base->index; + restart->nanosleep.rmtp = rmtp; + restart->nanosleep.expires = t.timer.expires.tv64; - return -ERESTART_RESTARTBLOCK; + ret = -ERESTART_RESTARTBLOCK; +out: + destroy_hrtimer_on_stack(&t.timer); + return ret; } asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) { - struct timespec tu, rmt; - int ret; + struct timespec tu; if (copy_from_user(&tu, rqtp, sizeof(tu))) return -EFAULT; @@ -1391,15 +1564,7 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) if (!timespec_valid(&tu)) return -EINVAL; - ret = hrtimer_nanosleep(&tu, rmtp ? &rmt : NULL, HRTIMER_MODE_REL, - CLOCK_MONOTONIC); - - if (ret && rmtp) { - if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) - return -EFAULT; - } - - return ret; + return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); } /* @@ -1411,7 +1576,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu) int i; spin_lock_init(&cpu_base->lock); - lockdep_set_class(&cpu_base->lock, &cpu_base->lock_key); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) cpu_base->clock_base[i].cpu_base = cpu_base; @@ -1431,6 +1595,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, while ((node = rb_first(&old_base->active))) { timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); + debug_hrtimer_deactivate(timer); __remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0); timer->base = new_base; /* @@ -1452,16 +1617,16 @@ static void migrate_hrtimers(int cpu) tick_cancel_sched_timer(cpu); local_irq_disable(); - double_spin_lock(&new_base->lock, &old_base->lock, - smp_processor_id() < cpu); + spin_lock(&new_base->lock); + spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); } - double_spin_unlock(&new_base->lock, &old_base->lock, - smp_processor_id() < cpu); + spin_unlock(&old_base->lock); + spin_unlock(&new_base->lock); local_irq_enable(); put_cpu_var(hrtimer_bases); }