From: Peter Zijlstra Date: Wed, 13 Feb 2008 14:45:39 +0000 (+0100) Subject: sched: fair-group: separate tg->shares from task_group_lock X-Git-Tag: v2.6.25-rc2~73^2~6 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8ed3699682be75fd68281239c202ad3830f9c72d;p=linux-2.6 sched: fair-group: separate tg->shares from task_group_lock On Mon, 2008-02-11 at 15:09 +0300, Denis V. Lunev wrote: > BUG: sleeping function called from invalid context > at /home/den/src/linux-netns26/kernel/mutex.c:209 > in_atomic():1, irqs_disabled():0 > no locks held by swapper/0. > Pid: 0, comm: swapper Not tainted 2.6.24 #304 > > Call Trace: > [] ? __debug_show_held_locks+0x15/0x27 > [] __might_sleep+0xc0/0xdf > [] mutex_lock_nested+0x28/0x2a9 > [] sched_destroy_group+0x18/0xea > [] sched_destroy_user+0xd/0xf > [] free_uid+0x8a/0xab > [] __put_task_struct+0x3f/0xd3 > [] delayed_put_task_struct+0x23/0x25 > [] __rcu_process_callbacks+0x8d/0x215 > [] rcu_process_callbacks+0x23/0x44 > [] __do_softirq+0x79/0xf8 > [] ? profile_pc+0x2a/0x67 > [] call_softirq+0x1c/0x30 > [] do_softirq+0x61/0x9c > [] irq_exit+0x51/0x53 > [] smp_apic_timer_interrupt+0x77/0xad > [] apic_timer_interrupt+0x6b/0x70 > [] ? default_idle+0x43/0x76 > [] ? default_idle+0x41/0x76 > [] ? default_idle+0x0/0x76 > [] ? cpu_idle+0x76/0x98 separate the tg->shares protection from the task_group lock. Reported-by: Denis V. Lunev Tested-by: Denis V. Lunev Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- diff --git a/kernel/sched.c b/kernel/sched.c index 3eedd52609..6b02276baa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -232,10 +232,10 @@ static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS]; static struct rt_rq *init_rt_rq_p[NR_CPUS]; -/* task_group_mutex serializes add/remove of task groups and also changes to +/* task_group_lock serializes add/remove of task groups and also changes to * a task group's cpu shares. */ -static DEFINE_MUTEX(task_group_mutex); +static DEFINE_SPINLOCK(task_group_lock); /* doms_cur_mutex serializes access to doms_cur[] array */ static DEFINE_MUTEX(doms_cur_mutex); @@ -295,16 +295,6 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) p->rt.parent = task_group(p)->rt_se[cpu]; } -static inline void lock_task_group_list(void) -{ - mutex_lock(&task_group_mutex); -} - -static inline void unlock_task_group_list(void) -{ - mutex_unlock(&task_group_mutex); -} - static inline void lock_doms_cur(void) { mutex_lock(&doms_cur_mutex); @@ -318,8 +308,6 @@ static inline void unlock_doms_cur(void) #else static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } -static inline void lock_task_group_list(void) { } -static inline void unlock_task_group_list(void) { } static inline void lock_doms_cur(void) { } static inline void unlock_doms_cur(void) { } @@ -7571,6 +7559,7 @@ struct task_group *sched_create_group(void) struct rt_rq *rt_rq; struct sched_rt_entity *rt_se; struct rq *rq; + unsigned long flags; int i; tg = kzalloc(sizeof(*tg), GFP_KERNEL); @@ -7620,7 +7609,7 @@ struct task_group *sched_create_group(void) init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0); } - lock_task_group_list(); + spin_lock_irqsave(&task_group_lock, flags); for_each_possible_cpu(i) { rq = cpu_rq(i); cfs_rq = tg->cfs_rq[i]; @@ -7629,7 +7618,7 @@ struct task_group *sched_create_group(void) list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list); } list_add_rcu(&tg->list, &task_groups); - unlock_task_group_list(); + spin_unlock_irqrestore(&task_group_lock, flags); return tg; @@ -7650,9 +7639,10 @@ void sched_destroy_group(struct task_group *tg) { struct cfs_rq *cfs_rq = NULL; struct rt_rq *rt_rq = NULL; + unsigned long flags; int i; - lock_task_group_list(); + spin_lock_irqsave(&task_group_lock, flags); for_each_possible_cpu(i) { cfs_rq = tg->cfs_rq[i]; list_del_rcu(&cfs_rq->leaf_cfs_rq_list); @@ -7660,7 +7650,7 @@ void sched_destroy_group(struct task_group *tg) list_del_rcu(&rt_rq->leaf_rt_rq_list); } list_del_rcu(&tg->list); - unlock_task_group_list(); + spin_unlock_irqrestore(&task_group_lock, flags); BUG_ON(!cfs_rq); @@ -7728,13 +7718,16 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) } } +static DEFINE_MUTEX(shares_mutex); + int sched_group_set_shares(struct task_group *tg, unsigned long shares) { int i; struct cfs_rq *cfs_rq; struct rq *rq; + unsigned long flags; - lock_task_group_list(); + mutex_lock(&shares_mutex); if (tg->shares == shares) goto done; @@ -7746,10 +7739,12 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) * load_balance_fair) from referring to this group first, * by taking it off the rq->leaf_cfs_rq_list on each cpu. */ + spin_lock_irqsave(&task_group_lock, flags); for_each_possible_cpu(i) { cfs_rq = tg->cfs_rq[i]; list_del_rcu(&cfs_rq->leaf_cfs_rq_list); } + spin_unlock_irqrestore(&task_group_lock, flags); /* wait for any ongoing reference to this group to finish */ synchronize_sched(); @@ -7769,13 +7764,15 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) * Enable load balance activity on this group, by inserting it back on * each cpu's rq->leaf_cfs_rq_list. */ + spin_lock_irqsave(&task_group_lock, flags); for_each_possible_cpu(i) { rq = cpu_rq(i); cfs_rq = tg->cfs_rq[i]; list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); } + spin_unlock_irqrestore(&task_group_lock, flags); done: - unlock_task_group_list(); + mutex_unlock(&shares_mutex); return 0; }