[POWERPC] spufs: scheduler support for NUMA.
arch/powerpc/platforms/cell/spufs/sched.c
/* sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * 2006-03-31   NUMA domains added.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#undef DEBUG

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/numa.h>
#include <linux/mutex.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include <asm/spu_priv1.h>
#include "spufs.h"

#define SPU_MIN_TIMESLICE       (100 * HZ / 1000)

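/*
 * Run-queue bookkeeping for the SPU scheduler, in the spirit of the O(1)
 * CPU scheduler's priority array: a bitmap with one bit per priority
 * level that has at least one waiter, a wait queue per priority level,
 * and a per-NUMA-node list of SPUs that currently have a context bound
 * to them, each list protected by its own mutex.
 */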
#define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
struct spu_prio_array {
        unsigned long bitmap[SPU_BITMAP_SIZE];
        wait_queue_head_t waitq[MAX_PRIO];
        struct list_head active_list[MAX_NUMNODES];
        struct mutex active_mutex[MAX_NUMNODES];
};

static struct spu_prio_array *spu_prio;

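/*
 * A node may only hand out SPUs to the current task if it has online
 * CPUs and the task's CPU affinity mask intersects that node; a task
 * bound to the CPUs of one node (e.g. via sched_setaffinity()) therefore
 * also has its SPU placement confined to that node.
 */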
static inline int node_allowed(int node)
{
        cpumask_t mask;

        if (!nr_cpus_node(node))
                return 0;
        mask = node_to_cpumask(node);
        if (!cpus_intersects(mask, current->cpus_allowed))
                return 0;
        return 1;
}

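/*
 * Once a mm is used by an SPU, TLB invalidations for it must use the
 * broadcast (global) tlbie form so that the SPE-side MMU sees them;
 * setting every bit in cpu_vm_mask makes the mm look multi-CPU, which
 * keeps the kernel from using CPU-local invalidation.  The NR_CPUS + 1
 * case covers uniprocessor builds, where a single bit would otherwise
 * still look "local".
 */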
static inline void mm_needs_global_tlbie(struct mm_struct *mm)
{
        int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;

        /* Global TLBIE broadcast required with SPEs. */
        __cpus_setall(&mm->cpu_vm_mask, nr);
}

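/*
 * bind_context() loads a saved context onto a physical SPU: it restores
 * the context save area, switches the context to the hardware ops, wires
 * up the interrupt callbacks and records the owning pid, priority and mm
 * (which from now on needs global tlbie).  unbind_context() is its
 * mirror image and leaves the context in SPU_STATE_SAVED, operating
 * through the backing-store ops.
 */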
static inline void bind_context(struct spu *spu, struct spu_context *ctx)
{
        pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
                 spu->number, spu->node);
        spu->ctx = ctx;
        spu->flags = 0;
        ctx->flags = 0;
        ctx->spu = spu;
        ctx->ops = &spu_hw_ops;
        spu->pid = current->pid;
        spu->prio = current->prio;
        spu->mm = ctx->owner;
        mm_needs_global_tlbie(spu->mm);
        spu->ibox_callback = spufs_ibox_callback;
        spu->wbox_callback = spufs_wbox_callback;
        spu->stop_callback = spufs_stop_callback;
        spu->mfc_callback = spufs_mfc_callback;
        mb();
        spu_unmap_mappings(ctx);
        spu_restore(&ctx->csa, spu);
        spu->timestamp = jiffies;
        spu_cpu_affinity_set(spu, raw_smp_processor_id());
}

static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
{
        pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
                 spu->pid, spu->number, spu->node);
        spu_unmap_mappings(ctx);
        spu_save(&ctx->csa, spu);
        spu->timestamp = jiffies;
        ctx->state = SPU_STATE_SAVED;
        spu->ibox_callback = NULL;
        spu->wbox_callback = NULL;
        spu->stop_callback = NULL;
        spu->mfc_callback = NULL;
        spu->mm = NULL;
        spu->pid = 0;
        spu->prio = MAX_PRIO;
        ctx->ops = &spu_backing_ops;
        ctx->spu = NULL;
        ctx->flags = 0;
        spu->flags = 0;
        spu->ctx = NULL;
}

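/*
 * spu_add_wq()/spu_del_wq() pair a prepare_to_wait_exclusive() on the
 * per-priority wait queue with maintenance of the priority bitmap: the
 * bit is set as soon as a waiter queues up, and is cleared, under the
 * wait-queue lock, only when the last waiter at that priority leaves.
 */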
static inline void spu_add_wq(wait_queue_head_t * wq, wait_queue_t * wait,
                              int prio)
{
        prepare_to_wait_exclusive(wq, wait, TASK_INTERRUPTIBLE);
        set_bit(prio, spu_prio->bitmap);
}

static inline void spu_del_wq(wait_queue_head_t * wq, wait_queue_t * wait,
                              int prio)
{
        unsigned long flags;

        __set_current_state(TASK_RUNNING);

        spin_lock_irqsave(&wq->lock, flags);

        remove_wait_queue_locked(wq, wait);
        if (list_empty(&wq->task_list))
                clear_bit(prio, spu_prio->bitmap);

        spin_unlock_irqrestore(&wq->lock, flags);
}

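/*
 * spu_prio_wait() queues the caller on the wait queue for its priority
 * and sleeps until an SPU is freed (or a signal arrives), dropping
 * ctx->state_sema across the schedule() so that other threads can make
 * progress on this context in the meantime.
 */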
static void spu_prio_wait(struct spu_context *ctx, u64 flags)
{
        int prio = current->prio;
        wait_queue_head_t *wq = &spu_prio->waitq[prio];
        DEFINE_WAIT(wait);

        if (ctx->spu)
                return;

        spu_add_wq(wq, &wait, prio);

        if (!signal_pending(current)) {
                up_write(&ctx->state_sema);
                pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
                         current->pid, current->prio);
                schedule();
                down_write(&ctx->state_sema);
        }

        spu_del_wq(wq, &wait, prio);
}

static void spu_prio_wakeup(void)
{
        int best = sched_find_first_bit(spu_prio->bitmap);
        if (best < MAX_PRIO) {
                wait_queue_head_t *wq = &spu_prio->waitq[best];
                wake_up_interruptible_nr(wq, 1);
        }
}

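/*
 * get_active_spu() removes an SPU from its node's active list and
 * returns 1 if it was found there, 0 otherwise; put_active_spu() adds a
 * freshly bound SPU to that list.  The per-node mutex serialises these
 * against each other and against spu_sched_exit() walking the same list.
 */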
static int get_active_spu(struct spu *spu)
{
        int node = spu->node;
        struct spu *tmp;
        int rc = 0;

        mutex_lock(&spu_prio->active_mutex[node]);
        list_for_each_entry(tmp, &spu_prio->active_list[node], list) {
                if (tmp == spu) {
                        list_del_init(&spu->list);
                        rc = 1;
                        break;
                }
        }
        mutex_unlock(&spu_prio->active_mutex[node]);
        return rc;
}

static void put_active_spu(struct spu *spu)
{
        int node = spu->node;

        mutex_lock(&spu_prio->active_mutex[node]);
        list_add_tail(&spu->list, &spu_prio->active_list[node]);
        mutex_unlock(&spu_prio->active_mutex[node]);
}

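/*
 * spu_get_idle() tries to allocate an idle SPU starting from the NUMA
 * node of the CPU the caller is running on, then walks the remaining
 * nodes in order (wrapping at MAX_NUMNODES), skipping any node the
 * current task is not allowed to use.
 */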
static struct spu *spu_get_idle(struct spu_context *ctx, u64 flags)
{
        struct spu *spu = NULL;
        int node = cpu_to_node(raw_smp_processor_id());
        int n;

        for (n = 0; n < MAX_NUMNODES; n++, node++) {
                node = (node < MAX_NUMNODES) ? node : 0;
                if (!node_allowed(node))
                        continue;
                spu = spu_alloc_node(node);
                if (spu)
                        break;
        }
        return spu;
}

static inline struct spu *spu_get(struct spu_context *ctx, u64 flags)
{
        /* Future: spu_get_idle() if possible,
         * otherwise try to preempt an active
         * context.
         */
        return spu_get_idle(ctx, flags);
}

/* The three externally callable interfaces
 * for the scheduler begin here.
 *
 *      spu_activate    - bind a context to SPU, waiting as needed.
 *      spu_deactivate  - unbind a context from its SPU.
 *      spu_yield       - yield an SPU if others are waiting.
 */

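/*
 * Illustrative only, not part of the original file: a minimal sketch of
 * how a spufs caller might drive these three entry points, using a
 * hypothetical helper named example_run_once().  The real call sites
 * live elsewhere in spufs; locking of ctx->state_sema is elided here
 * for brevity.
 */
#if 0
static int example_run_once(struct spu_context *ctx)
{
        int ret;

        /* Bind the context to an idle SPU, sleeping on the priority
         * wait queue if none is available right now. */
        ret = spu_activate(ctx, 0);
        if (ret)
                return ret;             /* -ERESTARTSYS if a signal arrived */

        /* ... let the context run on its SPU ... */

        /* Give the SPU back if anyone is waiting for one. */
        spu_yield(ctx);

        /* Unbind and save the context state when we are done. */
        spu_deactivate(ctx);
        return 0;
}
#endif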
int spu_activate(struct spu_context *ctx, u64 flags)
{
        struct spu *spu;
        int ret = 0;

        for (;;) {
                if (ctx->spu)
                        return 0;
                spu = spu_get(ctx, flags);
                if (spu != NULL) {
                        if (ctx->spu != NULL) {
                                spu_free(spu);
                                spu_prio_wakeup();
                                break;
                        }
                        bind_context(spu, ctx);
                        put_active_spu(spu);
                        break;
                }
                spu_prio_wait(ctx, flags);
                if (signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        spu_prio_wakeup();
                        break;
                }
        }
        return ret;
}

void spu_deactivate(struct spu_context *ctx)
{
        struct spu *spu;
        int needs_idle;

        spu = ctx->spu;
        if (!spu)
                return;
        needs_idle = get_active_spu(spu);
        unbind_context(spu, ctx);
        if (needs_idle) {
                spu_free(spu);
                spu_prio_wakeup();
        }
}

void spu_yield(struct spu_context *ctx)
{
        struct spu *spu;
        int need_yield = 0;

        if (down_write_trylock(&ctx->state_sema)) {
                if ((spu = ctx->spu) != NULL) {
                        int best = sched_find_first_bit(spu_prio->bitmap);
                        if (best < MAX_PRIO) {
                                pr_debug("%s: yielding SPU %d NODE %d\n",
                                         __FUNCTION__, spu->number, spu->node);
                                spu_deactivate(ctx);
                                ctx->state = SPU_STATE_SAVED;
                                need_yield = 1;
                        } else {
                                spu->prio = MAX_PRIO;
                        }
                }
                up_write(&ctx->state_sema);
        }
        if (unlikely(need_yield))
                yield();
}

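/*
 * Note on spu_sched_init() below: the extra bit set at MAX_PRIO acts as
 * a sentinel so that sched_find_first_bit() always finds a bit and
 * returns MAX_PRIO ("nobody waiting") when no real priority bit is set;
 * SPU_BITMAP_SIZE leaves room for this extra bit.
 */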
int __init spu_sched_init(void)
{
        int i;

        spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
        if (!spu_prio) {
                printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
                       __FUNCTION__);
                return 1;
        }
        for (i = 0; i < MAX_PRIO; i++) {
                init_waitqueue_head(&spu_prio->waitq[i]);
                __clear_bit(i, spu_prio->bitmap);
        }
        __set_bit(MAX_PRIO, spu_prio->bitmap);
        for (i = 0; i < MAX_NUMNODES; i++) {
                mutex_init(&spu_prio->active_mutex[i]);
                INIT_LIST_HEAD(&spu_prio->active_list[i]);
        }
        return 0;
}

void __exit spu_sched_exit(void)
{
        struct spu *spu, *tmp;
        int node;

        for (node = 0; node < MAX_NUMNODES; node++) {
                mutex_lock(&spu_prio->active_mutex[node]);
                list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
                                         list) {
                        list_del_init(&spu->list);
                        spu_free(spu);
                }
                mutex_unlock(&spu_prio->active_mutex[node]);
        }
        kfree(spu_prio);
}