X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=block%2Fcfq-iosched.c;h=d01b411c72f053225c2faee733e4299afeeec51b;hb=6f4a0e45c6392f84436004d4c04d31b8ff5071c5;hp=0f962ecae91fb68345fe1d916791a78e06a2f2e8;hpb=b9c64498f53e0f4d90eec03048f73ac215bb6f79;p=linux-2.6 diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 0f962ecae9..d01b411c72 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -124,6 +124,8 @@ struct cfq_data { struct cfq_queue { /* reference count */ atomic_t ref; + /* various state flags, see below */ + unsigned int flags; /* parent cfq_data */ struct cfq_data *cfqd; /* service_tree member */ @@ -138,14 +140,14 @@ struct cfq_queue { int queued[2]; /* currently allocated requests */ int allocated[2]; - /* pending metadata requests */ - int meta_pending; /* fifo list of requests in sort_list */ struct list_head fifo; unsigned long slice_end; long slice_resid; + /* pending metadata requests */ + int meta_pending; /* number of requests that are on the dispatch list or inside driver */ int dispatched; @@ -153,8 +155,6 @@ struct cfq_queue { unsigned short ioprio, org_ioprio; unsigned short ioprio_class, org_ioprio_class; - /* various state flags, see below */ - unsigned int flags; }; enum cfqq_state_flags { @@ -1143,24 +1143,47 @@ static void cfq_put_queue(struct cfq_queue *cfqq) } /* - * Call func for each cic attached to this ioc. Returns number of cic's seen. + * Must always be called with the rcu_read_lock() held */ -static unsigned int -call_for_each_cic(struct io_context *ioc, - void (*func)(struct io_context *, struct cfq_io_context *)) +static void +__call_for_each_cic(struct io_context *ioc, + void (*func)(struct io_context *, struct cfq_io_context *)) { struct cfq_io_context *cic; struct hlist_node *n; - int called = 0; - rcu_read_lock(); - hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) { + hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) func(ioc, cic); - called++; - } +} + +/* + * Call func for each cic attached to this ioc. + */ +static void +call_for_each_cic(struct io_context *ioc, + void (*func)(struct io_context *, struct cfq_io_context *)) +{ + rcu_read_lock(); + __call_for_each_cic(ioc, func); rcu_read_unlock(); +} + +static void cfq_cic_free_rcu(struct rcu_head *head) +{ + struct cfq_io_context *cic; + + cic = container_of(head, struct cfq_io_context, rcu_head); + + kmem_cache_free(cfq_ioc_pool, cic); + elv_ioc_count_dec(ioc_count); - return called; + if (ioc_gone && !elv_ioc_count_read(ioc_count)) + complete(ioc_gone); +} + +static void cfq_cic_free(struct cfq_io_context *cic) +{ + call_rcu(&cic->rcu_head, cfq_cic_free_rcu); } static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) @@ -1174,24 +1197,23 @@ static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) hlist_del_rcu(&cic->cic_list); spin_unlock_irqrestore(&ioc->lock, flags); - kmem_cache_free(cfq_ioc_pool, cic); + cfq_cic_free(cic); } +/* + * Must be called with rcu_read_lock() held or preemption otherwise disabled. + * Only two callers of this - ->dtor() which is called with the rcu_read_lock(), + * and ->trim() which is called with the task lock held + */ static void cfq_free_io_context(struct io_context *ioc) { - int freed; - /* - * ioc->refcount is zero here, so no more cic's are allowed to be - * linked into this ioc. So it should be ok to iterate over the known - * list, we will see all cic's since no new ones are added. + * ioc->refcount is zero here, or we are called from elv_unregister(), + * so no more cic's are allowed to be linked into this ioc. So it + * should be ok to iterate over the known list, we will see all cic's + * since no new ones are added. */ - freed = call_for_each_cic(ioc, cic_free_func); - - elv_ioc_count_mod(ioc_count, -freed); - - if (ioc_gone && !elv_ioc_count_read(ioc_count)) - complete(ioc_gone); + __call_for_each_cic(ioc, cic_free_func); } static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) @@ -1207,6 +1229,8 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) static void __cfq_exit_single_io_context(struct cfq_data *cfqd, struct cfq_io_context *cic) { + struct io_context *ioc = cic->ioc; + list_del_init(&cic->queue_list); /* @@ -1216,6 +1240,9 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, cic->dead_key = (unsigned long) cic->key; cic->key = NULL; + if (ioc->ioc_data == cic) + rcu_assign_pointer(ioc->ioc_data, NULL); + if (cic->cfqq[ASYNC]) { cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]); cic->cfqq[ASYNC] = NULL; @@ -1248,7 +1275,6 @@ static void cfq_exit_single_io_context(struct io_context *ioc, */ static void cfq_exit_io_context(struct io_context *ioc) { - rcu_assign_pointer(ioc->ioc_data, NULL); call_for_each_cic(ioc, cfq_exit_single_io_context); } @@ -1285,10 +1311,10 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class); case IOPRIO_CLASS_NONE: /* - * no prio set, place us in the middle of the BE classes + * no prio set, inherit CPU scheduling settings */ cfqq->ioprio = task_nice_ioprio(tsk); - cfqq->ioprio_class = IOPRIO_CLASS_BE; + cfqq->ioprio_class = task_nice_ioclass(tsk); break; case IOPRIO_CLASS_RT: cfqq->ioprio = task_ioprio(ioc); @@ -1458,15 +1484,6 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc, return cfqq; } -static void cfq_cic_free(struct cfq_io_context *cic) -{ - kmem_cache_free(cfq_ioc_pool, cic); - elv_ioc_count_dec(ioc_count); - - if (ioc_gone && !elv_ioc_count_read(ioc_count)) - complete(ioc_gone); -} - /* * We drop cfq io contexts lazily, so we may find a dead one. */ @@ -1480,8 +1497,7 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, spin_lock_irqsave(&ioc->lock, flags); - if (ioc->ioc_data == cic) - rcu_assign_pointer(ioc->ioc_data, NULL); + BUG_ON(ioc->ioc_data == cic); radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd); hlist_del_rcu(&cic->cic_list); @@ -1494,20 +1510,24 @@ static struct cfq_io_context * cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) { struct cfq_io_context *cic; + unsigned long flags; void *k; if (unlikely(!ioc)) return NULL; + rcu_read_lock(); + /* * we maintain a last-hit cache, to avoid browsing over the tree */ cic = rcu_dereference(ioc->ioc_data); - if (cic && cic->key == cfqd) + if (cic && cic->key == cfqd) { + rcu_read_unlock(); return cic; + } do { - rcu_read_lock(); cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); rcu_read_unlock(); if (!cic) @@ -1516,10 +1536,13 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) k = cic->key; if (unlikely(!k)) { cfq_drop_dead_cic(cfqd, ioc, cic); + rcu_read_lock(); continue; } + spin_lock_irqsave(&ioc->lock, flags); rcu_assign_pointer(ioc->ioc_data, cic); + spin_unlock_irqrestore(&ioc->lock, flags); break; } while (1); @@ -2126,6 +2149,10 @@ static void *cfq_init_queue(struct request_queue *q) static void cfq_slab_kill(void) { + /* + * Caller already ensured that pending RCU callbacks are completed, + * so we should have no busy allocations at this point. + */ if (cfq_pool) kmem_cache_destroy(cfq_pool); if (cfq_ioc_pool) @@ -2138,7 +2165,7 @@ static int __init cfq_slab_setup(void) if (!cfq_pool) goto fail; - cfq_ioc_pool = KMEM_CACHE(cfq_io_context, SLAB_DESTROY_BY_RCU); + cfq_ioc_pool = KMEM_CACHE(cfq_io_context, 0); if (!cfq_ioc_pool) goto fail; @@ -2284,9 +2311,13 @@ static void __exit cfq_exit(void) ioc_gone = &all_gone; /* ioc_gone's update must be visible before reading ioc_count */ smp_wmb(); + + /* + * this also protects us from entering cfq_slab_kill() with + * pending RCU callbacks + */ if (elv_ioc_count_read(ioc_count)) wait_for_completion(ioc_gone); - synchronize_rcu(); cfq_slab_kill(); }