From: Roland Dreier Date: Mon, 26 Sep 2005 20:53:25 +0000 (-0700) Subject: [IB] uverbs: ABI-breaking fixes for userspace verbs X-Git-Tag: v2.6.15-rc1~731^2~21^2~39 X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6b73597e7062118c0549c2702bfb7d273518c906;p=linux-2.6 [IB] uverbs: ABI-breaking fixes for userspace verbs Introduce new userspace verbs ABI version 3. This eliminates some unneeded commands, and adds support for user-created completion channels. This cleans up problems with file leaks on error paths, and also makes sure that file descriptors are always installed into the correct process. Signed-off-by: Roland Dreier --- diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index cc124344dd..475153e510 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -53,14 +53,14 @@ struct ib_uverbs_device { struct cdev dev; struct class_device class_dev; struct ib_device *ib_dev; - int num_comp; + int num_comp_vectors; }; struct ib_uverbs_event_file { struct kref ref; + struct file *file; struct ib_uverbs_file *uverbs_file; spinlock_t lock; - int fd; int is_async; wait_queue_head_t poll_wait; struct fasync_struct *async_queue; @@ -73,8 +73,7 @@ struct ib_uverbs_file { struct ib_uverbs_device *device; struct ib_ucontext *ucontext; struct ib_event_handler event_handler; - struct ib_uverbs_event_file async_file; - struct ib_uverbs_event_file comp_file[1]; + struct ib_uverbs_event_file *async_file; }; struct ib_uverbs_event { @@ -110,10 +109,17 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + int is_async, int *fd); +void ib_uverbs_release_event_file(struct kref *ref); +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd); + void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event); int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, void *addr, size_t size, int write); @@ -125,16 +131,14 @@ void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); const char __user *buf, int in_len, \ int out_len) -IB_UVERBS_DECLARE_CMD(query_params); IB_UVERBS_DECLARE_CMD(get_context); IB_UVERBS_DECLARE_CMD(query_device); IB_UVERBS_DECLARE_CMD(query_port); -IB_UVERBS_DECLARE_CMD(query_gid); -IB_UVERBS_DECLARE_CMD(query_pkey); IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); IB_UVERBS_DECLARE_CMD(destroy_cq); IB_UVERBS_DECLARE_CMD(create_qp); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 562445165d..79b60c3dc8 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -33,6 +33,8 @@ * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ */ +#include + #include #include "uverbs.h" @@ -45,29 +47,6 @@ (udata)->outlen = (olen); \ } while (0) -ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_params cmd; - struct ib_uverbs_query_params_resp resp; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - resp.num_cq_events = file->device->num_comp; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -77,7 +56,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, struct ib_udata udata; struct ib_device *ibdev = file->device->ib_dev; struct ib_ucontext *ucontext; - int i; + struct file *filp; int ret; if (out_len < sizeof resp) @@ -110,26 +89,42 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); - resp.async_fd = file->async_file.fd; - for (i = 0; i < file->device->num_comp; ++i) - if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + - i * sizeof (__u32), - &file->comp_file[i].fd, sizeof (__u32))) { - ret = -EFAULT; - goto err_free; - } + resp.num_comp_vectors = file->device->num_comp_vectors; + + filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd); + if (IS_ERR(filp)) { + ret = PTR_ERR(filp); + goto err_free; + } if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) { ret = -EFAULT; - goto err_free; + goto err_file; } - file->ucontext = ucontext; + file->async_file = filp->private_data; + + INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev, + ib_uverbs_event_handler); + ret = ib_register_event_handler(&file->event_handler); + if (ret) + goto err_file; + + kref_get(&file->async_file->ref); + kref_get(&file->ref); + file->ucontext = ucontext; + + fd_install(resp.async_fd, filp); + up(&file->mutex); return in_len; +err_file: + put_unused_fd(resp.async_fd); + fput(filp); + err_free: ibdev->dealloc_ucontext(ucontext); @@ -255,62 +250,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, return in_len; } -ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_gid cmd; - struct ib_uverbs_query_gid_resp resp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index, - (union ib_gid *) resp.gid); - if (ret) - return ret; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - -ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file, - const char __user *buf, - int in_len, int out_len) -{ - struct ib_uverbs_query_pkey cmd; - struct ib_uverbs_query_pkey_resp resp; - int ret; - - if (out_len < sizeof resp) - return -ENOSPC; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - memset(&resp, 0, sizeof resp); - - ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index, - &resp.pkey); - if (ret) - return ret; - - if (copy_to_user((void __user *) (unsigned long) cmd.response, - &resp, sizeof resp)) - return -EFAULT; - - return in_len; -} - ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -595,6 +534,35 @@ out: return ret ? ret : in_len; } +ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_comp_channel cmd; + struct ib_uverbs_create_comp_channel_resp resp; + struct file *filp; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd); + if (IS_ERR(filp)) + return PTR_ERR(filp); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + put_unused_fd(resp.fd); + fput(filp); + return -EFAULT; + } + + fd_install(resp.fd, filp); + return in_len; +} + ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -603,6 +571,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, struct ib_uverbs_create_cq_resp resp; struct ib_udata udata; struct ib_ucq_object *uobj; + struct ib_uverbs_event_file *ev_file = NULL; struct ib_cq *cq; int ret; @@ -616,9 +585,12 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - if (cmd.event_handler >= file->device->num_comp) + if (cmd.comp_vector >= file->device->num_comp_vectors) return -EINVAL; + if (cmd.comp_channel >= 0) + ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel); + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; @@ -641,7 +613,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, cq->uobject = &uobj->uobject; cq->comp_handler = ib_uverbs_comp_handler; cq->event_handler = ib_uverbs_cq_event_handler; - cq->cq_context = file; + cq->cq_context = ev_file; atomic_set(&cq->usecnt, 0); retry: @@ -700,6 +672,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, struct ib_uverbs_destroy_cq_resp resp; struct ib_cq *cq; struct ib_ucq_object *uobj; + struct ib_uverbs_event_file *ev_file; struct ib_uverbs_event *evt, *tmp; u64 user_handle; int ret = -EINVAL; @@ -716,7 +689,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, goto out; user_handle = cq->uobject->user_handle; - uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + uobj = container_of(cq->uobject, struct ib_ucq_object, uobject); + ev_file = cq->cq_context; ret = ib_destroy_cq(cq); if (ret) @@ -728,19 +702,23 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->comp_file[0].lock); - list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { - list_del(&evt->list); - kfree(evt); + if (ev_file) { + spin_lock_irq(&ev_file->lock); + list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) { + list_del(&evt->list); + kfree(evt); + } + spin_unlock_irq(&ev_file->lock); + + kref_put(&ev_file->ref, ib_uverbs_release_event_file); } - spin_unlock_irq(&file->comp_file[0].lock); - spin_lock_irq(&file->async_file.lock); + spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file.lock); + spin_unlock_irq(&file->async_file->lock); resp.comp_events_reported = uobj->comp_events_reported; resp.async_events_reported = uobj->async_events_reported; @@ -1005,12 +983,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->async_file.lock); + spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file.lock); + spin_unlock_irq(&file->async_file->lock); resp.events_reported = uobj->events_reported; @@ -1243,12 +1221,12 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, list_del(&uobj->uobject.list); up(&file->mutex); - spin_lock_irq(&file->async_file.lock); + spin_lock_irq(&file->async_file->lock); list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) { list_del(&evt->list); kfree(evt); } - spin_unlock_irq(&file->async_file.lock); + spin_unlock_irq(&file->async_file->lock); resp.events_reported = uobj->events_reported; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 12511808de..e7058fbc60 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -77,26 +77,24 @@ static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) = { - [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params, - [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, - [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, - [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, - [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid, - [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey, - [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, - [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, - [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, - [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, - [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, - [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, - [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, - [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, - [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, - [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, - [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, - [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, - [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, - [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, + [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq, + [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, + [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, }; static struct vfsmount *uverbs_event_mnt; @@ -188,25 +186,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, spin_lock_irq(&file->lock); - while (list_empty(&file->event_list) && file->fd >= 0) { + while (list_empty(&file->event_list)) { spin_unlock_irq(&file->lock); if (filp->f_flags & O_NONBLOCK) return -EAGAIN; if (wait_event_interruptible(file->poll_wait, - !list_empty(&file->event_list) || - file->fd < 0)) + !list_empty(&file->event_list))) return -ERESTARTSYS; spin_lock_irq(&file->lock); } - if (file->fd < 0) { - spin_unlock_irq(&file->lock); - return -ENODEV; - } - event = list_entry(file->event_list.next, struct ib_uverbs_event, list); if (file->is_async) @@ -248,26 +240,19 @@ static unsigned int ib_uverbs_event_poll(struct file *filp, poll_wait(filp, &file->poll_wait, wait); spin_lock_irq(&file->lock); - if (file->fd < 0) - pollflags = POLLERR; - else if (!list_empty(&file->event_list)) + if (!list_empty(&file->event_list)) pollflags = POLLIN | POLLRDNORM; spin_unlock_irq(&file->lock); return pollflags; } -static void ib_uverbs_event_release(struct ib_uverbs_event_file *file) +void ib_uverbs_release_event_file(struct kref *ref) { - struct ib_uverbs_event *entry, *tmp; + struct ib_uverbs_event_file *file = + container_of(ref, struct ib_uverbs_event_file, ref); - spin_lock_irq(&file->lock); - if (file->fd != -1) { - file->fd = -1; - list_for_each_entry_safe(entry, tmp, &file->event_list, list) - kfree(entry); - } - spin_unlock_irq(&file->lock); + kfree(file); } static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) @@ -280,21 +265,30 @@ static int ib_uverbs_event_fasync(int fd, struct file *filp, int on) static int ib_uverbs_event_close(struct inode *inode, struct file *filp) { struct ib_uverbs_event_file *file = filp->private_data; + struct ib_uverbs_event *entry, *tmp; + + spin_lock_irq(&file->lock); + file->file = NULL; + list_for_each_entry_safe(entry, tmp, &file->event_list, list) { + if (entry->counter) + list_del(&entry->obj_list); + kfree(entry); + } + spin_unlock_irq(&file->lock); - ib_uverbs_event_release(file); ib_uverbs_event_fasync(-1, filp, 0); - kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + + if (file->is_async) { + ib_unregister_event_handler(&file->uverbs_file->event_handler); + kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + } + kref_put(&file->ref, ib_uverbs_release_event_file); return 0; } static struct file_operations uverbs_event_fops = { - /* - * No .owner field since we artificially create event files, - * so there is no increment to the module reference count in - * the open path. All event files come from a uverbs command - * file, which already takes a module reference, so this is OK. - */ + .owner = THIS_MODULE, .read = ib_uverbs_event_read, .poll = ib_uverbs_event_poll, .release = ib_uverbs_event_close, @@ -303,10 +297,19 @@ static struct file_operations uverbs_event_fops = { void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) { - struct ib_uverbs_file *file = cq_context; - struct ib_ucq_object *uobj; - struct ib_uverbs_event *entry; - unsigned long flags; + struct ib_uverbs_event_file *file = cq_context; + struct ib_ucq_object *uobj; + struct ib_uverbs_event *entry; + unsigned long flags; + + if (!file) + return; + + spin_lock_irqsave(&file->lock, flags); + if (!file->file) { + spin_unlock_irqrestore(&file->lock, flags); + return; + } entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) @@ -317,13 +320,12 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) entry->desc.comp.cq_handle = cq->uobject->user_handle; entry->counter = &uobj->comp_events_reported; - spin_lock_irqsave(&file->comp_file[0].lock, flags); - list_add_tail(&entry->list, &file->comp_file[0].event_list); + list_add_tail(&entry->list, &file->event_list); list_add_tail(&entry->obj_list, &uobj->comp_list); - spin_unlock_irqrestore(&file->comp_file[0].lock, flags); + spin_unlock_irqrestore(&file->lock, flags); - wake_up_interruptible(&file->comp_file[0].poll_wait); - kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->poll_wait); + kill_fasync(&file->async_queue, SIGIO, POLL_IN); } static void ib_uverbs_async_handler(struct ib_uverbs_file *file, @@ -334,6 +336,12 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, struct ib_uverbs_event *entry; unsigned long flags; + spin_lock_irqsave(&file->async_file->lock, flags); + if (!file->async_file->file) { + spin_unlock_irqrestore(&file->async_file->lock, flags); + return; + } + entry = kmalloc(sizeof *entry, GFP_ATOMIC); if (!entry) return; @@ -342,24 +350,24 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, entry->desc.async.event_type = event; entry->counter = counter; - spin_lock_irqsave(&file->async_file.lock, flags); - list_add_tail(&entry->list, &file->async_file.event_list); + list_add_tail(&entry->list, &file->async_file->event_list); if (obj_list) list_add_tail(&entry->obj_list, obj_list); - spin_unlock_irqrestore(&file->async_file.lock, flags); + spin_unlock_irqrestore(&file->async_file->lock, flags); - wake_up_interruptible(&file->async_file.poll_wait); - kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN); + wake_up_interruptible(&file->async_file->poll_wait); + kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN); } void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) { + struct ib_uverbs_event_file *ev_file = context_ptr; struct ib_ucq_object *uobj; uobj = container_of(event->element.cq->uobject, struct ib_ucq_object, uobject); - ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle, + ib_uverbs_async_handler(ev_file->uverbs_file, uobj->uobject.user_handle, event->event, &uobj->async_list, &uobj->async_events_reported); @@ -389,8 +397,8 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr) &uobj->events_reported); } -static void ib_uverbs_event_handler(struct ib_event_handler *handler, - struct ib_event *event) +void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) { struct ib_uverbs_file *file = container_of(handler, struct ib_uverbs_file, event_handler); @@ -399,38 +407,90 @@ static void ib_uverbs_event_handler(struct ib_event_handler *handler, NULL, NULL); } -static int ib_uverbs_event_init(struct ib_uverbs_event_file *file, - struct ib_uverbs_file *uverbs_file) +struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, + int is_async, int *fd) { + struct ib_uverbs_event_file *ev_file; struct file *filp; + int ret; - spin_lock_init(&file->lock); - INIT_LIST_HEAD(&file->event_list); - init_waitqueue_head(&file->poll_wait); - file->uverbs_file = uverbs_file; - file->async_queue = NULL; - - file->fd = get_unused_fd(); - if (file->fd < 0) - return file->fd; + ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL); + if (!ev_file) + return ERR_PTR(-ENOMEM); + + kref_init(&ev_file->ref); + spin_lock_init(&ev_file->lock); + INIT_LIST_HEAD(&ev_file->event_list); + init_waitqueue_head(&ev_file->poll_wait); + ev_file->uverbs_file = uverbs_file; + ev_file->async_queue = NULL; + ev_file->is_async = is_async; + + *fd = get_unused_fd(); + if (*fd < 0) { + ret = *fd; + goto err; + } filp = get_empty_filp(); if (!filp) { - put_unused_fd(file->fd); - return -ENFILE; + ret = -ENFILE; + goto err_fd; } - filp->f_op = &uverbs_event_fops; + ev_file->file = filp; + + /* + * fops_get() can't fail here, because we're coming from a + * system call on a uverbs file, which will already have a + * module reference. + */ + filp->f_op = fops_get(&uverbs_event_fops); filp->f_vfsmnt = mntget(uverbs_event_mnt); filp->f_dentry = dget(uverbs_event_mnt->mnt_root); filp->f_mapping = filp->f_dentry->d_inode->i_mapping; filp->f_flags = O_RDONLY; filp->f_mode = FMODE_READ; - filp->private_data = file; + filp->private_data = ev_file; - fd_install(file->fd, filp); + return filp; - return 0; +err_fd: + put_unused_fd(*fd); + +err: + kfree(ev_file); + return ERR_PTR(ret); +} + +/* + * Look up a completion event file by FD. If lookup is successful, + * takes a ref to the event file struct that it returns; if + * unsuccessful, returns NULL. + */ +struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) +{ + struct ib_uverbs_event_file *ev_file = NULL; + struct file *filp; + + filp = fget(fd); + if (!filp) + return NULL; + + if (filp->f_op != &uverbs_event_fops) + goto out; + + ev_file = filp->private_data; + if (ev_file->is_async) { + ev_file = NULL; + goto out; + } + + kref_get(&ev_file->ref); + +out: + fput(filp); + return ev_file; } static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, @@ -453,8 +513,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, !uverbs_cmd_table[hdr.command]) return -EINVAL; - if (!file->ucontext && - hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS && + if (!file->ucontext && hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) return -EINVAL; @@ -477,82 +536,33 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp) struct ib_uverbs_device *dev = container_of(inode->i_cdev, struct ib_uverbs_device, dev); struct ib_uverbs_file *file; - int i = 0; - int ret; if (!try_module_get(dev->ib_dev->owner)) return -ENODEV; - file = kmalloc(sizeof *file + - (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file), - GFP_KERNEL); + file = kmalloc(sizeof *file, GFP_KERNEL); if (!file) { - ret = -ENOMEM; - goto err; + module_put(dev->ib_dev->owner); + return -ENOMEM; } - file->device = dev; + file->device = dev; + file->ucontext = NULL; kref_init(&file->ref); init_MUTEX(&file->mutex); - file->ucontext = NULL; - - kref_get(&file->ref); - ret = ib_uverbs_event_init(&file->async_file, file); - if (ret) - goto err_kref; - - file->async_file.is_async = 1; - - for (i = 0; i < dev->num_comp; ++i) { - kref_get(&file->ref); - ret = ib_uverbs_event_init(&file->comp_file[i], file); - if (ret) - goto err_async; - file->comp_file[i].is_async = 0; - } - - filp->private_data = file; - INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev, - ib_uverbs_event_handler); - if (ib_register_event_handler(&file->event_handler)) - goto err_async; - return 0; - -err_async: - while (i--) - ib_uverbs_event_release(&file->comp_file[i]); - - ib_uverbs_event_release(&file->async_file); - -err_kref: - /* - * One extra kref_put() because we took a reference before the - * event file creation that failed and got us here. - */ - kref_put(&file->ref, ib_uverbs_release_file); - kref_put(&file->ref, ib_uverbs_release_file); - -err: - module_put(dev->ib_dev->owner); - return ret; } static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - int i; - ib_unregister_event_handler(&file->event_handler); - ib_uverbs_event_release(&file->async_file); ib_dealloc_ucontext(file->ucontext); - for (i = 0; i < file->device->num_comp; ++i) - ib_uverbs_event_release(&file->comp_file[i]); - + kref_put(&file->async_file->ref, ib_uverbs_release_event_file); kref_put(&file->ref, ib_uverbs_release_file); return 0; @@ -631,8 +641,8 @@ static void ib_uverbs_add_one(struct ib_device *device) set_bit(uverbs_dev->devnum, dev_map); spin_unlock(&map_lock); - uverbs_dev->ib_dev = device; - uverbs_dev->num_comp = 1; + uverbs_dev->ib_dev = device; + uverbs_dev->num_comp_vectors = 1; if (device->mmap) cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops); diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index fd85725391..0532b78200 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -42,15 +42,12 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define IB_USER_VERBS_ABI_VERSION 2 +#define IB_USER_VERBS_ABI_VERSION 3 enum { - IB_USER_VERBS_CMD_QUERY_PARAMS, IB_USER_VERBS_CMD_GET_CONTEXT, IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_CMD_QUERY_PORT, - IB_USER_VERBS_CMD_QUERY_GID, - IB_USER_VERBS_CMD_QUERY_PKEY, IB_USER_VERBS_CMD_ALLOC_PD, IB_USER_VERBS_CMD_DEALLOC_PD, IB_USER_VERBS_CMD_CREATE_AH, @@ -65,6 +62,7 @@ enum { IB_USER_VERBS_CMD_ALLOC_MW, IB_USER_VERBS_CMD_BIND_MW, IB_USER_VERBS_CMD_DEALLOC_MW, + IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL, IB_USER_VERBS_CMD_CREATE_CQ, IB_USER_VERBS_CMD_RESIZE_CQ, IB_USER_VERBS_CMD_DESTROY_CQ, @@ -118,27 +116,14 @@ struct ib_uverbs_cmd_hdr { __u16 out_words; }; -/* - * No driver_data for "query params" command, since this is intended - * to be a core function with no possible device dependence. - */ -struct ib_uverbs_query_params { - __u64 response; -}; - -struct ib_uverbs_query_params_resp { - __u32 num_cq_events; -}; - struct ib_uverbs_get_context { __u64 response; - __u64 cq_fd_tab; __u64 driver_data[0]; }; struct ib_uverbs_get_context_resp { __u32 async_fd; - __u32 reserved; + __u32 num_comp_vectors; }; struct ib_uverbs_query_device { @@ -220,31 +205,6 @@ struct ib_uverbs_query_port_resp { __u8 reserved[3]; }; -struct ib_uverbs_query_gid { - __u64 response; - __u8 port_num; - __u8 index; - __u8 reserved[6]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_gid_resp { - __u8 gid[16]; -}; - -struct ib_uverbs_query_pkey { - __u64 response; - __u8 port_num; - __u8 index; - __u8 reserved[6]; - __u64 driver_data[0]; -}; - -struct ib_uverbs_query_pkey_resp { - __u16 pkey; - __u16 reserved; -}; - struct ib_uverbs_alloc_pd { __u64 response; __u64 driver_data[0]; @@ -278,11 +238,21 @@ struct ib_uverbs_dereg_mr { __u32 mr_handle; }; +struct ib_uverbs_create_comp_channel { + __u64 response; +}; + +struct ib_uverbs_create_comp_channel_resp { + __u32 fd; +}; + struct ib_uverbs_create_cq { __u64 response; __u64 user_handle; __u32 cqe; - __u32 event_handler; + __u32 comp_vector; + __s32 comp_channel; + __u32 reserved; __u64 driver_data[0]; };