err.no Git - linux-2.6/commitdiff
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland...
author Linus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 12 Oct 2007 02:43:13 +0000 (19:43 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 12 Oct 2007 02:43:13 +0000 (19:43 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (87 commits)
  mlx4_core: Fix section mismatches
  IPoIB: Allow setting policy to ignore multicast groups
  IB/mthca: Mark error paths as unlikely() in post_srq_recv functions
  IB/ipath: Minor fix to ordering of freeing and zeroing of tid pages.
  IB/ipath: Remove redundant link state checks
  IB/ipath: Fix IB_EVENT_PORT_ERR event
  IB/ipath: Better handling of unexpected GPIO interrupts
  IB/ipath: Maintain active time on all chips
  IB/ipath: Fix QHT7040 serial number check
  IB/ipath: Indicate a couple of chip bugs to userspace
  IB/ipath: iba6110 rev4 no longer needs recv header overrun workaround
  IB/ipath: Use counters in ipath_poll and cleanup interrupts in ipath_close
  IB/ipath: Remove duplicate copy of LMC
  IB/ipath: Add ability to set the LMC via the sysfs debugging interface
  IB/ipath: Optimize completion queue entry insertion and polling
  IB/ipath: Implement IB_EVENT_QP_LAST_WQE_REACHED
  IB/ipath: Generate flush CQE when QP is in error state
  IB/ipath: Remove redundant code
  IB/ipath: Future proof eeprom checksum code (contents reading)
  IB/ipath: UC RDMA WRITE with IMMEDIATE doesn't send the immediate
  ...

89 files changed:
Documentation/infiniband/user_mad.txt
drivers/infiniband/core/addr.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/device.c
drivers/infiniband/core/fmr_pool.c
drivers/infiniband/core/multicast.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/ucma.c
drivers/infiniband/core/umem.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/hw/cxgb3/iwch_cm.c
drivers/infiniband/hw/ehca/ehca_classes.h
drivers/infiniband/hw/ehca/ehca_cq.c
drivers/infiniband/hw/ehca/ehca_hca.c
drivers/infiniband/hw/ehca/ehca_irq.c
drivers/infiniband/hw/ehca/ehca_main.c
drivers/infiniband/hw/ehca/ehca_mcast.c
drivers/infiniband/hw/ehca/ehca_mrmw.c
drivers/infiniband/hw/ehca/ehca_qp.c
drivers/infiniband/hw/ehca/ehca_reqs.c
drivers/infiniband/hw/ehca/ehca_sqp.c
drivers/infiniband/hw/ehca/ehca_tools.h
drivers/infiniband/hw/ehca/ehca_uverbs.c
drivers/infiniband/hw/ehca/hcp_if.c
drivers/infiniband/hw/ehca/ipz_pt_fn.c
drivers/infiniband/hw/ipath/ipath_common.h
drivers/infiniband/hw/ipath/ipath_cq.c
drivers/infiniband/hw/ipath/ipath_diag.c
drivers/infiniband/hw/ipath/ipath_driver.c
drivers/infiniband/hw/ipath/ipath_eeprom.c
drivers/infiniband/hw/ipath/ipath_file_ops.c
drivers/infiniband/hw/ipath/ipath_fs.c
drivers/infiniband/hw/ipath/ipath_iba6110.c
drivers/infiniband/hw/ipath/ipath_iba6120.c
drivers/infiniband/hw/ipath/ipath_intr.c
drivers/infiniband/hw/ipath/ipath_kernel.h
drivers/infiniband/hw/ipath/ipath_mad.c
drivers/infiniband/hw/ipath/ipath_qp.c
drivers/infiniband/hw/ipath/ipath_rc.c
drivers/infiniband/hw/ipath/ipath_ruc.c
drivers/infiniband/hw/ipath/ipath_stats.c
drivers/infiniband/hw/ipath/ipath_sysfs.c
drivers/infiniband/hw/ipath/ipath_uc.c
drivers/infiniband/hw/ipath/ipath_ud.c
drivers/infiniband/hw/ipath/ipath_verbs.c
drivers/infiniband/hw/ipath/ipath_verbs.h
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx4/srq.c
drivers/infiniband/hw/mthca/mthca_cmd.c
drivers/infiniband/hw/mthca/mthca_dev.h
drivers/infiniband/hw/mthca/mthca_main.c
drivers/infiniband/hw/mthca/mthca_srq.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
drivers/infiniband/ulp/ipoib/ipoib_vlan.c
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/net/mlx4/cmd.c
drivers/net/mlx4/cq.c
drivers/net/mlx4/eq.c
drivers/net/mlx4/fw.c
drivers/net/mlx4/icm.c
drivers/net/mlx4/icm.h
drivers/net/mlx4/main.c
drivers/net/mlx4/mcg.c
drivers/net/mlx4/mlx4.h
drivers/net/mlx4/mr.c
drivers/net/mlx4/pd.c
drivers/net/mlx4/qp.c
drivers/net/mlx4/srq.c
include/linux/mlx4/device.h
include/rdma/ib_cm.h
include/rdma/ib_sa.h
include/rdma/ib_umem.h
include/rdma/ib_user_mad.h
include/rdma/rdma_cm.h
include/rdma/rdma_user_cm.h

index 8ec54b974b676ac4854052194697c99baaa6591c..744687dd195bb04e4af3cf8f549769824fa02087 100644 (file)
@@ -99,6 +99,20 @@ Transaction IDs
   request/response pairs.  The upper 32 bits are reserved for use by
   the kernel and will be overwritten before a MAD is sent.
 
+P_Key Index Handling
+
+  The old ib_umad interface did not allow setting the P_Key index for
+  MADs that are sent and did not provide a way for obtaining the P_Key
+  index of received MADs.  A new layout for struct ib_user_mad_hdr
+  with a pkey_index member has been defined; however, to preserve
+  binary compatibility with older applications, this new layout will
+  not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called
+  before a file descriptor is used for anything else.
+
+  In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
+  to 6, the new layout of struct ib_user_mad_hdr will be used by
+  default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
+
 Setting IsSM Capability Bit
 
   To set the IsSM capability bit for a port, simply open the
index c5c33d35f87d619bf4a04bd6cf878801d448298b..5381c80de10aad692290898003c7ebf543274191 100644 (file)
@@ -161,8 +161,7 @@ static void addr_send_arp(struct sockaddr_in *dst_in)
        if (ip_route_output_key(&rt, &fl))
                return;
 
-       arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev,
-                rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL);
+       neigh_event_send(rt->u.dst.neighbour, NULL);
        ip_rt_put(rt);
 }
 
index 4df269f5d9ac96ca8895ee04ceecd7ab75e12569..2e39236d189ff88e3d919629c9f756e61a7d4d6b 100644 (file)
@@ -2219,6 +2219,9 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
 {
        struct cm_id_private *cm_id_priv;
        struct ib_mad_send_buf *msg;
+       enum ib_cm_state cm_state;
+       enum ib_cm_lap_state lap_state;
+       enum cm_msg_response msg_response;
        void *data;
        unsigned long flags;
        int ret;
@@ -2235,48 +2238,40 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch(cm_id_priv->id.state) {
        case IB_CM_REQ_RCVD:
-               ret = cm_alloc_msg(cm_id_priv, &msg);
-               if (ret)
-                       goto error1;
-
-               cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
-                             CM_MSG_RESPONSE_REQ, service_timeout,
-                             private_data, private_data_len);
-               ret = ib_post_send_mad(msg, NULL);
-               if (ret)
-                       goto error2;
-               cm_id->state = IB_CM_MRA_REQ_SENT;
+               cm_state = IB_CM_MRA_REQ_SENT;
+               lap_state = cm_id->lap_state;
+               msg_response = CM_MSG_RESPONSE_REQ;
                break;
        case IB_CM_REP_RCVD:
-               ret = cm_alloc_msg(cm_id_priv, &msg);
-               if (ret)
-                       goto error1;
-
-               cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
-                             CM_MSG_RESPONSE_REP, service_timeout,
-                             private_data, private_data_len);
-               ret = ib_post_send_mad(msg, NULL);
-               if (ret)
-                       goto error2;
-               cm_id->state = IB_CM_MRA_REP_SENT;
+               cm_state = IB_CM_MRA_REP_SENT;
+               lap_state = cm_id->lap_state;
+               msg_response = CM_MSG_RESPONSE_REP;
                break;
        case IB_CM_ESTABLISHED:
+               cm_state = cm_id->state;
+               lap_state = IB_CM_MRA_LAP_SENT;
+               msg_response = CM_MSG_RESPONSE_OTHER;
+               break;
+       default:
+               ret = -EINVAL;
+               goto error1;
+       }
+
+       if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
                ret = cm_alloc_msg(cm_id_priv, &msg);
                if (ret)
                        goto error1;
 
                cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
-                             CM_MSG_RESPONSE_OTHER, service_timeout,
+                             msg_response, service_timeout,
                              private_data, private_data_len);
                ret = ib_post_send_mad(msg, NULL);
                if (ret)
                        goto error2;
-               cm_id->lap_state = IB_CM_MRA_LAP_SENT;
-               break;
-       default:
-               ret = -EINVAL;
-               goto error1;
        }
+
+       cm_id->state = cm_state;
+       cm_id->lap_state = lap_state;
        cm_id_priv->service_timeout = service_timeout;
        cm_set_private_data(cm_id_priv, data, private_data_len);
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
index 2e641b255db48b197ab51bcb49e2dcde94e1aae3..93644f82592c426074a9beba068b4e23d4910b79 100644 (file)
@@ -52,6 +52,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 
 #define CMA_CM_RESPONSE_TIMEOUT 20
 #define CMA_MAX_CM_RETRIES 15
+#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
 
 static void cma_add_one(struct ib_device *device);
 static void cma_remove_one(struct ib_device *device);
@@ -138,6 +139,7 @@ struct rdma_id_private {
        u32                     qkey;
        u32                     qp_num;
        u8                      srq;
+       u8                      tos;
 };
 
 struct cma_multicast {
@@ -1089,6 +1091,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
                event.param.ud.private_data_len =
                                IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
        } else {
+               ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
                conn_id = cma_new_conn_id(&listen_id->id, ib_event);
                cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
                                       ib_event->private_data, offset);
@@ -1474,6 +1477,15 @@ err:
 }
 EXPORT_SYMBOL(rdma_listen);
 
+void rdma_set_service_type(struct rdma_cm_id *id, int tos)
+{
+       struct rdma_id_private *id_priv;
+
+       id_priv = container_of(id, struct rdma_id_private, id);
+       id_priv->tos = (u8) tos;
+}
+EXPORT_SYMBOL(rdma_set_service_type);
+
 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
                              void *context)
 {
@@ -1498,23 +1510,37 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
                              struct cma_work *work)
 {
-       struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
+       struct rdma_addr *addr = &id_priv->id.route.addr;
        struct ib_sa_path_rec path_rec;
+       ib_sa_comp_mask comp_mask;
+       struct sockaddr_in6 *sin6;
 
        memset(&path_rec, 0, sizeof path_rec);
-       ib_addr_get_sgid(addr, &path_rec.sgid);
-       ib_addr_get_dgid(addr, &path_rec.dgid);
-       path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
+       ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
+       ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
+       path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
        path_rec.numb_path = 1;
        path_rec.reversible = 1;
+       path_rec.service_id = cma_get_service_id(id_priv->id.ps, &addr->dst_addr);
+
+       comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
+                   IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
+                   IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
+
+       if (addr->src_addr.sa_family == AF_INET) {
+               path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
+               comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
+       } else {
+               sin6 = (struct sockaddr_in6 *) &addr->src_addr;
+               path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
+               comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
+       }
 
        id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
-                               id_priv->id.port_num, &path_rec,
-                               IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
-                               IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
-                               IB_SA_PATH_REC_REVERSIBLE,
-                               timeout_ms, GFP_KERNEL,
-                               cma_query_handler, work, &id_priv->query);
+                                              id_priv->id.port_num, &path_rec,
+                                              comp_mask, timeout_ms,
+                                              GFP_KERNEL, cma_query_handler,
+                                              work, &id_priv->query);
 
        return (id_priv->query_id < 0) ? id_priv->query_id : 0;
 }
index 2506c43ba041c996a54a30b80efb455d908da5a7..5ac5ffee05cbbc044c34aecfd6c1f323f719ec6e 100644 (file)
@@ -120,12 +120,12 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
 
 static int alloc_name(char *name)
 {
-       long *inuse;
+       unsigned long *inuse;
        char buf[IB_DEVICE_NAME_MAX];
        struct ib_device *device;
        int i;
 
-       inuse = (long *) get_zeroed_page(GFP_KERNEL);
+       inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
        if (!inuse)
                return -ENOMEM;
 
index a06bcc65a871f54c1a6d35545a4557fd1ae2d4f1..d7f64525469b5769d23621c22f4842a7b49387a9 100644 (file)
@@ -152,7 +152,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
 
 #ifdef DEBUG
                if (fmr->ref_count !=0) {
-                       printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d",
+                       printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n",
                               fmr, fmr->ref_count);
                }
 #endif
@@ -170,7 +170,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
 
        ret = ib_unmap_fmr(&fmr_list);
        if (ret)
-               printk(KERN_WARNING PFX "ib_unmap_fmr returned %d", ret);
+               printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret);
 
        spin_lock_irq(&pool->pool_lock);
        list_splice(&unmap_list, &pool->free_list);
@@ -235,13 +235,13 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 
        attr = kmalloc(sizeof *attr, GFP_KERNEL);
        if (!attr) {
-               printk(KERN_WARNING PFX "couldn't allocate device attr struct");
+               printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
                return ERR_PTR(-ENOMEM);
        }
 
        ret = ib_query_device(device, attr);
        if (ret) {
-               printk(KERN_WARNING PFX "couldn't query device: %d", ret);
+               printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
                kfree(attr);
                return ERR_PTR(ret);
        }
@@ -255,7 +255,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 
        pool = kmalloc(sizeof *pool, GFP_KERNEL);
        if (!pool) {
-               printk(KERN_WARNING PFX "couldn't allocate pool struct");
+               printk(KERN_WARNING PFX "couldn't allocate pool struct\n");
                return ERR_PTR(-ENOMEM);
        }
 
@@ -272,7 +272,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
                        kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
                                GFP_KERNEL);
                if (!pool->cache_bucket) {
-                       printk(KERN_WARNING PFX "Failed to allocate cache in pool");
+                       printk(KERN_WARNING PFX "Failed to allocate cache in pool\n");
                        ret = -ENOMEM;
                        goto out_free_pool;
                }
@@ -296,7 +296,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
                                      "ib_fmr(%s)",
                                      device->name);
        if (IS_ERR(pool->thread)) {
-               printk(KERN_WARNING PFX "couldn't start cleanup thread");
+               printk(KERN_WARNING PFX "couldn't start cleanup thread\n");
                ret = PTR_ERR(pool->thread);
                goto out_free_pool;
        }
@@ -314,7 +314,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
                                      GFP_KERNEL);
                        if (!fmr) {
                                printk(KERN_WARNING PFX "failed to allocate fmr "
-                                      "struct for FMR %d", i);
+                                      "struct for FMR %d\n", i);
                                goto out_fail;
                        }
 
@@ -326,7 +326,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
                        fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
                        if (IS_ERR(fmr->fmr)) {
                                printk(KERN_WARNING PFX "fmr_create failed "
-                                      "for FMR %d", i);
+                                      "for FMR %d\n", i);
                                kfree(fmr);
                                goto out_fail;
                        }
@@ -381,7 +381,7 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
        }
 
        if (i < pool->pool_size)
-               printk(KERN_WARNING PFX "pool still has %d regions registered",
+               printk(KERN_WARNING PFX "pool still has %d regions registered\n",
                       pool->pool_size - i);
 
        kfree(pool->cache_bucket);
@@ -518,7 +518,7 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
 
 #ifdef DEBUG
        if (fmr->ref_count < 0)
-               printk(KERN_WARNING PFX "FMR %p has ref count %d < 0",
+               printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n",
                       fmr, fmr->ref_count);
 #endif
 
index 15b4c4d3606dbc4954d399e5ef0f1d5a5d254bb0..1bc1fe60528296251f7ddf86ba85f20dcb65a8d4 100644 (file)
@@ -196,7 +196,7 @@ static void queue_join(struct mcast_member *member)
        unsigned long flags;
 
        spin_lock_irqsave(&group->lock, flags);
-       list_add(&member->list, &group->pending_list);
+       list_add_tail(&member->list, &group->pending_list);
        if (group->state == MCAST_IDLE) {
                group->state = MCAST_BUSY;
                atomic_inc(&group->refcount);
index d271bd715c12920a9a0e907f28865e1e120a816e..cf474ec270703fd8e565c19e28f8bc378f326e70 100644 (file)
@@ -123,14 +123,10 @@ static u32 tid;
        .field_name          = "sa_path_rec:" #field
 
 static const struct ib_field path_rec_table[] = {
-       { RESERVED,
+       { PATH_REC_FIELD(service_id),
          .offset_words = 0,
          .offset_bits  = 0,
-         .size_bits    = 32 },
-       { RESERVED,
-         .offset_words = 1,
-         .offset_bits  = 0,
-         .size_bits    = 32 },
+         .size_bits    = 64 },
        { PATH_REC_FIELD(dgid),
          .offset_words = 2,
          .offset_bits  = 0,
@@ -179,7 +175,7 @@ static const struct ib_field path_rec_table[] = {
          .offset_words = 12,
          .offset_bits  = 16,
          .size_bits    = 16 },
-       { RESERVED,
+       { PATH_REC_FIELD(qos_class),
          .offset_words = 13,
          .offset_bits  = 0,
          .size_bits    = 12 },
@@ -531,7 +527,7 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
                                            query->sm_ah->pkey_index,
                                            0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
                                            gfp_mask);
-       if (!query->mad_buf) {
+       if (IS_ERR(query->mad_buf)) {
                kref_put(&query->sm_ah->ref, free_sm_ah);
                return -ENOMEM;
        }
index 53b4c94a7eb5a34a0ffe8ab29a4dcb941967c33b..90d675ad9ec8af51acaea573a58ea91f8ebc40de 100644 (file)
@@ -792,6 +792,78 @@ out:
        return ret;
 }
 
+static int ucma_set_option_id(struct ucma_context *ctx, int optname,
+                             void *optval, size_t optlen)
+{
+       int ret = 0;
+
+       switch (optname) {
+       case RDMA_OPTION_ID_TOS:
+               if (optlen != sizeof(u8)) {
+                       ret = -EINVAL;
+                       break;
+               }
+               rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
+               break;
+       default:
+               ret = -ENOSYS;
+       }
+
+       return ret;
+}
+
+static int ucma_set_option_level(struct ucma_context *ctx, int level,
+                                int optname, void *optval, size_t optlen)
+{
+       int ret;
+
+       switch (level) {
+       case RDMA_OPTION_ID:
+               ret = ucma_set_option_id(ctx, optname, optval, optlen);
+               break;
+       default:
+               ret = -ENOSYS;
+       }
+
+       return ret;
+}
+
+static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
+                              int in_len, int out_len)
+{
+       struct rdma_ucm_set_option cmd;
+       struct ucma_context *ctx;
+       void *optval;
+       int ret;
+
+       if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+               return -EFAULT;
+
+       ctx = ucma_get_ctx(file, cmd.id);
+       if (IS_ERR(ctx))
+               return PTR_ERR(ctx);
+
+       optval = kmalloc(cmd.optlen, GFP_KERNEL);
+       if (!optval) {
+               ret = -ENOMEM;
+               goto out1;
+       }
+
+       if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
+                          cmd.optlen)) {
+               ret = -EFAULT;
+               goto out2;
+       }
+
+       ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
+                                   cmd.optlen);
+out2:
+       kfree(optval);
+out1:
+       ucma_put_ctx(ctx);
+       return ret;
+}
+
 static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
                           int in_len, int out_len)
 {
@@ -936,7 +1008,7 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
        [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
        [RDMA_USER_CM_CMD_GET_EVENT]    = ucma_get_event,
        [RDMA_USER_CM_CMD_GET_OPTION]   = NULL,
-       [RDMA_USER_CM_CMD_SET_OPTION]   = NULL,
+       [RDMA_USER_CM_CMD_SET_OPTION]   = ucma_set_option,
        [RDMA_USER_CM_CMD_NOTIFY]       = ucma_notify,
        [RDMA_USER_CM_CMD_JOIN_MCAST]   = ucma_join_multicast,
        [RDMA_USER_CM_CMD_LEAVE_MCAST]  = ucma_leave_multicast,
index 664d2faa9e744b48aa6ce11e4c195ad18d2d66eb..2f54e29dc7a64b3771e6fec854d7f4699b355e17 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
 #include <linux/sched.h>
+#include <linux/hugetlb.h>
 
 #include "uverbs.h"
 
@@ -75,6 +76,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 {
        struct ib_umem *umem;
        struct page **page_list;
+       struct vm_area_struct **vma_list;
        struct ib_umem_chunk *chunk;
        unsigned long locked;
        unsigned long lock_limit;
@@ -104,6 +106,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
         */
        umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);
 
+       /* We assume the memory is from hugetlb until proved otherwise */
+       umem->hugetlb   = 1;
+
        INIT_LIST_HEAD(&umem->chunk_list);
 
        page_list = (struct page **) __get_free_page(GFP_KERNEL);
@@ -112,6 +117,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
                return ERR_PTR(-ENOMEM);
        }
 
+       /*
+        * if we can't alloc the vma_list, it's not so bad;
+        * just assume the memory is not hugetlb memory
+        */
+       vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
+       if (!vma_list)
+               umem->hugetlb = 0;
+
        npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
 
        down_write(&current->mm->mmap_sem);
@@ -131,7 +144,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
                ret = get_user_pages(current, current->mm, cur_base,
                                     min_t(int, npages,
                                           PAGE_SIZE / sizeof (struct page *)),
-                                    1, !umem->writable, page_list, NULL);
+                                    1, !umem->writable, page_list, vma_list);
 
                if (ret < 0)
                        goto out;
@@ -152,6 +165,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
                        chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
                        for (i = 0; i < chunk->nents; ++i) {
+                               if (vma_list &&
+                                   !is_vm_hugetlb_page(vma_list[i + off]))
+                                       umem->hugetlb = 0;
                                chunk->page_list[i].page   = page_list[i + off];
                                chunk->page_list[i].offset = 0;
                                chunk->page_list[i].length = PAGE_SIZE;
@@ -186,6 +202,8 @@ out:
                current->mm->locked_vm = locked;
 
        up_write(&current->mm->mmap_sem);
+       if (vma_list)
+               free_page((unsigned long) vma_list);
        free_page((unsigned long) page_list);
 
        return ret < 0 ? ERR_PTR(ret) : umem;
index d97ded25c4ff98bb7c8e68fa299f569bcfec1eb3..b53eac4611de387b6eb8f831108810205204d5c1 100644 (file)
@@ -44,6 +44,7 @@
 #include <linux/poll.h>
 #include <linux/rwsem.h>
 #include <linux/kref.h>
+#include <linux/compat.h>
 
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
@@ -118,6 +119,8 @@ struct ib_umad_file {
        wait_queue_head_t       recv_wait;
        struct ib_mad_agent    *agent[IB_UMAD_MAX_AGENTS];
        int                     agents_dead;
+       u8                      use_pkey_index;
+       u8                      already_used;
 };
 
 struct ib_umad_packet {
@@ -147,6 +150,12 @@ static void ib_umad_release_dev(struct kref *ref)
        kfree(dev);
 }
 
+static int hdr_size(struct ib_umad_file *file)
+{
+       return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
+               sizeof (struct ib_user_mad_hdr_old);
+}
+
 /* caller must hold port->mutex at least for reading */
 static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
 {
@@ -221,13 +230,13 @@ static void recv_handler(struct ib_mad_agent *agent,
        packet->length = mad_recv_wc->mad_len;
        packet->recv_wc = mad_recv_wc;
 
-       packet->mad.hdr.status    = 0;
-       packet->mad.hdr.length    = sizeof (struct ib_user_mad) +
-                                   mad_recv_wc->mad_len;
-       packet->mad.hdr.qpn       = cpu_to_be32(mad_recv_wc->wc->src_qp);
-       packet->mad.hdr.lid       = cpu_to_be16(mad_recv_wc->wc->slid);
-       packet->mad.hdr.sl        = mad_recv_wc->wc->sl;
-       packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
+       packet->mad.hdr.status     = 0;
+       packet->mad.hdr.length     = hdr_size(file) + mad_recv_wc->mad_len;
+       packet->mad.hdr.qpn        = cpu_to_be32(mad_recv_wc->wc->src_qp);
+       packet->mad.hdr.lid        = cpu_to_be16(mad_recv_wc->wc->slid);
+       packet->mad.hdr.sl         = mad_recv_wc->wc->sl;
+       packet->mad.hdr.path_bits  = mad_recv_wc->wc->dlid_path_bits;
+       packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
        packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
        if (packet->mad.hdr.grh_present) {
                struct ib_ah_attr ah_attr;
@@ -253,8 +262,8 @@ err1:
        ib_free_recv_mad(mad_recv_wc);
 }
 
-static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
-                            size_t count)
+static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf,
+                            struct ib_umad_packet *packet, size_t count)
 {
        struct ib_mad_recv_buf *recv_buf;
        int left, seg_payload, offset, max_seg_payload;
@@ -262,15 +271,15 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
        /* We need enough room to copy the first (or only) MAD segment. */
        recv_buf = &packet->recv_wc->recv_buf;
        if ((packet->length <= sizeof (*recv_buf->mad) &&
-            count < sizeof (packet->mad) + packet->length) ||
+            count < hdr_size(file) + packet->length) ||
            (packet->length > sizeof (*recv_buf->mad) &&
-            count < sizeof (packet->mad) + sizeof (*recv_buf->mad)))
+            count < hdr_size(file) + sizeof (*recv_buf->mad)))
                return -EINVAL;
 
-       if (copy_to_user(buf, &packet->mad, sizeof (packet->mad)))
+       if (copy_to_user(buf, &packet->mad, hdr_size(file)))
                return -EFAULT;
 
-       buf += sizeof (packet->mad);
+       buf += hdr_size(file);
        seg_payload = min_t(int, packet->length, sizeof (*recv_buf->mad));
        if (copy_to_user(buf, recv_buf->mad, seg_payload))
                return -EFAULT;
@@ -280,7 +289,7 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
                 * Multipacket RMPP MAD message. Copy remainder of message.
                 * Note that last segment may have a shorter payload.
                 */
-               if (count < sizeof (packet->mad) + packet->length) {
+               if (count < hdr_size(file) + packet->length) {
                        /*
                         * The buffer is too small, return the first RMPP segment,
                         * which includes the RMPP message length.
@@ -300,18 +309,23 @@ static ssize_t copy_recv_mad(char __user *buf, struct ib_umad_packet *packet,
                                return -EFAULT;
                }
        }
-       return sizeof (packet->mad) + packet->length;
+       return hdr_size(file) + packet->length;
 }
 
-static ssize_t copy_send_mad(char __user *buf, struct ib_umad_packet *packet,
-                            size_t count)
+static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf,
+                            struct ib_umad_packet *packet, size_t count)
 {
-       ssize_t size = sizeof (packet->mad) + packet->length;
+       ssize_t size = hdr_size(file) + packet->length;
 
        if (count < size)
                return -EINVAL;
 
-       if (copy_to_user(buf, &packet->mad, size))
+       if (copy_to_user(buf, &packet->mad, hdr_size(file)))
+               return -EFAULT;
+
+       buf += hdr_size(file);
+
+       if (copy_to_user(buf, packet->mad.data, packet->length))
                return -EFAULT;
 
        return size;
@@ -324,7 +338,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
        struct ib_umad_packet *packet;
        ssize_t ret;
 
-       if (count < sizeof (struct ib_user_mad))
+       if (count < hdr_size(file))
                return -EINVAL;
 
        spin_lock_irq(&file->recv_lock);
@@ -348,9 +362,9 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
        spin_unlock_irq(&file->recv_lock);
 
        if (packet->recv_wc)
-               ret = copy_recv_mad(buf, packet, count);
+               ret = copy_recv_mad(file, buf, packet, count);
        else
-               ret = copy_send_mad(buf, packet, count);
+               ret = copy_send_mad(file, buf, packet, count);
 
        if (ret < 0) {
                /* Requeue packet */
@@ -442,15 +456,14 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
        __be64 *tid;
        int ret, data_len, hdr_len, copy_offset, rmpp_active;
 
-       if (count < sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)
+       if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
                return -EINVAL;
 
        packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
        if (!packet)
                return -ENOMEM;
 
-       if (copy_from_user(&packet->mad, buf,
-                           sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) {
+       if (copy_from_user(&packet->mad, buf, hdr_size(file))) {
                ret = -EFAULT;
                goto err;
        }
@@ -461,6 +474,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
                goto err;
        }
 
+       buf += hdr_size(file);
+
+       if (copy_from_user(packet->mad.data, buf, IB_MGMT_RMPP_HDR)) {
+               ret = -EFAULT;
+               goto err;
+       }
+
        down_read(&file->port->mutex);
 
        agent = __get_agent(file, packet->mad.hdr.id);
@@ -500,11 +520,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
                              IB_MGMT_RMPP_FLAG_ACTIVE;
        }
 
-       data_len = count - sizeof (struct ib_user_mad) - hdr_len;
+       data_len = count - hdr_size(file) - hdr_len;
        packet->msg = ib_create_send_mad(agent,
                                         be32_to_cpu(packet->mad.hdr.qpn),
-                                        0, rmpp_active, hdr_len,
-                                        data_len, GFP_KERNEL);
+                                        packet->mad.hdr.pkey_index, rmpp_active,
+                                        hdr_len, data_len, GFP_KERNEL);
        if (IS_ERR(packet->msg)) {
                ret = PTR_ERR(packet->msg);
                goto err_ah;
@@ -517,7 +537,6 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 
        /* Copy MAD header.  Any RMPP header is already in place. */
        memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
-       buf += sizeof (struct ib_user_mad);
 
        if (!rmpp_active) {
                if (copy_from_user(packet->msg->mad + copy_offset,
@@ -589,7 +608,8 @@ static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wa
        return mask;
 }
 
-static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
+static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
+                            int compat_method_mask)
 {
        struct ib_user_mad_reg_req ureq;
        struct ib_mad_reg_req req;
@@ -604,7 +624,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, unsigned long arg)
                goto out;
        }
 
-       if (copy_from_user(&ureq, (void __user *) arg, sizeof ureq)) {
+       if (copy_from_user(&ureq, arg, sizeof ureq)) {
                ret = -EFAULT;
                goto out;
        }
@@ -625,8 +645,18 @@ found:
        if (ureq.mgmt_class) {
                req.mgmt_class         = ureq.mgmt_class;
                req.mgmt_class_version = ureq.mgmt_class_version;
-               memcpy(req.method_mask, ureq.method_mask, sizeof req.method_mask);
-               memcpy(req.oui,         ureq.oui,         sizeof req.oui);
+               memcpy(req.oui, ureq.oui, sizeof req.oui);
+
+               if (compat_method_mask) {
+                       u32 *umm = (u32 *) ureq.method_mask;
+                       int i;
+
+                       for (i = 0; i < BITS_TO_LONGS(IB_MGMT_MAX_METHODS); ++i)
+                               req.method_mask[i] =
+                                       umm[i * 2] | ((u64) umm[i * 2 + 1] << 32);
+               } else
+                       memcpy(req.method_mask, ureq.method_mask,
+                              sizeof req.method_mask);
        }
 
        agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
@@ -646,6 +676,16 @@ found:
                goto out;
        }
 
+       if (!file->already_used) {
+               file->already_used = 1;
+               if (!file->use_pkey_index) {
+                       printk(KERN_WARNING "user_mad: process %s did not enable "
+                              "P_Key index support.\n", current->comm);
+                       printk(KERN_WARNING "user_mad:   Documentation/infiniband/user_mad.txt "
+                              "has info on the new ABI.\n");
+               }
+       }
+
        file->agent[agent_id] = agent;
        ret = 0;
 
@@ -654,13 +694,13 @@ out:
        return ret;
 }
 
-static int ib_umad_unreg_agent(struct ib_umad_file *file, unsigned long arg)
+static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
 {
        struct ib_mad_agent *agent = NULL;
        u32 id;
        int ret = 0;
 
-       if (get_user(id, (u32 __user *) arg))
+       if (get_user(id, arg))
                return -EFAULT;
 
        down_write(&file->port->mutex);
@@ -682,18 +722,51 @@ out:
        return ret;
 }
 
+static long ib_umad_enable_pkey(struct ib_umad_file *file)
+{ /* Sets file->use_pkey_index; returns 0, or -EINVAL if file->already_used. */
+       int ret = 0;
+
+       down_write(&file->port->mutex); /* flag is tested and set under the port mutex */
+       if (file->already_used) /* set by ib_umad_reg_agent() when an agent is registered */
+               ret = -EINVAL;
+       else
+               file->use_pkey_index = 1;
+       up_write(&file->port->mutex);
+
+       return ret;
+}
+
 static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
                           unsigned long arg)
 {
        switch (cmd) {
        case IB_USER_MAD_REGISTER_AGENT:
-               return ib_umad_reg_agent(filp->private_data, arg);
+               return ib_umad_reg_agent(filp->private_data, (void __user *) arg, 0); /* 0: method_mask is copied as-is */
+       case IB_USER_MAD_UNREGISTER_AGENT:
+               return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
+       case IB_USER_MAD_ENABLE_PKEY:
+               return ib_umad_enable_pkey(filp->private_data); /* takes no argument; arg is ignored */
+       default:
+               return -ENOIOCTLCMD;
+       }
+}
+
+#ifdef CONFIG_COMPAT
+static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
+                                unsigned long arg)
+{ /* Installed as umad_fops.compat_ioctl; same commands as ib_umad_ioctl(), user pointers go through compat_ptr(). */
+       switch (cmd) {
+       case IB_USER_MAD_REGISTER_AGENT:
+               return ib_umad_reg_agent(filp->private_data, compat_ptr(arg), 1); /* 1: method_mask is rebuilt from u32 pairs */
        case IB_USER_MAD_UNREGISTER_AGENT:
-               return ib_umad_unreg_agent(filp->private_data, arg);
+               return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
+       case IB_USER_MAD_ENABLE_PKEY:
+               return ib_umad_enable_pkey(filp->private_data); /* takes no argument; arg is ignored */
        default:
                return -ENOIOCTLCMD;
        }
 }
+#endif /* CONFIG_COMPAT */
 
 static int ib_umad_open(struct inode *inode, struct file *filp)
 {
@@ -782,7 +855,9 @@ static const struct file_operations umad_fops = {
        .write          = ib_umad_write,
        .poll           = ib_umad_poll,
        .unlocked_ioctl = ib_umad_ioctl,
-       .compat_ioctl   = ib_umad_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = ib_umad_compat_ioctl,
+#endif
        .open           = ib_umad_open,
        .release        = ib_umad_close
 };
index c33546f9e96199b28811ed5396c3daabda27e8c4..c75eb6c9bd49dee9a6d355e9d5fba9e211642247 100644 (file)
@@ -148,7 +148,6 @@ void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
 
 struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
                                        int is_async, int *fd);
-void ib_uverbs_release_event_file(struct kref *ref);
 struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
 
 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
index 14d7ccd8919534d646814fb7cf58c89a81f07bc0..7c2ac39055822fa6e4db39a8fcd105ac35385e97 100644 (file)
@@ -125,6 +125,14 @@ static void ib_uverbs_release_dev(struct kref *ref)
        complete(&dev->comp);
 }
 
+static void ib_uverbs_release_event_file(struct kref *ref)
+{ /* Frees the ib_uverbs_event_file whose 'ref' member is @ref. */
+       struct ib_uverbs_event_file *file =
+               container_of(ref, struct ib_uverbs_event_file, ref);
+
+       kfree(file);
+}
+
 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
                          struct ib_uverbs_event_file *ev_file,
                          struct ib_ucq_object *uobj)
@@ -331,14 +339,6 @@ static unsigned int ib_uverbs_event_poll(struct file *filp,
        return pollflags;
 }
 
-void ib_uverbs_release_event_file(struct kref *ref)
-{
-       struct ib_uverbs_event_file *file =
-               container_of(ref, struct ib_uverbs_event_file, ref);
-
-       kfree(file);
-}
-
 static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
 {
        struct ib_uverbs_event_file *file = filp->private_data;
index 1cdfcd43b0bcb6bd8dff940fd4c11d19c07fc3ea..20ba372dd182268bb5614c8c62f4dd9992445fb3 100644 (file)
@@ -63,37 +63,37 @@ static char *states[] = {
 };
 
 static int ep_timeout_secs = 10;
-module_param(ep_timeout_secs, int, 0444);
+module_param(ep_timeout_secs, int, 0644);
 MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
                                   "in seconds (default=10)");
 
 static int mpa_rev = 1;
-module_param(mpa_rev, int, 0444);
+module_param(mpa_rev, int, 0644);
 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
                 "1 is spec compliant. (default=1)");
 
 static int markers_enabled = 0;
-module_param(markers_enabled, int, 0444);
+module_param(markers_enabled, int, 0644);
 MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
 
 static int crc_enabled = 1;
-module_param(crc_enabled, int, 0444);
+module_param(crc_enabled, int, 0644);
 MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
 
 static int rcv_win = 256 * 1024;
-module_param(rcv_win, int, 0444);
+module_param(rcv_win, int, 0644);
 MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)");
 
 static int snd_win = 32 * 1024;
-module_param(snd_win, int, 0444);
+module_param(snd_win, int, 0644);
 MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
 
 static unsigned int nocong = 0;
-module_param(nocong, uint, 0444);
+module_param(nocong, uint, 0644);
 MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
 
 static unsigned int cong_flavor = 1;
-module_param(cong_flavor, uint, 0444);
+module_param(cong_flavor, uint, 0644);
 MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
 
 static void process_work(struct work_struct *work);
index b5e96030531642c7a95fc6141d1a4228137fa3ac..0f7a55d35ea7e1f6e4dfd385c12108d5638fe576 100644 (file)
@@ -53,6 +53,7 @@ struct ehca_pd;
 struct ehca_av;
 
 #include <linux/wait.h>
+#include <linux/mutex.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_user_verbs.h>
@@ -99,10 +100,10 @@ struct ehca_sport {
        struct ehca_sma_attr saved_attr;
 };
 
-#define HCA_CAP_MR_PGSIZE_4K  1
-#define HCA_CAP_MR_PGSIZE_64K 2
-#define HCA_CAP_MR_PGSIZE_1M  4
-#define HCA_CAP_MR_PGSIZE_16M 8
+#define HCA_CAP_MR_PGSIZE_4K  0x80000000 /* single-bit masks: bit 31 ... */
+#define HCA_CAP_MR_PGSIZE_64K 0x40000000 /* bit 30 */
+#define HCA_CAP_MR_PGSIZE_1M  0x20000000 /* bit 29 */
+#define HCA_CAP_MR_PGSIZE_16M 0x10000000 /* bit 28; tested against shca->hca_cap_mr_pgsize */
 
 struct ehca_shca {
        struct ib_device ib_device;
@@ -337,6 +338,8 @@ struct ehca_create_cq_resp {
        u32 cq_number;
        u32 token;
        struct ipzu_queue_resp ipz_queue;
+       u32 fw_handle_ofs;
+       u32 dummy;
 };
 
 struct ehca_create_qp_resp {
@@ -347,7 +350,8 @@ struct ehca_create_qp_resp {
        u32 qkey;
        /* qp_num assigned by ehca: sqp0/1 may have got different numbers */
        u32 real_qp_num;
-       u32 dummy; /* padding for 8 byte alignment */
+       u32 fw_handle_ofs;
+       u32 dummy;
        struct ipzu_queue_resp ipz_squeue;
        struct ipzu_queue_resp ipz_rqueue;
 };
index 81aff36101ba5944fab04c22030f7b35a1a61f07..79c25f51c21e82997b1215bcea2935764cdc5773 100644 (file)
@@ -166,7 +166,6 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
                write_lock_irqsave(&ehca_cq_idr_lock, flags);
                ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
                write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-
        } while (ret == -EAGAIN);
 
        if (ret) {
@@ -176,6 +175,12 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
                goto create_cq_exit1;
        }
 
+       if (my_cq->token > 0x1FFFFFF) {
+               cq = ERR_PTR(-ENOMEM);
+               ehca_err(device, "Invalid number of cq. device=%p", device);
+               goto create_cq_exit2;
+       }
+
        /*
         * CQs maximum depth is 4GB-64, but we need additional 20 as buffer
         * for receiving errors CQEs.
@@ -185,7 +190,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
 
        if (h_ret != H_SUCCESS) {
                ehca_err(device, "hipz_h_alloc_resource_cq() failed "
-                        "h_ret=%lx device=%p", h_ret, device);
+                        "h_ret=%li device=%p", h_ret, device);
                cq = ERR_PTR(ehca2ib_return_code(h_ret));
                goto create_cq_exit2;
        }
@@ -193,7 +198,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
        ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages,
                                EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0);
        if (!ipz_rc) {
-               ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%x device=%p",
+               ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p",
                         ipz_rc, device);
                cq = ERR_PTR(-EINVAL);
                goto create_cq_exit3;
@@ -221,7 +226,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
 
                if (h_ret < H_SUCCESS) {
                        ehca_err(device, "hipz_h_register_rpage_cq() failed "
-                                "ehca_cq=%p cq_num=%x h_ret=%lx counter=%i "
+                                "ehca_cq=%p cq_num=%x h_ret=%li counter=%i "
                                 "act_pages=%i", my_cq, my_cq->cq_number,
                                 h_ret, counter, param.act_pages);
                        cq = ERR_PTR(-EINVAL);
@@ -233,7 +238,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
                        if ((h_ret != H_SUCCESS) || vpage) {
                                ehca_err(device, "Registration of pages not "
                                         "complete ehca_cq=%p cq_num=%x "
-                                        "h_ret=%lx", my_cq, my_cq->cq_number,
+                                        "h_ret=%li", my_cq, my_cq->cq_number,
                                         h_ret);
                                cq = ERR_PTR(-EAGAIN);
                                goto create_cq_exit4;
@@ -241,7 +246,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
                } else {
                        if (h_ret != H_PAGE_REGISTERED) {
                                ehca_err(device, "Registration of page failed "
-                                        "ehca_cq=%p cq_num=%x h_ret=%lx"
+                                        "ehca_cq=%p cq_num=%x h_ret=%li"
                                         "counter=%i act_pages=%i",
                                         my_cq, my_cq->cq_number,
                                         h_ret, counter, param.act_pages);
@@ -276,6 +281,8 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
                resp.ipz_queue.queue_length = ipz_queue->queue_length;
                resp.ipz_queue.pagesize = ipz_queue->pagesize;
                resp.ipz_queue.toggle_state = ipz_queue->toggle_state;
+               resp.fw_handle_ofs = (u32)
+                       (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
                if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
                        ehca_err(device, "Copy to udata failed.");
                        goto create_cq_exit4;
@@ -291,7 +298,7 @@ create_cq_exit3:
        h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1);
        if (h_ret != H_SUCCESS)
                ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p "
-                        "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);
+                        "cq_num=%x h_ret=%li", my_cq, my_cq->cq_number, h_ret);
 
 create_cq_exit2:
        write_lock_irqsave(&ehca_cq_idr_lock, flags);
@@ -355,7 +362,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
                                 cq_num);
        }
        if (h_ret != H_SUCCESS) {
-               ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lx "
+               ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%li "
                         "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num);
                return ehca2ib_return_code(h_ret);
        }
index cf22472d9414b66e2d681e44d604e6e65a5f841d..4aa3ffa6a19fe425345e1bcd800dad2d6cc3d459 100644 (file)
@@ -82,17 +82,17 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
        props->vendor_id       = rblock->vendor_id >> 8;
        props->vendor_part_id  = rblock->vendor_part_id >> 16;
        props->hw_ver          = rblock->hw_ver;
-       props->max_qp          = min_t(int, rblock->max_qp, INT_MAX);
-       props->max_qp_wr       = min_t(int, rblock->max_wqes_wq, INT_MAX);
-       props->max_sge         = min_t(int, rblock->max_sge, INT_MAX);
-       props->max_sge_rd      = min_t(int, rblock->max_sge_rd, INT_MAX);
-       props->max_cq          = min_t(int, rblock->max_cq, INT_MAX);
-       props->max_cqe         = min_t(int, rblock->max_cqe, INT_MAX);
-       props->max_mr          = min_t(int, rblock->max_mr, INT_MAX);
-       props->max_mw          = min_t(int, rblock->max_mw, INT_MAX);
-       props->max_pd          = min_t(int, rblock->max_pd, INT_MAX);
-       props->max_ah          = min_t(int, rblock->max_ah, INT_MAX);
-       props->max_fmr         = min_t(int, rblock->max_mr, INT_MAX);
+       props->max_qp          = min_t(unsigned, rblock->max_qp, INT_MAX);
+       props->max_qp_wr       = min_t(unsigned, rblock->max_wqes_wq, INT_MAX);
+       props->max_sge         = min_t(unsigned, rblock->max_sge, INT_MAX);
+       props->max_sge_rd      = min_t(unsigned, rblock->max_sge_rd, INT_MAX);
+       props->max_cq          = min_t(unsigned, rblock->max_cq, INT_MAX);
+       props->max_cqe         = min_t(unsigned, rblock->max_cqe, INT_MAX);
+       props->max_mr          = min_t(unsigned, rblock->max_mr, INT_MAX);
+       props->max_mw          = min_t(unsigned, rblock->max_mw, INT_MAX);
+       props->max_pd          = min_t(unsigned, rblock->max_pd, INT_MAX);
+       props->max_ah          = min_t(unsigned, rblock->max_ah, INT_MAX);
+       props->max_fmr         = min_t(unsigned, rblock->max_mr, INT_MAX);
 
        if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
                props->max_srq         = props->max_qp;
@@ -104,15 +104,15 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
        props->local_ca_ack_delay
                = rblock->local_ca_ack_delay;
        props->max_raw_ipv6_qp
-               = min_t(int, rblock->max_raw_ipv6_qp, INT_MAX);
+               = min_t(unsigned, rblock->max_raw_ipv6_qp, INT_MAX);
        props->max_raw_ethy_qp
-               = min_t(int, rblock->max_raw_ethy_qp, INT_MAX);
+               = min_t(unsigned, rblock->max_raw_ethy_qp, INT_MAX);
        props->max_mcast_grp
-               = min_t(int, rblock->max_mcast_grp, INT_MAX);
+               = min_t(unsigned, rblock->max_mcast_grp, INT_MAX);
        props->max_mcast_qp_attach
-               = min_t(int, rblock->max_mcast_qp_attach, INT_MAX);
+               = min_t(unsigned, rblock->max_mcast_qp_attach, INT_MAX);
        props->max_total_mcast_qp_attach
-               = min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX);
+               = min_t(unsigned, rblock->max_total_mcast_qp_attach, INT_MAX);
 
        /* translate device capabilities */
        props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
@@ -352,7 +352,7 @@ int ehca_modify_port(struct ib_device *ibdev,
        hret = hipz_h_modify_port(shca->ipz_hca_handle, port,
                                  cap, props->init_type, port_modify_mask);
        if (hret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "Modify port failed  hret=%lx",
+               ehca_err(&shca->ib_device, "Modify port failed  h_ret=%li",
                         hret);
                ret = -EINVAL;
        }
index a925ea52443ff6408b68409e1720758dde9d8d9f..3f617b27b9543658d901dd1de36caca52371f9e5 100644 (file)
@@ -69,9 +69,6 @@
 static void queue_comp_task(struct ehca_cq *__cq);
 
 static struct ehca_comp_pool *pool;
-#ifdef CONFIG_HOTPLUG_CPU
-static struct notifier_block comp_pool_callback_nb;
-#endif
 
 static inline void comp_event_callback(struct ehca_cq *cq)
 {
@@ -294,8 +291,8 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
        case 0x11: /* unaffiliated access error */
                ehca_err(&shca->ib_device, "Unaffiliated access error.");
                break;
-       case 0x12: /* path migrating error */
-               ehca_err(&shca->ib_device, "Path migration error.");
+       case 0x12: /* path migrating */
+               ehca_err(&shca->ib_device, "Path migrating.");
                break;
        case 0x13: /* interface trace stopped */
                ehca_err(&shca->ib_device, "Interface trace stopped.");
@@ -760,9 +757,7 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
                kthread_stop(task);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-static void take_over_work(struct ehca_comp_pool *pool,
-                          int cpu)
+static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu)
 {
        struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
        LIST_HEAD(list);
@@ -785,9 +780,9 @@ static void take_over_work(struct ehca_comp_pool *pool,
 
 }
 
-static int comp_pool_callback(struct notifier_block *nfb,
-                             unsigned long action,
-                             void *hcpu)
+static int __cpuinit comp_pool_callback(struct notifier_block *nfb,
+                                       unsigned long action,
+                                       void *hcpu)
 {
        unsigned int cpu = (unsigned long)hcpu;
        struct ehca_cpu_comp_task *cct;
@@ -833,7 +828,11 @@ static int comp_pool_callback(struct notifier_block *nfb,
 
        return NOTIFY_OK;
 }
-#endif
+
+static struct notifier_block comp_pool_callback_nb __cpuinitdata = {
+       .notifier_call  = comp_pool_callback, /* passed to register_hotcpu_notifier() in ehca_create_comp_pool() */
+       .priority       = 0,
+};
 
 int ehca_create_comp_pool(void)
 {
@@ -864,11 +863,7 @@ int ehca_create_comp_pool(void)
                }
        }
 
-#ifdef CONFIG_HOTPLUG_CPU
-       comp_pool_callback_nb.notifier_call = comp_pool_callback;
-       comp_pool_callback_nb.priority = 0;
-       register_cpu_notifier(&comp_pool_callback_nb);
-#endif
+       register_hotcpu_notifier(&comp_pool_callback_nb);
 
        printk(KERN_INFO "eHCA scaling code enabled\n");
 
@@ -882,9 +877,7 @@ void ehca_destroy_comp_pool(void)
        if (!ehca_scaling_code)
                return;
 
-#ifdef CONFIG_HOTPLUG_CPU
-       unregister_cpu_notifier(&comp_pool_callback_nb);
-#endif
+       unregister_hotcpu_notifier(&comp_pool_callback_nb);
 
        for (i = 0; i < NR_CPUS; i++) {
                if (cpu_online(i))
index 99036b65bb848c190abcddbe611f128fbe44550e..403467f66fe6975d13d2eba62745d7c03a98241d 100644 (file)
 #include "ehca_tools.h"
 #include "hcp_if.h"
 
+#define HCAD_VERSION "0024"
+
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
 MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
-MODULE_VERSION("SVNEHCA_0023");
+MODULE_VERSION(HCAD_VERSION);
 
 int ehca_open_aqp1     = 0;
 int ehca_debug_level   = 0;
@@ -65,16 +67,16 @@ int ehca_static_rate   = -1;
 int ehca_scaling_code  = 0;
 int ehca_mr_largepage  = 0;
 
-module_param_named(open_aqp1,     ehca_open_aqp1,     int, 0);
-module_param_named(debug_level,   ehca_debug_level,   int, 0);
-module_param_named(hw_level,      ehca_hw_level,      int, 0);
-module_param_named(nr_ports,      ehca_nr_ports,      int, 0);
-module_param_named(use_hp_mr,     ehca_use_hp_mr,     int, 0);
-module_param_named(port_act_time, ehca_port_act_time, int, 0);
-module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, 0);
-module_param_named(static_rate,   ehca_static_rate,   int, 0);
-module_param_named(scaling_code,  ehca_scaling_code,  int, 0);
-module_param_named(mr_largepage,  ehca_mr_largepage,  int, 0);
+module_param_named(open_aqp1,     ehca_open_aqp1,     int, S_IRUGO);
+module_param_named(debug_level,   ehca_debug_level,   int, S_IRUGO);
+module_param_named(hw_level,      ehca_hw_level,      int, S_IRUGO);
+module_param_named(nr_ports,      ehca_nr_ports,      int, S_IRUGO);
+module_param_named(use_hp_mr,     ehca_use_hp_mr,     int, S_IRUGO);
+module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO);
+module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  int, S_IRUGO);
+module_param_named(static_rate,   ehca_static_rate,   int, S_IRUGO);
+module_param_named(scaling_code,  ehca_scaling_code,  int, S_IRUGO);
+module_param_named(mr_largepage,  ehca_mr_largepage,  int, S_IRUGO);
 
 MODULE_PARM_DESC(open_aqp1,
                 "AQP1 on startup (0: no (default), 1: yes)");
@@ -273,7 +275,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
 
        h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
        if (h_ret != H_SUCCESS) {
-               ehca_gen_err("Cannot query device properties. h_ret=%lx",
+               ehca_gen_err("Cannot query device properties. h_ret=%li",
                             h_ret);
                ret = -EPERM;
                goto sense_attributes1;
@@ -332,7 +334,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
        port = (struct hipz_query_port *)rblock;
        h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
        if (h_ret != H_SUCCESS) {
-               ehca_gen_err("Cannot query port properties. h_ret=%lx",
+               ehca_gen_err("Cannot query port properties. h_ret=%li",
                             h_ret);
                ret = -EPERM;
                goto sense_attributes1;
@@ -380,7 +382,7 @@ int ehca_init_device(struct ehca_shca *shca)
        strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
        shca->ib_device.owner               = THIS_MODULE;
 
-       shca->ib_device.uverbs_abi_ver      = 7;
+       shca->ib_device.uverbs_abi_ver      = 8;
        shca->ib_device.uverbs_cmd_mask     =
                (1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
                (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
@@ -526,13 +528,13 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
 
        ret = ib_destroy_qp(sport->ibqp_aqp1);
        if (ret) {
-               ehca_gen_err("Cannot destroy AQP1 QP. ret=%x", ret);
+               ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
                return ret;
        }
 
        ret = ib_destroy_cq(sport->ibcq_aqp1);
        if (ret)
-               ehca_gen_err("Cannot destroy AQP1 CQ. ret=%x", ret);
+               ehca_gen_err("Cannot destroy AQP1 CQ. ret=%i", ret);
 
        return ret;
 }
@@ -728,7 +730,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
        ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr);
 
        if (ret) {
-               ehca_err(&shca->ib_device, "Cannot create internal MR ret=%x",
+               ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i",
                         ret);
                goto probe5;
        }
@@ -736,7 +738,7 @@ static int __devinit ehca_probe(struct ibmebus_dev *dev,
        ret = ib_register_device(&shca->ib_device);
        if (ret) {
                ehca_err(&shca->ib_device,
-                        "ib_register_device() failed ret=%x", ret);
+                        "ib_register_device() failed ret=%i", ret);
                goto probe6;
        }
 
@@ -777,7 +779,7 @@ probe8:
        ret = ehca_destroy_aqp1(&shca->sport[0]);
        if (ret)
                ehca_err(&shca->ib_device,
-                        "Cannot destroy AQP1 for port 1. ret=%x", ret);
+                        "Cannot destroy AQP1 for port 1. ret=%i", ret);
 
 probe7:
        ib_unregister_device(&shca->ib_device);
@@ -826,7 +828,7 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev)
                        if (ret)
                                ehca_err(&shca->ib_device,
                                         "Cannot destroy AQP1 for port %x "
-                                        "ret=%x", ret, i);
+                                        "ret=%i", ret, i);
                }
        }
 
@@ -835,20 +837,20 @@ static int __devexit ehca_remove(struct ibmebus_dev *dev)
        ret = ehca_dereg_internal_maxmr(shca);
        if (ret)
                ehca_err(&shca->ib_device,
-                        "Cannot destroy internal MR. ret=%x", ret);
+                        "Cannot destroy internal MR. ret=%i", ret);
 
        ret = ehca_dealloc_pd(&shca->pd->ib_pd);
        if (ret)
                ehca_err(&shca->ib_device,
-                        "Cannot destroy internal PD. ret=%x", ret);
+                        "Cannot destroy internal PD. ret=%i", ret);
 
        ret = ehca_destroy_eq(shca, &shca->eq);
        if (ret)
-               ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%x", ret);
+               ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret);
 
        ret = ehca_destroy_eq(shca, &shca->neq);
        if (ret)
-               ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%x", ret);
+               ehca_err(&shca->ib_device, "Canot destroy NEQ. ret=%i", ret);
 
        ib_dealloc_device(&shca->ib_device);
 
@@ -909,7 +911,7 @@ int __init ehca_module_init(void)
        int ret;
 
        printk(KERN_INFO "eHCA Infiniband Device Driver "
-              "(Rel.: SVNEHCA_0023)\n");
+              "(Version " HCAD_VERSION ")\n");
 
        ret = ehca_create_comp_pool();
        if (ret) {
index 32a870660bfe7901aaee0fa1b32fe1ebacd08666..e3ef0264ccc6c4aa91890d86149a70b6db87c2b6 100644 (file)
@@ -88,7 +88,7 @@ int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        if (h_ret != H_SUCCESS)
                ehca_err(ibqp->device,
                         "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed "
-                        "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+                        "h_ret=%li", my_qp, ibqp->qp_num, h_ret);
 
        return ehca2ib_return_code(h_ret);
 }
@@ -125,7 +125,7 @@ int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        if (h_ret != H_SUCCESS)
                ehca_err(ibqp->device,
                         "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed "
-                        "h_ret=%lx", my_qp, ibqp->qp_num, h_ret);
+                        "h_ret=%li", my_qp, ibqp->qp_num, h_ret);
 
        return ehca2ib_return_code(h_ret);
 }
index d97eda3e1da0e15f4e131b30cc6c71f270fb912c..da88738265edd23893b136540148a29db3902f09 100644 (file)
@@ -51,6 +51,7 @@
 
 #define NUM_CHUNKS(length, chunk_size) \
        (((length) + (chunk_size - 1)) / (chunk_size))
+
 /* max number of rpages (per hcall register_rpages) */
 #define MAX_RPAGES 512
 
@@ -64,6 +65,11 @@ enum ehca_mr_pgsize {
        EHCA_MR_PGSIZE16M = 0x1000000L
 };
 
+#define EHCA_MR_PGSHIFT4K  12
+#define EHCA_MR_PGSHIFT64K 16
+#define EHCA_MR_PGSHIFT1M  20
+#define EHCA_MR_PGSHIFT16M 24
+
 static u32 ehca_encode_hwpage_size(u32 pgsize)
 {
        u32 idx = 0;
@@ -159,7 +165,7 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
 
 get_dma_mr_exit0:
        if (IS_ERR(ib_mr))
-               ehca_err(&shca->ib_device, "rc=%lx pd=%p mr_access_flags=%x ",
+               ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x",
                         PTR_ERR(ib_mr), pd, mr_access_flags);
        return ib_mr;
 } /* end ehca_get_dma_mr() */
@@ -271,7 +277,7 @@ reg_phys_mr_exit1:
        ehca_mr_delete(e_mr);
 reg_phys_mr_exit0:
        if (IS_ERR(ib_mr))
-               ehca_err(pd->device, "rc=%lx pd=%p phys_buf_array=%p "
+               ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
                         "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
                         PTR_ERR(ib_mr), pd, phys_buf_array,
                         num_phys_buf, mr_access_flags, iova_start);
@@ -347,17 +353,16 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        /* select proper hw_pgsize */
        if (ehca_mr_largepage &&
            (shca->hca_cap_mr_pgsize & HCA_CAP_MR_PGSIZE_16M)) {
-               if (length <= EHCA_MR_PGSIZE4K
-                   && PAGE_SIZE == EHCA_MR_PGSIZE4K)
-                       hwpage_size = EHCA_MR_PGSIZE4K;
-               else if (length <= EHCA_MR_PGSIZE64K)
-                       hwpage_size = EHCA_MR_PGSIZE64K;
-               else if (length <= EHCA_MR_PGSIZE1M)
-                       hwpage_size = EHCA_MR_PGSIZE1M;
-               else
-                       hwpage_size = EHCA_MR_PGSIZE16M;
+               int page_shift = PAGE_SHIFT;
+               if (e_mr->umem->hugetlb) {
+                       /* determine page_shift, clamp between 4K and 16M */
+                       page_shift = (fls64(length - 1) + 3) & ~3;
+                       page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
+                                        EHCA_MR_PGSHIFT16M);
+               }
+               hwpage_size = 1UL << page_shift;
        } else
-               hwpage_size = EHCA_MR_PGSIZE4K;
+               hwpage_size = EHCA_MR_PGSIZE4K; /* ehca1 only supports 4k */
        ehca_dbg(pd->device, "hwpage_size=%lx", hwpage_size);
 
 reg_user_mr_fallback:
@@ -403,8 +408,7 @@ reg_user_mr_exit1:
        ehca_mr_delete(e_mr);
 reg_user_mr_exit0:
        if (IS_ERR(ib_mr))
-               ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x"
-                        " udata=%p",
+               ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
                         PTR_ERR(ib_mr), pd, mr_access_flags, udata);
        return ib_mr;
 } /* end ehca_reg_user_mr() */
@@ -565,7 +569,7 @@ rereg_phys_mr_exit1:
        spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
 rereg_phys_mr_exit0:
        if (ret)
-               ehca_err(mr->device, "ret=%x mr=%p mr_rereg_mask=%x pd=%p "
+               ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
                         "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
                         "iova_start=%p",
                         ret, mr, mr_rereg_mask, pd, phys_buf_array,
@@ -607,7 +611,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
 
        h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
        if (h_ret != H_SUCCESS) {
-               ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lx mr=%p "
+               ehca_err(mr->device, "hipz_mr_query failed, h_ret=%li mr=%p "
                         "hca_hndl=%lx mr_hndl=%lx lkey=%x",
                         h_ret, mr, shca->ipz_hca_handle.handle,
                         e_mr->ipz_mr_handle.handle, mr->lkey);
@@ -625,7 +629,7 @@ query_mr_exit1:
        spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
 query_mr_exit0:
        if (ret)
-               ehca_err(mr->device, "ret=%x mr=%p mr_attr=%p",
+               ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
                         ret, mr, mr_attr);
        return ret;
 } /* end ehca_query_mr() */
@@ -667,7 +671,7 @@ int ehca_dereg_mr(struct ib_mr *mr)
        /* TODO: BUSY: MR still has bound window(s) */
        h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
        if (h_ret != H_SUCCESS) {
-               ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lx shca=%p "
+               ehca_err(mr->device, "hipz_free_mr failed, h_ret=%li shca=%p "
                         "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x",
                         h_ret, shca, e_mr, shca->ipz_hca_handle.handle,
                         e_mr->ipz_mr_handle.handle, mr->lkey);
@@ -683,7 +687,7 @@ int ehca_dereg_mr(struct ib_mr *mr)
 
 dereg_mr_exit0:
        if (ret)
-               ehca_err(mr->device, "ret=%x mr=%p", ret, mr);
+               ehca_err(mr->device, "ret=%i mr=%p", ret, mr);
        return ret;
 } /* end ehca_dereg_mr() */
 
@@ -708,7 +712,7 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd)
        h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw,
                                         e_pd->fw_pd, &hipzout);
        if (h_ret != H_SUCCESS) {
-               ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lx "
+               ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%li "
                         "shca=%p hca_hndl=%lx mw=%p",
                         h_ret, shca, shca->ipz_hca_handle.handle, e_mw);
                ib_mw = ERR_PTR(ehca2ib_return_code(h_ret));
@@ -723,7 +727,7 @@ alloc_mw_exit1:
        ehca_mw_delete(e_mw);
 alloc_mw_exit0:
        if (IS_ERR(ib_mw))
-               ehca_err(pd->device, "rc=%lx pd=%p", PTR_ERR(ib_mw), pd);
+               ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd);
        return ib_mw;
 } /* end ehca_alloc_mw() */
 
@@ -750,7 +754,7 @@ int ehca_dealloc_mw(struct ib_mw *mw)
 
        h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw);
        if (h_ret != H_SUCCESS) {
-               ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lx shca=%p "
+               ehca_err(mw->device, "hipz_free_mw failed, h_ret=%li shca=%p "
                         "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx",
                         h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle,
                         e_mw->ipz_mw_handle.handle);
@@ -846,10 +850,6 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
 alloc_fmr_exit1:
        ehca_mr_delete(e_fmr);
 alloc_fmr_exit0:
-       if (IS_ERR(ib_fmr))
-               ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x "
-                        "fmr_attr=%p", PTR_ERR(ib_fmr), pd,
-                        mr_access_flags, fmr_attr);
        return ib_fmr;
 } /* end ehca_alloc_fmr() */
 
@@ -916,7 +916,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr,
 
 map_phys_fmr_exit0:
        if (ret)
-               ehca_err(fmr->device, "ret=%x fmr=%p page_list=%p list_len=%x "
+               ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x "
                         "iova=%lx", ret, fmr, page_list, list_len, iova);
        return ret;
 } /* end ehca_map_phys_fmr() */
@@ -979,7 +979,7 @@ int ehca_unmap_fmr(struct list_head *fmr_list)
 
 unmap_fmr_exit0:
        if (ret)
-               ehca_gen_err("ret=%x fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
+               ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x",
                             ret, fmr_list, num_fmr, unmap_fmr_cnt);
        return ret;
 } /* end ehca_unmap_fmr() */
@@ -1003,7 +1003,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr)
 
        h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
        if (h_ret != H_SUCCESS) {
-               ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lx e_fmr=%p "
+               ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%li e_fmr=%p "
                         "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x",
                         h_ret, e_fmr, shca->ipz_hca_handle.handle,
                         e_fmr->ipz_mr_handle.handle, fmr->lkey);
@@ -1016,7 +1016,7 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr)
 
 free_fmr_exit0:
        if (ret)
-               ehca_err(&shca->ib_device, "ret=%x fmr=%p", ret, fmr);
+               ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr);
        return ret;
 } /* end ehca_dealloc_fmr() */
 
@@ -1046,7 +1046,7 @@ int ehca_reg_mr(struct ehca_shca *shca,
                                         (u64)iova_start, size, hipz_acl,
                                         e_pd->fw_pd, &hipzout);
        if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lx "
+               ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%li "
                         "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle);
                ret = ehca2ib_return_code(h_ret);
                goto ehca_reg_mr_exit0;
@@ -1072,9 +1072,9 @@ int ehca_reg_mr(struct ehca_shca *shca,
 ehca_reg_mr_exit1:
        h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
        if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "h_ret=%lx shca=%p e_mr=%p "
+               ehca_err(&shca->ib_device, "h_ret=%li shca=%p e_mr=%p "
                         "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x "
-                        "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%x",
+                        "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%i",
                         h_ret, shca, e_mr, iova_start, size, acl, e_pd,
                         hipzout.lkey, pginfo, pginfo->num_kpages,
                         pginfo->num_hwpages, ret);
@@ -1083,7 +1083,7 @@ ehca_reg_mr_exit1:
        }
 ehca_reg_mr_exit0:
        if (ret)
-               ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
+               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
                         "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
                         "num_kpages=%lx num_hwpages=%lx",
                         ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo,
@@ -1127,7 +1127,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
                ret = ehca_set_pagebuf(pginfo, rnum, kpage);
                if (ret) {
                        ehca_err(&shca->ib_device, "ehca_set_pagebuf "
-                                "bad rc, ret=%x rnum=%x kpage=%p",
+                                "bad rc, ret=%i rnum=%x kpage=%p",
                                 ret, rnum, kpage);
                        goto ehca_reg_mr_rpages_exit1;
                }
@@ -1155,7 +1155,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
                         */
                        if (h_ret != H_SUCCESS) {
                                ehca_err(&shca->ib_device, "last "
-                                        "hipz_reg_rpage_mr failed, h_ret=%lx "
+                                        "hipz_reg_rpage_mr failed, h_ret=%li "
                                         "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx"
                                         " lkey=%x", h_ret, e_mr, i,
                                         shca->ipz_hca_handle.handle,
@@ -1167,7 +1167,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
                                ret = 0;
                } else if (h_ret != H_PAGE_REGISTERED) {
                        ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, "
-                                "h_ret=%lx e_mr=%p i=%x lkey=%x hca_hndl=%lx "
+                                "h_ret=%li e_mr=%p i=%x lkey=%x hca_hndl=%lx "
                                 "mr_hndl=%lx", h_ret, e_mr, i,
                                 e_mr->ib.ib_mr.lkey,
                                 shca->ipz_hca_handle.handle,
@@ -1183,7 +1183,7 @@ ehca_reg_mr_rpages_exit1:
        ehca_free_fw_ctrlblock(kpage);
 ehca_reg_mr_rpages_exit0:
        if (ret)
-               ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p pginfo=%p "
+               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p "
                         "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr,
                         pginfo, pginfo->num_kpages, pginfo->num_hwpages);
        return ret;
@@ -1244,7 +1244,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
                 * (MW bound or MR is shared)
                 */
                ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
-                         "(Rereg1), h_ret=%lx e_mr=%p", h_ret, e_mr);
+                         "(Rereg1), h_ret=%li e_mr=%p", h_ret, e_mr);
                *pginfo = pginfo_save;
                ret = -EAGAIN;
        } else if ((u64 *)hipzout.vaddr != iova_start) {
@@ -1273,7 +1273,7 @@ ehca_rereg_mr_rereg1_exit1:
        ehca_free_fw_ctrlblock(kpage);
 ehca_rereg_mr_rereg1_exit0:
        if ( ret && (ret != -EAGAIN) )
-               ehca_err(&shca->ib_device, "ret=%x lkey=%x rkey=%x "
+               ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
                         "pginfo=%p num_kpages=%lx num_hwpages=%lx",
                         ret, *lkey, *rkey, pginfo, pginfo->num_kpages,
                         pginfo->num_hwpages);
@@ -1334,7 +1334,7 @@ int ehca_rereg_mr(struct ehca_shca *shca,
                h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr);
                if (h_ret != H_SUCCESS) {
                        ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-                                "h_ret=%lx e_mr=%p hca_hndl=%lx mr_hndl=%lx "
+                                "h_ret=%li e_mr=%p hca_hndl=%lx mr_hndl=%lx "
                                 "mr->lkey=%x",
                                 h_ret, e_mr, shca->ipz_hca_handle.handle,
                                 e_mr->ipz_mr_handle.handle,
@@ -1366,7 +1366,7 @@ int ehca_rereg_mr(struct ehca_shca *shca,
 
 ehca_rereg_mr_exit0:
        if (ret)
-               ehca_err(&shca->ib_device, "ret=%x shca=%p e_mr=%p "
+               ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p "
                         "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p "
                         "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x "
                         "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size,
@@ -1410,7 +1410,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
                 * FMRs are not shared and no MW bound to FMRs
                 */
                ehca_err(&shca->ib_device, "hipz_reregister_pmr failed "
-                        "(Rereg1), h_ret=%lx e_fmr=%p hca_hndl=%lx "
+                        "(Rereg1), h_ret=%li e_fmr=%p hca_hndl=%lx "
                         "mr_hndl=%lx lkey=%x lkey_out=%x",
                         h_ret, e_fmr, shca->ipz_hca_handle.handle,
                         e_fmr->ipz_mr_handle.handle,
@@ -1422,7 +1422,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
        h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr);
        if (h_ret != H_SUCCESS) {
                ehca_err(&shca->ib_device, "hipz_free_mr failed, "
-                        "h_ret=%lx e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
+                        "h_ret=%li e_fmr=%p hca_hndl=%lx mr_hndl=%lx "
                         "lkey=%x",
                         h_ret, e_fmr, shca->ipz_hca_handle.handle,
                         e_fmr->ipz_mr_handle.handle,
@@ -1457,7 +1457,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca,
 
 ehca_unmap_one_fmr_exit0:
        if (ret)
-               ehca_err(&shca->ib_device, "ret=%x tmp_lkey=%x tmp_rkey=%x "
+               ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x "
                         "fmr_max_pages=%x",
                         ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages);
        return ret;
@@ -1486,7 +1486,7 @@ int ehca_reg_smr(struct ehca_shca *shca,
                                    (u64)iova_start, hipz_acl, e_pd->fw_pd,
                                    &hipzout);
        if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
+               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li "
                         "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x "
                         "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
                         h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd,
@@ -1510,7 +1510,7 @@ int ehca_reg_smr(struct ehca_shca *shca,
 
 ehca_reg_smr_exit0:
        if (ret)
-               ehca_err(&shca->ib_device, "ret=%x shca=%p e_origmr=%p "
+               ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p "
                         "e_newmr=%p iova_start=%p acl=%x e_pd=%p",
                         ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd);
        return ret;
@@ -1585,7 +1585,7 @@ ehca_reg_internal_maxmr_exit1:
        ehca_mr_delete(e_mr);
 ehca_reg_internal_maxmr_exit0:
        if (ret)
-               ehca_err(&shca->ib_device, "ret=%x shca=%p e_pd=%p e_maxmr=%p",
+               ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p",
                         ret, shca, e_pd, e_maxmr);
        return ret;
 } /* end ehca_reg_internal_maxmr() */
@@ -1612,7 +1612,7 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
                                    (u64)iova_start, hipz_acl, e_pd->fw_pd,
                                    &hipzout);
        if (h_ret != H_SUCCESS) {
-               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lx "
+               ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li "
                         "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x",
                         h_ret, e_origmr, shca->ipz_hca_handle.handle,
                         e_origmr->ipz_mr_handle.handle,
@@ -1653,7 +1653,7 @@ int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
        ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr);
        if (ret) {
                ehca_err(&shca->ib_device, "dereg internal max-MR failed, "
-                        "ret=%x e_maxmr=%p shca=%p lkey=%x",
+                        "ret=%i e_maxmr=%p shca=%p lkey=%x",
                         ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey);
                shca->maxmr = e_maxmr;
                goto ehca_dereg_internal_maxmr_exit0;
@@ -1663,7 +1663,7 @@ int ehca_dereg_internal_maxmr(struct ehca_shca *shca)
 
 ehca_dereg_internal_maxmr_exit0:
        if (ret)
-               ehca_err(&shca->ib_device, "ret=%x shca=%p shca->maxmr=%p",
+               ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p",
                         ret, shca, shca->maxmr);
        return ret;
 } /* end ehca_dereg_internal_maxmr() */
index 84d435a5ee11951487084981e02c77a3ece16b50..e2bd62be11e71ba735af508731181cf14b2c9d1b 100644 (file)
@@ -273,6 +273,7 @@ static inline void queue2resp(struct ipzu_queue_resp *resp,
        resp->queue_length = queue->queue_length;
        resp->pagesize = queue->pagesize;
        resp->toggle_state = queue->toggle_state;
+       resp->offset = queue->offset;
 }
 
 /*
@@ -309,7 +310,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
        }
 
        if (!ipz_rc) {
-               ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x",
+               ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i",
                         ipz_rc);
                return -EBUSY;
        }
@@ -333,7 +334,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
                if (cnt == (nr_q_pages - 1)) {  /* last page! */
                        if (h_ret != expected_hret) {
                                ehca_err(ib_dev, "hipz_qp_register_rpage() "
-                                        "h_ret= %lx ", h_ret);
+                                        "h_ret=%li", h_ret);
                                ret = ehca2ib_return_code(h_ret);
                                goto init_qp_queue1;
                        }
@@ -347,7 +348,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
                } else {
                        if (h_ret != H_PAGE_REGISTERED) {
                                ehca_err(ib_dev, "hipz_qp_register_rpage() "
-                                        "h_ret= %lx ", h_ret);
+                                        "h_ret=%li", h_ret);
                                ret = ehca2ib_return_code(h_ret);
                                goto init_qp_queue1;
                        }
@@ -512,7 +513,7 @@ static struct ehca_qp *internal_create_qp(
                        } else if (init_attr->cap.max_send_wr > 255) {
                                ehca_err(pd->device,
                                         "Invalid Number of "
-                                        "ax_send_wr=%x for UD QP_TYPE=%x",
+                                        "max_send_wr=%x for UD QP_TYPE=%x",
                                         init_attr->cap.max_send_wr, qp_type);
                                return ERR_PTR(-EINVAL);
                        }
@@ -523,6 +524,18 @@ static struct ehca_qp *internal_create_qp(
                        return ERR_PTR(-EINVAL);
                        break;
                }
+       } else {
+               int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
+                              || qp_type == IB_QPT_GSI) ? 250 : 252;
+
+               if (init_attr->cap.max_send_sge > max_sge
+                   || init_attr->cap.max_recv_sge > max_sge) {
+                       ehca_err(pd->device, "Invalid number of SGEs requested "
+                                "send_sge=%x recv_sge=%x max_sge=%x",
+                                init_attr->cap.max_send_sge,
+                                init_attr->cap.max_recv_sge, max_sge);
+                       return ERR_PTR(-EINVAL);
+               }
        }
 
        if (pd->uobject && udata)
@@ -556,7 +569,6 @@ static struct ehca_qp *internal_create_qp(
                write_lock_irqsave(&ehca_qp_idr_lock, flags);
                ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
                write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
-
        } while (ret == -EAGAIN);
 
        if (ret) {
@@ -565,11 +577,17 @@ static struct ehca_qp *internal_create_qp(
                goto create_qp_exit0;
        }
 
+       if (my_qp->token > 0x1FFFFFF) {
+               ret = -EINVAL;
+               ehca_err(pd->device, "Invalid number of qp");
+               goto create_qp_exit1;
+       }
+
        parms.servicetype = ibqptype2servicetype(qp_type);
        if (parms.servicetype < 0) {
                ret = -EINVAL;
                ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
-               goto create_qp_exit0;
+               goto create_qp_exit1;
        }
 
        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
@@ -598,8 +616,7 @@ static struct ehca_qp *internal_create_qp(
        parms.squeue.max_sge = max_send_sge;
        parms.rqueue.max_sge = max_recv_sge;
 
-       if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)
-           && !(context && udata)) { /* no small QP support in userspace ATM */
+       if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
                if (HAS_SQ(my_qp))
                        ehca_determine_small_queue(
                                &parms.squeue, max_send_sge, is_llqp);
@@ -612,7 +629,7 @@ static struct ehca_qp *internal_create_qp(
 
        h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
        if (h_ret != H_SUCCESS) {
-               ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
+               ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%li",
                         h_ret);
                ret = ehca2ib_return_code(h_ret);
                goto create_qp_exit1;
@@ -666,7 +683,7 @@ static struct ehca_qp *internal_create_qp(
                        &parms.squeue, swqe_size);
                if (ret) {
                        ehca_err(pd->device, "Couldn't initialize squeue "
-                                "and pages  ret=%x", ret);
+                                "and pages ret=%i", ret);
                        goto create_qp_exit2;
                }
        }
@@ -677,7 +694,7 @@ static struct ehca_qp *internal_create_qp(
                        H_SUCCESS, &parms.rqueue, rwqe_size);
                if (ret) {
                        ehca_err(pd->device, "Couldn't initialize rqueue "
-                                "and pages ret=%x", ret);
+                                "and pages ret=%i", ret);
                        goto create_qp_exit3;
                }
        }
@@ -714,8 +731,6 @@ static struct ehca_qp *internal_create_qp(
        if (qp_type == IB_QPT_GSI) {
                h_ret = ehca_define_sqp(shca, my_qp, init_attr);
                if (h_ret != H_SUCCESS) {
-                       ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
-                                h_ret);
                        ret = ehca2ib_return_code(h_ret);
                        goto create_qp_exit4;
                }
@@ -725,7 +740,7 @@ static struct ehca_qp *internal_create_qp(
                ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
                if (ret) {
                        ehca_err(pd->device,
-                                "Couldn't assign qp to send_cq ret=%x", ret);
+                                "Couldn't assign qp to send_cq ret=%i", ret);
                        goto create_qp_exit4;
                }
        }
@@ -741,12 +756,13 @@ static struct ehca_qp *internal_create_qp(
                resp.ext_type = my_qp->ext_type;
                resp.qkey = my_qp->qkey;
                resp.real_qp_num = my_qp->real_qp_num;
-               resp.ipz_rqueue.offset = my_qp->ipz_rqueue.offset;
-               resp.ipz_squeue.offset = my_qp->ipz_squeue.offset;
+
                if (HAS_SQ(my_qp))
                        queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
                if (HAS_RQ(my_qp))
                        queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
+               resp.fw_handle_ofs = (u32)
+                       (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
 
                if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
                        ehca_err(pd->device, "Copy to udata failed");
@@ -841,7 +857,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
                                mqpcb, my_qp->galpas.kernel);
        if (hret != H_SUCCESS) {
                ehca_err(pd->device, "Could not modify SRQ to INIT"
-                        "ehca_qp=%p qp_num=%x hret=%lx",
+                        "ehca_qp=%p qp_num=%x h_ret=%li",
                         my_qp, my_qp->real_qp_num, hret);
                goto create_srq2;
        }
@@ -855,7 +871,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
                                mqpcb, my_qp->galpas.kernel);
        if (hret != H_SUCCESS) {
                ehca_err(pd->device, "Could not enable SRQ"
-                        "ehca_qp=%p qp_num=%x hret=%lx",
+                        "ehca_qp=%p qp_num=%x h_ret=%li",
                         my_qp, my_qp->real_qp_num, hret);
                goto create_srq2;
        }
@@ -869,11 +885,13 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
                                mqpcb, my_qp->galpas.kernel);
        if (hret != H_SUCCESS) {
                ehca_err(pd->device, "Could not modify SRQ to RTR"
-                        "ehca_qp=%p qp_num=%x hret=%lx",
+                        "ehca_qp=%p qp_num=%x h_ret=%li",
                         my_qp, my_qp->real_qp_num, hret);
                goto create_srq2;
        }
 
+       ehca_free_fw_ctrlblock(mqpcb);
+
        return &my_qp->ib_srq;
 
 create_srq2:
@@ -907,7 +925,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
                                           &bad_send_wqe_p, NULL, 2);
        if (h_ret != H_SUCCESS) {
                ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
-                        " ehca_qp=%p qp_num=%x h_ret=%lx",
+                        " ehca_qp=%p qp_num=%x h_ret=%li",
                         my_qp, qp_num, h_ret);
                return ehca2ib_return_code(h_ret);
        }
@@ -985,7 +1003,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
                                mqpcb, my_qp->galpas.kernel);
        if (h_ret != H_SUCCESS) {
                ehca_err(ibqp->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lx",
+                        "ehca_qp=%p qp_num=%x h_ret=%li",
                         my_qp, ibqp->qp_num, h_ret);
                ret = ehca2ib_return_code(h_ret);
                goto modify_qp_exit1;
@@ -1021,7 +1039,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
                        ibqp, &smiqp_attr, smiqp_attr_mask, 1);
                if (smirc) {
                        ehca_err(ibqp->device, "SMI RESET -> INIT failed. "
-                                "ehca_modify_qp() rc=%x", smirc);
+                                "ehca_modify_qp() rc=%i", smirc);
                        ret = H_PARAMETER;
                        goto modify_qp_exit1;
                }
@@ -1123,7 +1141,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
                ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt);
                if (ret) {
                        ehca_err(ibqp->device, "prepare_sqe_rts() failed "
-                                "ehca_qp=%p qp_num=%x ret=%x",
+                                "ehca_qp=%p qp_num=%x ret=%i",
                                 my_qp, ibqp->qp_num, ret);
                        goto modify_qp_exit2;
                }
@@ -1149,6 +1167,13 @@ static int internal_modify_qp(struct ib_qp *ibqp,
        }
 
        if (attr_mask & IB_QP_PKEY_INDEX) {
+               if (attr->pkey_index >= 16) {
+                       ret = -EINVAL;
+                       ehca_err(ibqp->device, "Invalid pkey_index=%x. "
+                                "ehca_qp=%p qp_num=%x max_pkey_index=f",
+                                attr->pkey_index, my_qp, ibqp->qp_num);
+                       goto modify_qp_exit2;
+               }
                mqpcb->prim_p_key_idx = attr->pkey_index;
                update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
        }
@@ -1257,50 +1282,78 @@ static int internal_modify_qp(struct ib_qp *ibqp,
                int ehca_mult = ib_rate_to_mult(
                        shca->sport[my_qp->init_attr.port_num].rate);
 
+               if (attr->alt_port_num < 1
+                   || attr->alt_port_num > shca->num_ports) {
+                       ret = -EINVAL;
+                       ehca_err(ibqp->device, "Invalid alt_port=%x. "
+                                "ehca_qp=%p qp_num=%x num_ports=%x",
+                                attr->alt_port_num, my_qp, ibqp->qp_num,
+                                shca->num_ports);
+                       goto modify_qp_exit2;
+               }
+               mqpcb->alt_phys_port = attr->alt_port_num;
+
+               if (attr->alt_pkey_index >= 16) {
+                       ret = -EINVAL;
+                       ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. "
+                                "ehca_qp=%p qp_num=%x max_pkey_index=f",
+                                attr->alt_pkey_index, my_qp, ibqp->qp_num);
+                       goto modify_qp_exit2;
+               }
+               mqpcb->alt_p_key_idx = attr->alt_pkey_index;
+
+               mqpcb->timeout_al = attr->alt_timeout;
                mqpcb->dlid_al = attr->alt_ah_attr.dlid;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1);
                mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits;
-               update_mask |=
-                       EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1);
                mqpcb->service_level_al = attr->alt_ah_attr.sl;
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1);
 
-               if (ah_mult < ehca_mult)
-                       mqpcb->max_static_rate = (ah_mult > 0) ?
-                       ((ehca_mult - 1) / ah_mult) : 0;
+               if (ah_mult > 0 && ah_mult < ehca_mult)
+                       mqpcb->max_static_rate_al = (ehca_mult - 1) / ah_mult;
                else
                        mqpcb->max_static_rate_al = 0;
 
-               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1);
+               /* OpenIB doesn't support alternate retry counts - copy them */
+               mqpcb->retry_count_al = mqpcb->retry_count;
+               mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count;
+
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1)
+                       | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1)
+                       | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1)
+                       | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1)
+                       | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1)
+                       | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1)
+                       | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1)
+                       | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1)
+                       | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1);
+
+               /*
+                * Always supply the GRH flag, even if it's zero, to give the
+                * hypervisor a clear "yes" or "no" instead of a "perhaps"
+                */
+               update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
 
                /*
                 * only if GRH is TRUE we might consider SOURCE_GID_IDX
                 * and DEST_GID otherwise phype will return H_ATTR_PARM!!!
                 */
                if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) {
-                       mqpcb->send_grh_flag_al = 1 << 31;
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1);
-                       mqpcb->source_gid_idx_al =
-                               attr->alt_ah_attr.grh.sgid_index;
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1);
+                       mqpcb->send_grh_flag_al = 1;
 
                        for (cnt = 0; cnt < 16; cnt++)
                                mqpcb->dest_gid_al.byte[cnt] =
                                        attr->alt_ah_attr.grh.dgid.raw[cnt];
-
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1);
+                       mqpcb->source_gid_idx_al =
+                               attr->alt_ah_attr.grh.sgid_index;
                        mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label;
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1);
                        mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit;
-                       update_mask |=
-                               EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1);
                        mqpcb->traffic_class_al =
                                attr->alt_ah_attr.grh.traffic_class;
+
                        update_mask |=
+                               EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1)
+                               | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1)
+                               | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1)
+                               | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) |
                                EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1);
                }
        }
@@ -1322,7 +1375,14 @@ static int internal_modify_qp(struct ib_qp *ibqp,
        }
 
        if (attr_mask & IB_QP_PATH_MIG_STATE) {
-               mqpcb->path_migration_state = attr->path_mig_state;
+               if (attr->path_mig_state != IB_MIG_REARM
+                   && attr->path_mig_state != IB_MIG_MIGRATED) {
+                       ret = -EINVAL;
+                       ehca_err(ibqp->device, "Invalid mig_state=%x",
+                                attr->path_mig_state);
+                       goto modify_qp_exit2;
+               }
+               mqpcb->path_migration_state = attr->path_mig_state + 1;
                update_mask |=
                        EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1);
        }
@@ -1348,7 +1408,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 
        if (h_ret != H_SUCCESS) {
                ret = ehca2ib_return_code(h_ret);
-               ehca_err(ibqp->device, "hipz_h_modify_qp() failed rc=%lx "
+               ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%li "
                         "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num);
                goto modify_qp_exit2;
        }
@@ -1381,7 +1441,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
                        ret = ehca2ib_return_code(h_ret);
                        ehca_err(ibqp->device, "ENABLE in context of "
                                 "RESET_2_INIT failed! Maybe you didn't get "
-                                "a LID h_ret=%lx ehca_qp=%p qp_num=%x",
+                                "a LID h_ret=%li ehca_qp=%p qp_num=%x",
                                 h_ret, my_qp, ibqp->qp_num);
                        goto modify_qp_exit2;
                }
@@ -1469,7 +1529,7 @@ int ehca_query_qp(struct ib_qp *qp,
        if (h_ret != H_SUCCESS) {
                ret = ehca2ib_return_code(h_ret);
                ehca_err(qp->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lx",
+                        "ehca_qp=%p qp_num=%x h_ret=%li",
                         my_qp, qp->qp_num, h_ret);
                goto query_qp_exit1;
        }
@@ -1490,7 +1550,7 @@ int ehca_query_qp(struct ib_qp *qp,
 
        qp_attr->qkey = qpcb->qkey;
        qp_attr->path_mtu = qpcb->path_mtu;
-       qp_attr->path_mig_state = qpcb->path_migration_state;
+       qp_attr->path_mig_state = qpcb->path_migration_state - 1;
        qp_attr->rq_psn = qpcb->receive_psn;
        qp_attr->sq_psn = qpcb->send_psn;
        qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field;
@@ -1644,7 +1704,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 
        if (h_ret != H_SUCCESS) {
                ret = ehca2ib_return_code(h_ret);
-               ehca_err(ibsrq->device, "hipz_h_modify_qp() failed rc=%lx "
+               ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%li "
                         "ehca_qp=%p qp_num=%x",
                         h_ret, my_qp, my_qp->real_qp_num);
        }
@@ -1687,12 +1747,13 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
        if (h_ret != H_SUCCESS) {
                ret = ehca2ib_return_code(h_ret);
                ehca_err(srq->device, "hipz_h_query_qp() failed "
-                        "ehca_qp=%p qp_num=%x h_ret=%lx",
+                        "ehca_qp=%p qp_num=%x h_ret=%li",
                         my_qp, my_qp->real_qp_num, h_ret);
                goto query_srq_exit1;
        }
 
        srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
+       srq_attr->max_sge = qpcb->actual_nr_sges_in_rq_wqe;
        srq_attr->srq_limit = EHCA_BMASK_GET(
                MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit);
 
@@ -1737,7 +1798,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
                ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
                if (ret) {
                        ehca_err(dev, "Couldn't unassign qp from "
-                                "send_cq ret=%x qp_num=%x cq_num=%x", ret,
+                                "send_cq ret=%i qp_num=%x cq_num=%x", ret,
                                 qp_num, my_qp->send_cq->cq_number);
                        return ret;
                }
@@ -1749,7 +1810,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
 
        h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
        if (h_ret != H_SUCCESS) {
-               ehca_err(dev, "hipz_h_destroy_qp() failed rc=%lx "
+               ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
                         "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
                return ehca2ib_return_code(h_ret);
        }
index 94eed70fedf58366adbb02d7d2188cecfe0c83fc..ea91360835d3279a27b885a5fd045b7fc9fbfff7 100644 (file)
@@ -526,7 +526,7 @@ poll_cq_one_read_cqe:
        if (!cqe) {
                ret = -EAGAIN;
                ehca_dbg(cq->device, "Completion queue is empty ehca_cq=%p "
-                        "cq_num=%x ret=%x", my_cq, my_cq->cq_number, ret);
+                        "cq_num=%x ret=%i", my_cq, my_cq->cq_number, ret);
                goto  poll_cq_one_exit0;
        }
 
index 9f16e9c79394e5ac9b1b9883c8128302a909883e..f0792e5fbd02eccf7e6a02040df994a86505cc66 100644 (file)
@@ -82,7 +82,7 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
 
                if (ret != H_SUCCESS) {
                        ehca_err(&shca->ib_device,
-                                "Can't define AQP1 for port %x. rc=%lx",
+                                "Can't define AQP1 for port %x. h_ret=%li",
                                 port, ret);
                        return ret;
                }
index 57c77a715f462a3f8c6127b1fbcf4c5821655a3a..4a8346a2bc9e8c1602d04711bfa4cd55d980b724 100644 (file)
@@ -73,40 +73,37 @@ extern int ehca_debug_level;
                if (unlikely(ehca_debug_level)) \
                        dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \
                                   "PU%04x EHCA_DBG:%s " format "\n", \
-                                  get_paca()->paca_index, __FUNCTION__, \
+                                  raw_smp_processor_id(), __FUNCTION__, \
                                   ## arg); \
        } while (0)
 
 #define ehca_info(ib_dev, format, arg...) \
        dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \
-                get_paca()->paca_index, __FUNCTION__, ## arg)
+                raw_smp_processor_id(), __FUNCTION__, ## arg)
 
 #define ehca_warn(ib_dev, format, arg...) \
        dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \
-                get_paca()->paca_index, __FUNCTION__, ## arg)
+                raw_smp_processor_id(), __FUNCTION__, ## arg)
 
 #define ehca_err(ib_dev, format, arg...) \
        dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \
-               get_paca()->paca_index, __FUNCTION__, ## arg)
+               raw_smp_processor_id(), __FUNCTION__, ## arg)
 
 /* use this one only if no ib_dev available */
 #define ehca_gen_dbg(format, arg...) \
        do { \
                if (unlikely(ehca_debug_level)) \
                        printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \
-                              get_paca()->paca_index, __FUNCTION__, ## arg); \
+                              raw_smp_processor_id(), __FUNCTION__, ## arg); \
        } while (0)
 
 #define ehca_gen_warn(format, arg...) \
-       do { \
-               if (unlikely(ehca_debug_level)) \
-                       printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
-                              get_paca()->paca_index, __FUNCTION__, ## arg); \
-       } while (0)
+       printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \
+              raw_smp_processor_id(), __FUNCTION__, ## arg)
 
 #define ehca_gen_err(format, arg...) \
        printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \
-               get_paca()->paca_index, __FUNCTION__, ## arg)
+              raw_smp_processor_id(), __FUNCTION__, ## arg)
 
 /**
  * ehca_dmp - printk a memory block, whose length is n*8 bytes.
index 4bc687fdf531fa5b96dcbc76fc6a9c31d1e69237..5234d6c15c49668555430789fb300d7d42819ad0 100644 (file)
@@ -109,7 +109,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
        u64 vsize, physical;
 
        vsize = vma->vm_end - vma->vm_start;
-       if (vsize != EHCA_PAGESIZE) {
+       if (vsize < EHCA_PAGESIZE) {
                ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start);
                return -EINVAL;
        }
@@ -118,10 +118,10 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical);
        /* VM_IO | VM_RESERVED are set by remap_pfn_range() */
-       ret = remap_pfn_range(vma, vma->vm_start, physical >> PAGE_SHIFT,
-                             vsize, vma->vm_page_prot);
+       ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
+                          vma->vm_page_prot);
        if (unlikely(ret)) {
-               ehca_gen_err("remap_pfn_range() failed ret=%x", ret);
+               ehca_gen_err("remap_pfn_range() failed ret=%i", ret);
                return -ENOMEM;
        }
 
@@ -146,7 +146,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
                page = virt_to_page(virt_addr);
                ret = vm_insert_page(vma, start, page);
                if (unlikely(ret)) {
-                       ehca_gen_err("vm_insert_page() failed rc=%x", ret);
+                       ehca_gen_err("vm_insert_page() failed rc=%i", ret);
                        return ret;
                }
                start += PAGE_SIZE;
@@ -164,23 +164,23 @@ static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
        int ret;
 
        switch (rsrc_type) {
-       case 1: /* galpa fw handle */
+       case 0: /* galpa fw handle */
                ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
                ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
                if (unlikely(ret)) {
                        ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_fw() failed rc=%x cq_num=%x",
+                                "ehca_mmap_fw() failed rc=%i cq_num=%x",
                                 ret, cq->cq_number);
                        return ret;
                }
                break;
 
-       case 2: /* cq queue_addr */
+       case 1: /* cq queue_addr */
                ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
                ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
                if (unlikely(ret)) {
                        ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_queue() failed rc=%x cq_num=%x",
+                                "ehca_mmap_queue() failed rc=%i cq_num=%x",
                                 ret, cq->cq_number);
                        return ret;
                }
@@ -201,38 +201,38 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
        int ret;
 
        switch (rsrc_type) {
-       case 1: /* galpa fw handle */
+       case 0: /* galpa fw handle */
                ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
                ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
                if (unlikely(ret)) {
                        ehca_err(qp->ib_qp.device,
-                                "remap_pfn_range() failed ret=%x qp_num=%x",
+                                "remap_pfn_range() failed ret=%i qp_num=%x",
                                 ret, qp->ib_qp.qp_num);
                        return -ENOMEM;
                }
                break;
 
-       case 2: /* qp rqueue_addr */
+       case 1: /* qp rqueue_addr */
                ehca_dbg(qp->ib_qp.device, "qp_num=%x rqueue",
                         qp->ib_qp.qp_num);
                ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
                                      &qp->mm_count_rqueue);
                if (unlikely(ret)) {
                        ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_queue(rq) failed rc=%x qp_num=%x",
+                                "ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
                                 ret, qp->ib_qp.qp_num);
                        return ret;
                }
                break;
 
-       case 3: /* qp squeue_addr */
+       case 2: /* qp squeue_addr */
                ehca_dbg(qp->ib_qp.device, "qp_num=%x squeue",
                         qp->ib_qp.qp_num);
                ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
                                      &qp->mm_count_squeue);
                if (unlikely(ret)) {
                        ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_queue(sq) failed rc=%x qp_num=%x",
+                                "ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
                                 ret, qp->ib_qp.qp_num);
                        return ret;
                }
@@ -249,10 +249,10 @@ static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
 
 int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
-       u64 fileoffset = vma->vm_pgoff << PAGE_SHIFT;
-       u32 idr_handle = fileoffset >> 32;
-       u32 q_type = (fileoffset >> 28) & 0xF;    /* CQ, QP,...        */
-       u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
+       u64 fileoffset = vma->vm_pgoff;
+       u32 idr_handle = fileoffset & 0x1FFFFFF;
+       u32 q_type = (fileoffset >> 27) & 0x1;    /* CQ, QP,...        */
+       u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
        u32 cur_pid = current->tgid;
        u32 ret;
        struct ehca_cq *cq;
@@ -261,7 +261,7 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
        struct ib_uobject *uobject;
 
        switch (q_type) {
-       case  1: /* CQ */
+       case  0: /* CQ */
                read_lock(&ehca_cq_idr_lock);
                cq = idr_find(&ehca_cq_idr, idr_handle);
                read_unlock(&ehca_cq_idr_lock);
@@ -283,13 +283,13 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
                ret = ehca_mmap_cq(vma, cq, rsrc_type);
                if (unlikely(ret)) {
                        ehca_err(cq->ib_cq.device,
-                                "ehca_mmap_cq() failed rc=%x cq_num=%x",
+                                "ehca_mmap_cq() failed rc=%i cq_num=%x",
                                 ret, cq->cq_number);
                        return ret;
                }
                break;
 
-       case 2: /* QP */
+       case 1: /* QP */
                read_lock(&ehca_qp_idr_lock);
                qp = idr_find(&ehca_qp_idr, idr_handle);
                read_unlock(&ehca_qp_idr_lock);
@@ -313,7 +313,7 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
                ret = ehca_mmap_qp(vma, qp, rsrc_type);
                if (unlikely(ret)) {
                        ehca_err(qp->ib_qp.device,
-                                "ehca_mmap_qp() failed rc=%x qp_num=%x",
+                                "ehca_mmap_qp() failed rc=%i qp_num=%x",
                                 ret, qp->ib_qp.qp_num);
                        return ret;
                }
index 24f454162f2479029255a3b4097a6ad7b0a61c0b..c16a21374bb5a3c222a73147c28584cb871ab619 100644 (file)
 #define H_MP_SHUTDOWN                   EHCA_BMASK_IBM(48, 48)
 #define H_MP_RESET_QKEY_CTR             EHCA_BMASK_IBM(49, 49)
 
+#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx"
+#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx"
+#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
+
 static DEFINE_SPINLOCK(hcall_lock);
 
 static u32 get_longbusy_msecs(int longbusy_rc)
@@ -116,16 +120,28 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
                                    unsigned long arg7)
 {
        long ret;
-       int i, sleep_msecs;
+       int i, sleep_msecs, do_lock = 0;
+       unsigned long flags;
 
-       ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
-                    "arg5=%lx arg6=%lx arg7=%lx",
+       ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
                     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
 
+       /* lock H_FREE_RESOURCE(MR) against itself and H_ALLOC_RESOURCE(MR) */
+       if ((opcode == H_FREE_RESOURCE) && (arg7 == 5)) {
+               arg7 = 0; /* better not upset firmware */
+               do_lock = 1;
+       }
+
        for (i = 0; i < 5; i++) {
+               if (do_lock)
+                       spin_lock_irqsave(&hcall_lock, flags);
+
                ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
                                         arg5, arg6, arg7);
 
+               if (do_lock)
+                       spin_unlock_irqrestore(&hcall_lock, flags);
+
                if (H_IS_LONG_BUSY(ret)) {
                        sleep_msecs = get_longbusy_msecs(ret);
                        msleep_interruptible(sleep_msecs);
@@ -133,16 +149,13 @@ static long ehca_plpar_hcall_norets(unsigned long opcode,
                }
 
                if (ret < H_SUCCESS)
-                       ehca_gen_err("opcode=%lx ret=%lx"
-                                    " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
-                                    " arg5=%lx arg6=%lx arg7=%lx ",
-                                    opcode, ret,
-                                    arg1, arg2, arg3, arg4, arg5,
-                                    arg6, arg7);
-
-               ehca_gen_dbg("opcode=%lx ret=%lx", opcode, ret);
-               return ret;
+                       ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT,
+                                    opcode, ret, arg1, arg2, arg3,
+                                    arg4, arg5, arg6, arg7);
+               else
+                       ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
 
+               return ret;
        }
 
        return H_BUSY;
@@ -161,25 +174,24 @@ static long ehca_plpar_hcall9(unsigned long opcode,
                              unsigned long arg9)
 {
        long ret;
-       int i, sleep_msecs, lock_is_set = 0;
+       int i, sleep_msecs, do_lock;
        unsigned long flags = 0;
 
-       ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
-                    "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
-                    opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7,
-                    arg8, arg9);
+       ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
+                    arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9);
+
+       /* lock H_ALLOC_RESOURCE(MR) against itself and H_FREE_RESOURCE(MR) */
+       do_lock = ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5));
 
        for (i = 0; i < 5; i++) {
-               if ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)) {
+               if (do_lock)
                        spin_lock_irqsave(&hcall_lock, flags);
-                       lock_is_set = 1;
-               }
 
                ret = plpar_hcall9(opcode, outs,
                                   arg1, arg2, arg3, arg4, arg5,
                                   arg6, arg7, arg8, arg9);
 
-               if (lock_is_set)
+               if (do_lock)
                        spin_unlock_irqrestore(&hcall_lock, flags);
 
                if (H_IS_LONG_BUSY(ret)) {
@@ -188,26 +200,19 @@ static long ehca_plpar_hcall9(unsigned long opcode,
                        continue;
                }
 
-               if (ret < H_SUCCESS)
-                       ehca_gen_err("opcode=%lx ret=%lx"
-                                    " arg1=%lx arg2=%lx arg3=%lx arg4=%lx"
-                                    " arg5=%lx arg6=%lx arg7=%lx arg8=%lx"
-                                    " arg9=%lx"
-                                    " out1=%lx out2=%lx out3=%lx out4=%lx"
-                                    " out5=%lx out6=%lx out7=%lx out8=%lx"
-                                    " out9=%lx",
-                                    opcode, ret,
-                                    arg1, arg2, arg3, arg4, arg5,
-                                    arg6, arg7, arg8, arg9,
-                                    outs[0], outs[1], outs[2], outs[3],
+               if (ret < H_SUCCESS) {
+                       ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT,
+                                    opcode, arg1, arg2, arg3, arg4, arg5,
+                                    arg6, arg7, arg8, arg9);
+                       ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
+                                    ret, outs[0], outs[1], outs[2], outs[3],
+                                    outs[4], outs[5], outs[6], outs[7],
+                                    outs[8]);
+               } else
+                       ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
+                                    ret, outs[0], outs[1], outs[2], outs[3],
                                     outs[4], outs[5], outs[6], outs[7],
                                     outs[8]);
-
-               ehca_gen_dbg("opcode=%lx ret=%lx out1=%lx out2=%lx out3=%lx "
-                            "out4=%lx out5=%lx out6=%lx out7=%lx out8=%lx "
-                            "out9=%lx",
-                            opcode, ret, outs[0], outs[1], outs[2], outs[3],
-                            outs[4], outs[5], outs[6], outs[7], outs[8]);
                return ret;
        }
 
@@ -247,7 +252,7 @@ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
        *eq_ist = (u32)outs[5];
 
        if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resource - ret=%lx ", ret);
+               ehca_gen_err("Not enough resource - ret=%li ", ret);
 
        return ret;
 }
@@ -285,7 +290,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
                hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]);
 
        if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lx", ret);
+               ehca_gen_err("Not enough resources. ret=%li", ret);
 
        return ret;
 }
@@ -360,7 +365,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
                hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);
 
        if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lx", ret);
+               ehca_gen_err("Not enough resources. ret=%li", ret);
 
        return ret;
 }
@@ -555,7 +560,7 @@ u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
                                0, 0, 0, 0, 0);
 
        if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Insufficient resources ret=%lx", ret);
+               ehca_gen_err("Insufficient resources ret=%li", ret);
 
        return ret;
 }
@@ -591,7 +596,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
                                qp->ipz_qp_handle.handle,  /* r6 */
                                0, 0, 0, 0, 0, 0);
        if (ret == H_HARDWARE)
-               ehca_gen_err("HCA not operational. ret=%lx", ret);
+               ehca_gen_err("HCA not operational. ret=%li", ret);
 
        ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
                                      adapter_handle.handle,     /* r4 */
@@ -599,7 +604,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
                                      0, 0, 0, 0, 0);
 
        if (ret == H_RESOURCE)
-               ehca_gen_err("Resource still in use. ret=%lx", ret);
+               ehca_gen_err("Resource still in use. ret=%li", ret);
 
        return ret;
 }
@@ -634,7 +639,7 @@ u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
        *bma_qp_nr = (u32)outs[1];
 
        if (ret == H_ALIAS_EXIST)
-               ehca_gen_err("AQP1 already exists. ret=%lx", ret);
+               ehca_gen_err("AQP1 already exists. ret=%li", ret);
 
        return ret;
 }
@@ -656,7 +661,7 @@ u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
                                      0, 0);
 
        if (ret == H_NOT_ENOUGH_RESOURCES)
-               ehca_gen_err("Not enough resources. ret=%lx", ret);
+               ehca_gen_err("Not enough resources. ret=%li", ret);
 
        return ret;
 }
@@ -695,7 +700,7 @@ u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
                                      0, 0, 0, 0);
 
        if (ret == H_RESOURCE)
-               ehca_gen_err("H_FREE_RESOURCE failed ret=%lx ", ret);
+               ehca_gen_err("H_FREE_RESOURCE failed ret=%li ", ret);
 
        return ret;
 }
@@ -717,7 +722,7 @@ u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
                                      0, 0, 0, 0, 0);
 
        if (ret == H_RESOURCE)
-               ehca_gen_err("Resource in use. ret=%lx ", ret);
+               ehca_gen_err("Resource in use. ret=%li ", ret);
 
        return ret;
 }
@@ -816,7 +821,7 @@ u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
        return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
                                       adapter_handle.handle,    /* r4 */
                                       mr->ipz_mr_handle.handle, /* r5 */
-                                      0, 0, 0, 0, 0);
+                                      0, 0, 0, 0, 5);
 }
 
 u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
index 29bd476fbd54d3a4e9bff83a44b7ec559726ab88..661f8db62706f32d71388d14fc9d977973ec9f69 100644 (file)
@@ -158,6 +158,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd)
 
        queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9)));
        queue->small_page = page;
+       queue->offset = bit << (order + 9);
        return 1;
 
 out:
index 6ad822c35930417c558dba5f14e9f6dcb81df182..851df8a75e79cfc6ea5dc386530966f9a8c0206d 100644 (file)
@@ -189,6 +189,8 @@ typedef enum _ipath_ureg {
 #define IPATH_RUNTIME_RCVHDR_COPY      0x8
 #define IPATH_RUNTIME_MASTER   0x10
 /* 0x20 and 0x40 are no longer used, but are reserved for ABI compatibility */
+#define IPATH_RUNTIME_FORCE_PIOAVAIL 0x400
+#define IPATH_RUNTIME_PIO_REGSWAPPED 0x800
 
 /*
  * This structure is returned by ipath_userinit() immediately after
@@ -350,7 +352,7 @@ struct ipath_base_info {
  * may not be implemented; the user code must deal with this if it
  * cares, or it must abort after initialization reports the difference.
  */
-#define IPATH_USER_SWMINOR 5
+#define IPATH_USER_SWMINOR 6
 
 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
 
index a6f04d27ec576c021061df4d13d8f18e3fde61e1..645ed71fd7972f58a73980f8910798423a2b291c 100644 (file)
@@ -76,22 +76,25 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
                }
                return;
        }
-       wc->queue[head].wr_id = entry->wr_id;
-       wc->queue[head].status = entry->status;
-       wc->queue[head].opcode = entry->opcode;
-       wc->queue[head].vendor_err = entry->vendor_err;
-       wc->queue[head].byte_len = entry->byte_len;
-       wc->queue[head].imm_data = (__u32 __force)entry->imm_data;
-       wc->queue[head].qp_num = entry->qp->qp_num;
-       wc->queue[head].src_qp = entry->src_qp;
-       wc->queue[head].wc_flags = entry->wc_flags;
-       wc->queue[head].pkey_index = entry->pkey_index;
-       wc->queue[head].slid = entry->slid;
-       wc->queue[head].sl = entry->sl;
-       wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
-       wc->queue[head].port_num = entry->port_num;
-       /* Make sure queue entry is written before the head index. */
-       smp_wmb();
+       if (cq->ip) {
+               wc->uqueue[head].wr_id = entry->wr_id;
+               wc->uqueue[head].status = entry->status;
+               wc->uqueue[head].opcode = entry->opcode;
+               wc->uqueue[head].vendor_err = entry->vendor_err;
+               wc->uqueue[head].byte_len = entry->byte_len;
+               wc->uqueue[head].imm_data = (__u32 __force)entry->imm_data;
+               wc->uqueue[head].qp_num = entry->qp->qp_num;
+               wc->uqueue[head].src_qp = entry->src_qp;
+               wc->uqueue[head].wc_flags = entry->wc_flags;
+               wc->uqueue[head].pkey_index = entry->pkey_index;
+               wc->uqueue[head].slid = entry->slid;
+               wc->uqueue[head].sl = entry->sl;
+               wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
+               wc->uqueue[head].port_num = entry->port_num;
+               /* Make sure entry is written before the head index. */
+               smp_wmb();
+       } else
+               wc->kqueue[head] = *entry;
        wc->head = next;
 
        if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -130,6 +133,12 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
        int npolled;
        u32 tail;
 
+       /* The kernel can only poll a kernel completion queue */
+       if (cq->ip) {
+               npolled = -EINVAL;
+               goto bail;
+       }
+
        spin_lock_irqsave(&cq->lock, flags);
 
        wc = cq->queue;
@@ -137,31 +146,10 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
        if (tail > (u32) cq->ibcq.cqe)
                tail = (u32) cq->ibcq.cqe;
        for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-               struct ipath_qp *qp;
-
                if (tail == wc->head)
                        break;
-               /* Make sure entry is read after head index is read. */
-               smp_rmb();
-               qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
-                                     wc->queue[tail].qp_num);
-               entry->qp = &qp->ibqp;
-               if (atomic_dec_and_test(&qp->refcount))
-                       wake_up(&qp->wait);
-
-               entry->wr_id = wc->queue[tail].wr_id;
-               entry->status = wc->queue[tail].status;
-               entry->opcode = wc->queue[tail].opcode;
-               entry->vendor_err = wc->queue[tail].vendor_err;
-               entry->byte_len = wc->queue[tail].byte_len;
-               entry->imm_data = wc->queue[tail].imm_data;
-               entry->src_qp = wc->queue[tail].src_qp;
-               entry->wc_flags = wc->queue[tail].wc_flags;
-               entry->pkey_index = wc->queue[tail].pkey_index;
-               entry->slid = wc->queue[tail].slid;
-               entry->sl = wc->queue[tail].sl;
-               entry->dlid_path_bits = wc->queue[tail].dlid_path_bits;
-               entry->port_num = wc->queue[tail].port_num;
+               /* The kernel doesn't need a RMB since it has the lock. */
+               *entry = wc->kqueue[tail];
                if (tail >= cq->ibcq.cqe)
                        tail = 0;
                else
@@ -171,6 +159,7 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 
        spin_unlock_irqrestore(&cq->lock, flags);
 
+bail:
        return npolled;
 }
 
@@ -215,6 +204,7 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vec
        struct ipath_cq *cq;
        struct ipath_cq_wc *wc;
        struct ib_cq *ret;
+       u32 sz;
 
        if (entries < 1 || entries > ib_ipath_max_cqes) {
                ret = ERR_PTR(-EINVAL);
@@ -235,7 +225,12 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vec
         * We need to use vmalloc() in order to support mmap and large
         * numbers of entries.
         */
-       wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
+       sz = sizeof(*wc);
+       if (udata && udata->outlen >= sizeof(__u64))
+               sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
+       else
+               sz += sizeof(struct ib_wc) * (entries + 1);
+       wc = vmalloc_user(sz);
        if (!wc) {
                ret = ERR_PTR(-ENOMEM);
                goto bail_cq;
@@ -247,9 +242,8 @@ struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vec
         */
        if (udata && udata->outlen >= sizeof(__u64)) {
                int err;
-               u32 s = sizeof *wc + sizeof(struct ib_wc) * entries;
 
-               cq->ip = ipath_create_mmap_info(dev, s, context, wc);
+               cq->ip = ipath_create_mmap_info(dev, sz, context, wc);
                if (!cq->ip) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_wc;
@@ -380,6 +374,7 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
        struct ipath_cq_wc *wc;
        u32 head, tail, n;
        int ret;
+       u32 sz;
 
        if (cqe < 1 || cqe > ib_ipath_max_cqes) {
                ret = -EINVAL;
@@ -389,7 +384,12 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
        /*
         * Need to use vmalloc() if we want to support large #s of entries.
         */
-       wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
+       sz = sizeof(*wc);
+       if (udata && udata->outlen >= sizeof(__u64))
+               sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
+       else
+               sz += sizeof(struct ib_wc) * (cqe + 1);
+       wc = vmalloc_user(sz);
        if (!wc) {
                ret = -ENOMEM;
                goto bail;
@@ -430,7 +430,10 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
                goto bail;
        }
        for (n = 0; tail != head; n++) {
-               wc->queue[n] = old_wc->queue[tail];
+               if (cq->ip)
+                       wc->uqueue[n] = old_wc->uqueue[tail];
+               else
+                       wc->kqueue[n] = old_wc->kqueue[tail];
                if (tail == (u32) cq->ibcq.cqe)
                        tail = 0;
                else
@@ -447,9 +450,8 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
        if (cq->ip) {
                struct ipath_ibdev *dev = to_idev(ibcq->device);
                struct ipath_mmap_info *ip = cq->ip;
-               u32 s = sizeof *wc + sizeof(struct ib_wc) * cqe;
 
-               ipath_update_mmap_info(dev, ip, s, wc);
+               ipath_update_mmap_info(dev, ip, sz, wc);
                spin_lock_irq(&dev->pending_lock);
                if (list_empty(&ip->pending_mmaps))
                        list_add(&ip->pending_mmaps, &dev->pending_mmaps);
index cf25cdab02f9dd3efa5af610d200cf6211b9b85a..4137c7770f1bce083d7342508005c9497e5a91cd 100644 (file)
@@ -446,19 +446,21 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
                           dd->ipath_unit, plen - 1, pbufn);
 
        if (dp.pbc_wd == 0)
-               /* Legacy operation, use computed pbc_wd */
                dp.pbc_wd = plen;
-
-       /* we have to flush after the PBC for correctness on some cpus
-        * or WC buffer can be written out of order */
        writeq(dp.pbc_wd, piobuf);
-       ipath_flush_wc();
-       /* copy all by the trigger word, then flush, so it's written
+       /*
+        * Copy all but the trigger word, then flush, so it's written
         * to chip before trigger word, then write trigger word, then
-        * flush again, so packet is sent. */
-       __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
-       ipath_flush_wc();
-       __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+        * flush again, so packet is sent.
+        */
+       if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
+               ipath_flush_wc();
+               __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+               ipath_flush_wc();
+               __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+       } else
+               __iowrite32_copy(piobuf + 2, tmpbuf, clen);
+
        ipath_flush_wc();
 
        ret = sizeof(dp);
index 6ccba365a24c90821edfd3a247692221318bf57b..1f152ded1e3c48083840e8119aecea8738be1cd3 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/spinlock.h>
 #include <linux/idr.h>
 #include <linux/pci.h>
+#include <linux/io.h>
 #include <linux/delay.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
@@ -280,6 +281,89 @@ void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
 {
 }
 
+/*
+ * Perform a PIO buffer bandwidth write test, to verify proper system
+ * configuration.  Even when all the setup calls work, occasionally
+ * BIOS or other issues can prevent write combining from working, or
+ * can cause other bandwidth problems to the chip.
+ *
+ * This test simply writes the same buffer over and over again, and
+ * measures close to the peak bandwidth to the chip (not testing
+ * data bandwidth to the wire).   On chips that use an address-based
+ * trigger to send packets to the wire, this is easy.  On chips that
+ * use a count to trigger, we want to make sure that the packet doesn't
+ * go out on the wire, or trigger flow control checks.
+ */
+static void ipath_verify_pioperf(struct ipath_devdata *dd)
+{
+       u32 pbnum, cnt, lcnt;
+       u32 __iomem *piobuf;
+       u32 *addr;
+       u64 msecs, emsecs;
+
+       piobuf = ipath_getpiobuf(dd, &pbnum);
+       if (!piobuf) {
+               dev_info(&dd->pcidev->dev,
+                       "No PIObufs for checking perf, skipping\n");
+               return;
+       }
+
+       /*
+        * Enough to give us a reasonable test, less than piobuf size, and
+        * likely multiple of store buffer length.
+        */
+       cnt = 1024;
+
+       addr = vmalloc(cnt);
+       if (!addr) {
+               dev_info(&dd->pcidev->dev,
+                       "Couldn't get memory for checking PIO perf,"
+                       " skipping\n");
+               goto done;
+       }
+
+       preempt_disable();  /* we want reasonably accurate elapsed time */
+       msecs = 1 + jiffies_to_msecs(jiffies);
+       for (lcnt = 0; lcnt < 10000U; lcnt++) {
+               /* wait until we cross msec boundary */
+               if (jiffies_to_msecs(jiffies) >= msecs)
+                       break;
+               udelay(1);
+       }
+
+       writeq(0, piobuf); /* length 0, no dwords actually sent */
+       ipath_flush_wc();
+
+       /*
+        * this is only roughly accurate, since even with preempt we
+        * still take interrupts that could take a while.   Running for
+        * >= 5 msec seems to get us "close enough" to accurate values
+        */
+       msecs = jiffies_to_msecs(jiffies);
+       for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
+               __iowrite32_copy(piobuf + 64, addr, cnt >> 2);
+               emsecs = jiffies_to_msecs(jiffies) - msecs;
+       }
+
+       /* 1 GiB/sec, slightly over IB SDR line rate */
+       if (lcnt < (emsecs * 1024U))
+               ipath_dev_err(dd,
+                       "Performance problem: bandwidth to PIO buffers is "
+                       "only %u MiB/sec\n",
+                       lcnt / (u32) emsecs);
+       else
+               ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
+                       lcnt / (u32) emsecs);
+
+       preempt_enable();
+
+       vfree(addr);
+
+done:
+       /* disarm piobuf, so it's available again */
+       ipath_disarm_piobufs(dd, pbnum, 1);
+}
+
 static int __devinit ipath_init_one(struct pci_dev *pdev,
                                    const struct pci_device_id *ent)
 {
@@ -298,8 +382,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 
        ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
 
-       read_bars(dd, pdev, &bar0, &bar1);
-
        ret = pci_enable_device(pdev);
        if (ret) {
                /* This can happen iff:
@@ -445,9 +527,6 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
                goto bail_regions;
        }
 
-       dd->ipath_deviceid = ent->device;       /* save for later use */
-       dd->ipath_vendorid = ent->vendor;
-
        dd->ipath_pcirev = pdev->revision;
 
 #if defined(__powerpc__)
@@ -515,6 +594,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
                ret = 0;
        }
 
+       ipath_verify_pioperf(dd);
+
        ipath_device_create_group(&pdev->dev, dd);
        ipathfs_add_device(dd);
        ipath_user_add(dd);
@@ -2005,6 +2086,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
                            INFINIPATH_IBCC_LINKINITCMD_SHIFT);
        ipath_cancel_sends(dd, 0);
 
+       signal_ib_event(dd, IB_EVENT_PORT_ERR);
+
        /* disable IBC */
        dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
        ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
index b4503e9c1e954bb4a7ff346344894818c42c9a5a..bcfa3ccb555f988cfba9e719602f130767356001 100644 (file)
@@ -596,7 +596,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
                goto bail;
        }
 
-       len = offsetof(struct ipath_flash, if_future);
+       /*
+        * read full flash, not just currently used part, since it may have
+        * been written with a newer definition
+        */
+       len = sizeof(struct ipath_flash);
        buf = vmalloc(len);
        if (!buf) {
                ipath_dev_err(dd, "Couldn't allocate memory to read %u "
@@ -737,8 +741,10 @@ int ipath_update_eeprom_log(struct ipath_devdata *dd)
        /*
         * The quick-check above determined that there is something worthy
         * of logging, so get current contents and do a more detailed idea.
+        * read full flash, not just currently used part, since it may have
+        * been written with a newer definition
         */
-       len = offsetof(struct ipath_flash, if_future);
+       len = sizeof(struct ipath_flash);
        buf = vmalloc(len);
        ret = 1;
        if (!buf) {
index 33ab0d6b80ff3085a0ed423b4fe980fd1c938807..5de3243a47c38bf023fc6fc0ee97dcdc69886fbb 100644 (file)
@@ -538,6 +538,9 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
                        continue;
                cnt++;
                if (dd->ipath_pageshadow[porttid + tid]) {
+                       struct page *p;
+                       p = dd->ipath_pageshadow[porttid + tid];
+                       dd->ipath_pageshadow[porttid + tid] = NULL;
                        ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
                                   pd->port_pid, tid);
                        dd->ipath_f_put_tid(dd, &tidbase[tid],
@@ -546,9 +549,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
                        pci_unmap_page(dd->pcidev,
                                dd->ipath_physshadow[porttid + tid],
                                PAGE_SIZE, PCI_DMA_FROMDEVICE);
-                       ipath_release_user_pages(
-                               &dd->ipath_pageshadow[porttid + tid], 1);
-                       dd->ipath_pageshadow[porttid + tid] = NULL;
+                       ipath_release_user_pages(&p, 1);
                        ipath_stats.sps_pageunlocks++;
                } else
                        ipath_dbg("Unused tid %u, ignoring\n", tid);
@@ -1341,6 +1342,19 @@ bail:
        return ret;
 }
 
+static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
+{
+       unsigned pollflag = 0;
+
+       if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
+           pd->port_hdrqfull != pd->port_hdrqfull_poll) {
+               pollflag |= POLLIN | POLLRDNORM;
+               pd->port_hdrqfull_poll = pd->port_hdrqfull;
+       }
+
+       return pollflag;
+}
+
 static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
                                      struct file *fp,
                                      struct poll_table_struct *pt)
@@ -1350,22 +1364,20 @@ static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
 
        dd = pd->port_dd;
 
-       if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
-               pollflag |= POLLERR;
-               clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
-       }
+       /* variable access in ipath_poll_hdrqfull() needs this */
+       rmb();
+       pollflag = ipath_poll_hdrqfull(pd);
 
-       if (test_bit(IPATH_PORT_WAITING_URG, &pd->int_flag)) {
+       if (pd->port_urgent != pd->port_urgent_poll) {
                pollflag |= POLLIN | POLLRDNORM;
-               clear_bit(IPATH_PORT_WAITING_URG, &pd->int_flag);
+               pd->port_urgent_poll = pd->port_urgent;
        }
 
        if (!pollflag) {
+               /* this saves a spin_lock/unlock in interrupt handler... */
                set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
-               if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
-                       set_bit(IPATH_PORT_WAITING_OVERFLOW,
-                               &pd->port_flag);
-
+               /* flush waiting flag so we don't miss an event... */
+               wmb();
                poll_wait(fp, &pd->port_wait, pt);
        }
 
@@ -1376,31 +1388,27 @@ static unsigned int ipath_poll_next(struct ipath_portdata *pd,
                                    struct file *fp,
                                    struct poll_table_struct *pt)
 {
-       u32 head, tail;
+       u32 head;
+       u32 tail;
        unsigned pollflag = 0;
        struct ipath_devdata *dd;
 
        dd = pd->port_dd;
 
+       /* variable access in ipath_poll_hdrqfull() needs this */
+       rmb();
+       pollflag = ipath_poll_hdrqfull(pd);
+
        head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
        tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr;
 
-       if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
-               pollflag |= POLLERR;
-               clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
-       }
-
-       if (tail != head ||
-           test_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag)) {
+       if (head != tail)
                pollflag |= POLLIN | POLLRDNORM;
-               clear_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag);
-       }
-
-       if (!pollflag) {
+       else {
+               /* this saves a spin_lock/unlock in interrupt handler */
                set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
-               if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
-                       set_bit(IPATH_PORT_WAITING_OVERFLOW,
-                               &pd->port_flag);
+               /* flush waiting flag so we don't miss an event */
+               wmb();
 
                set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT,
                        &dd->ipath_rcvctrl);
@@ -1917,6 +1925,12 @@ static int ipath_do_user_init(struct file *fp,
        ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
                pd->port_port, head32);
        pd->port_tidcursor = 0; /* start at beginning after open */
+
+       /* initialize poll variables... */
+       pd->port_urgent = 0;
+       pd->port_urgent_poll = 0;
+       pd->port_hdrqfull_poll = pd->port_hdrqfull;
+
        /*
         * now enable the port; the tail registers will be written to memory
         * by the chip as soon as it sees the write to
@@ -2039,9 +2053,11 @@ static int ipath_close(struct inode *in, struct file *fp)
 
        if (dd->ipath_kregbase) {
                int i;
-               /* atomically clear receive enable port. */
+               /* atomically clear receive enable port and intr avail. */
                clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port,
                          &dd->ipath_rcvctrl);
+               clear_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT,
+                         &dd->ipath_rcvctrl);
                ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
                        dd->ipath_rcvctrl);
                /* and read back from chip to be sure that nothing
index 2e689b974e1f48ed4e6538b72882e166b60c36ce..262c25db05cde8a4888096f7ae7e92f592a88052 100644 (file)
@@ -130,175 +130,6 @@ static const struct file_operations atomic_counters_ops = {
        .read = atomic_counters_read,
 };
 
-static ssize_t atomic_node_info_read(struct file *file, char __user *buf,
-                                    size_t count, loff_t *ppos)
-{
-       u32 nodeinfo[10];
-       struct ipath_devdata *dd;
-       u64 guid;
-
-       dd = file->f_path.dentry->d_inode->i_private;
-
-       guid = be64_to_cpu(dd->ipath_guid);
-
-       nodeinfo[0] =                   /* BaseVersion is SMA */
-               /* ClassVersion is SMA */
-               (1 << 8)                /* NodeType  */
-               | (1 << 0);             /* NumPorts */
-       nodeinfo[1] = (u32) (guid >> 32);
-       nodeinfo[2] = (u32) (guid & 0xffffffff);
-       /* PortGUID == SystemImageGUID for us */
-       nodeinfo[3] = nodeinfo[1];
-       /* PortGUID == SystemImageGUID for us */
-       nodeinfo[4] = nodeinfo[2];
-       /* PortGUID == NodeGUID for us */
-       nodeinfo[5] = nodeinfo[3];
-       /* PortGUID == NodeGUID for us */
-       nodeinfo[6] = nodeinfo[4];
-       nodeinfo[7] = (4 << 16) /* we support 4 pkeys */
-               | (dd->ipath_deviceid << 0);
-       /* our chip version as 16 bits major, 16 bits minor */
-       nodeinfo[8] = dd->ipath_minrev | (dd->ipath_majrev << 16);
-       nodeinfo[9] = (dd->ipath_unit << 24) | (dd->ipath_vendorid << 0);
-
-       return simple_read_from_buffer(buf, count, ppos, nodeinfo,
-                                      sizeof nodeinfo);
-}
-
-static const struct file_operations atomic_node_info_ops = {
-       .read = atomic_node_info_read,
-};
-
-static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
-                                    size_t count, loff_t *ppos)
-{
-       u32 portinfo[13];
-       u32 tmp, tmp2;
-       struct ipath_devdata *dd;
-
-       dd = file->f_path.dentry->d_inode->i_private;
-
-       /* so we only initialize non-zero fields. */
-       memset(portinfo, 0, sizeof portinfo);
-
-       /*
-        * Notimpl yet M_Key (64)
-        * Notimpl yet GID (64)
-        */
-
-       portinfo[4] = (dd->ipath_lid << 16);
-
-       /*
-        * Notimpl yet SMLID.
-        * CapabilityMask is 0, we don't support any of these
-        * DiagCode is 0; we don't store any diag info for now Notimpl yet
-        * M_KeyLeasePeriod (we don't support M_Key)
-        */
-
-       /* LocalPortNum is whichever port number they ask for */
-       portinfo[7] = (dd->ipath_unit << 24)
-               /* LinkWidthEnabled */
-               | (2 << 16)
-               /* LinkWidthSupported (really 2, but not IB valid) */
-               | (3 << 8)
-               /* LinkWidthActive */
-               | (2 << 0);
-       tmp = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
-       tmp2 = 5;
-       if (tmp == IPATH_IBSTATE_INIT)
-               tmp = 2;
-       else if (tmp == IPATH_IBSTATE_ARM)
-               tmp = 3;
-       else if (tmp == IPATH_IBSTATE_ACTIVE)
-               tmp = 4;
-       else {
-               tmp = 0;        /* down */
-               tmp2 = tmp & 0xf;
-       }
-
-       portinfo[8] = (1 << 28) /* LinkSpeedSupported */
-               | (tmp << 24)   /* PortState */
-               | (tmp2 << 20)  /* PortPhysicalState */
-               | (2 << 16)
-
-               /* LinkDownDefaultState */
-               /* M_KeyProtectBits == 0 */
-               /* NotImpl yet LMC == 0 (we can support all values) */
-               | (1 << 4)      /* LinkSpeedActive */
-               | (1 << 0);     /* LinkSpeedEnabled */
-       switch (dd->ipath_ibmtu) {
-       case 4096:
-               tmp = 5;
-               break;
-       case 2048:
-               tmp = 4;
-               break;
-       case 1024:
-               tmp = 3;
-               break;
-       case 512:
-               tmp = 2;
-               break;
-       case 256:
-               tmp = 1;
-               break;
-       default:                /* oops, something is wrong */
-               ipath_dbg("Problem, ipath_ibmtu 0x%x not a valid IB MTU, "
-                         "treat as 2048\n", dd->ipath_ibmtu);
-               tmp = 4;
-               break;
-       }
-       portinfo[9] = (tmp << 28)
-               /* NeighborMTU */
-               /* Notimpl MasterSMSL */
-               | (1 << 20)
-
-               /* VLCap */
-               /* Notimpl InitType (actually, an SMA decision) */
-               /* VLHighLimit is 0 (only one VL) */
-               ; /* VLArbitrationHighCap is 0 (only one VL) */
-       /*
-        * Note: the chips support a maximum MTU of 4096, but the driver
-        * hasn't implemented this feature yet, so set the maximum
-        * to 2048.
-        */
-       portinfo[10] =  /* VLArbitrationLowCap is 0 (only one VL) */
-               /* InitTypeReply is SMA decision */
-               (4 << 16)       /* MTUCap 2048 */
-               | (7 << 13)     /* VLStallCount */
-               | (0x1f << 8)   /* HOQLife */
-               | (1 << 4)
-
-               /* OperationalVLs 0 */
-               /* PartitionEnforcementInbound */
-               /* PartitionEnforcementOutbound not enforced */
-               /* FilterRawinbound not enforced */
-               ;               /* FilterRawOutbound not enforced */
-       /* M_KeyViolations are not counted by hardware, SMA can count */
-       tmp = ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
-       /* P_KeyViolations are counted by hardware. */
-       portinfo[11] = ((tmp & 0xffff) << 0);
-       portinfo[12] =
-               /* Q_KeyViolations are not counted by hardware */
-               (1 << 8)
-
-               /* GUIDCap */
-               /* SubnetTimeOut handled by SMA */
-               /* RespTimeValue handled by SMA */
-               ;
-       /* LocalPhyErrors are programmed to max */
-       portinfo[12] |= (0xf << 20)
-               | (0xf << 16)   /* OverRunErrors are programmed to max */
-               ;
-
-       return simple_read_from_buffer(buf, count, ppos, portinfo,
-                                      sizeof portinfo);
-}
-
-static const struct file_operations atomic_port_info_ops = {
-       .read = atomic_port_info_read,
-};
-
 static ssize_t flash_read(struct file *file, char __user *buf,
                          size_t count, loff_t *ppos)
 {
@@ -427,22 +258,6 @@ static int create_device_files(struct super_block *sb,
                goto bail;
        }
 
-       ret = create_file("node_info", S_IFREG|S_IRUGO, dir, &tmp,
-                         &atomic_node_info_ops, dd);
-       if (ret) {
-               printk(KERN_ERR "create_file(%s/node_info) "
-                      "failed: %d\n", unit, ret);
-               goto bail;
-       }
-
-       ret = create_file("port_info", S_IFREG|S_IRUGO, dir, &tmp,
-                         &atomic_port_info_ops, dd);
-       if (ret) {
-               printk(KERN_ERR "create_file(%s/port_info) "
-                      "failed: %d\n", unit, ret);
-               goto bail;
-       }
-
        ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
                          &flash_ops, dd);
        if (ret) {
@@ -508,8 +323,6 @@ static int remove_device_files(struct super_block *sb,
        }
 
        remove_file(dir, "flash");
-       remove_file(dir, "port_info");
-       remove_file(dir, "node_info");
        remove_file(dir, "atomic_counters");
        d_delete(dir);
        ret = simple_rmdir(root->d_inode, dir);
index 650745d83faccbe0f3da87ffbc6feb11087b1137..ddbebe4bdb2703f08182d5304c1c93d11dac63a1 100644 (file)
@@ -631,56 +631,35 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 {
        char *n = NULL;
        u8 boardrev = dd->ipath_boardrev;
-       int ret;
+       int ret = 0;
 
        switch (boardrev) {
-       case 4:         /* Ponderosa is one of the bringup boards */
-               n = "Ponderosa";
-               break;
        case 5:
                /*
                 * original production board; two production levels, with
                 * different serial number ranges.   See ipath_ht_early_init() for
                 * case where we enable IPATH_GPIO_INTR for later serial # range.
+                * Original 112* serial number is no longer supported.
                 */
                n = "InfiniPath_QHT7040";
                break;
-       case 6:
-               n = "OEM_Board_3";
-               break;
        case 7:
                /* small form factor production board */
                n = "InfiniPath_QHT7140";
                break;
-       case 8:
-               n = "LS/X-1";
-               break;
-       case 9:         /* Comstock bringup test board */
-               n = "Comstock";
-               break;
-       case 10:
-               n = "OEM_Board_2";
-               break;
-       case 11:
-               n = "InfiniPath_HT-470"; /* obsoleted */
-               break;
-       case 12:
-               n = "OEM_Board_4";
-               break;
        default:                /* don't know, just print the number */
                ipath_dev_err(dd, "Don't yet know about board "
                              "with ID %u\n", boardrev);
                snprintf(name, namelen, "Unknown_InfiniPath_QHT7xxx_%u",
                         boardrev);
+               ret = 1;
                break;
        }
        if (n)
                snprintf(name, namelen, "%s", n);
 
-       if (dd->ipath_boardrev != 6 && dd->ipath_boardrev != 7 &&
-           dd->ipath_boardrev != 11) {
+       if (ret) {
                ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
-               ret = 1;
                goto bail;
        }
        if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
@@ -1554,10 +1533,25 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
                 * can use GPIO interrupts.  They have serial #'s starting
                 * with 128, rather than 112.
                 */
-               dd->ipath_flags |= IPATH_GPIO_INTR;
-       } else
-               ipath_dev_err(dd, "Unsupported InfiniPath serial "
-                             "number %.16s!\n", dd->ipath_serial);
+               if (dd->ipath_serial[0] == '1' &&
+                   dd->ipath_serial[1] == '2' &&
+                   dd->ipath_serial[2] == '8')
+                       dd->ipath_flags |= IPATH_GPIO_INTR;
+               else {
+                       ipath_dev_err(dd, "Unsupported InfiniPath board "
+                               "(serial number %.16s)!\n",
+                               dd->ipath_serial);
+                       return 1;
+               }
+       }
+
+       if (dd->ipath_minrev >= 4) {
+               /* Rev4+ reports extra errors via internal GPIO pins */
+               dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
+               dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
+               ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
+                                dd->ipath_gpio_mask);
+       }
 
        return 0;
 }
@@ -1592,7 +1586,10 @@ static int ipath_ht_get_base_info(struct ipath_portdata *pd, void *kbase)
        struct ipath_base_info *kinfo = kbase;
 
        kinfo->spi_runtime_flags |= IPATH_RUNTIME_HT |
-               IPATH_RUNTIME_RCVHDR_COPY;
+               IPATH_RUNTIME_PIO_REGSWAPPED;
+
+       if (pd->port_dd->ipath_minrev < 4)
+               kinfo->spi_runtime_flags |= IPATH_RUNTIME_RCVHDR_COPY;
 
        return 0;
 }
index 5b6ac9a1a7095ba28ec986ecd89a90e4939fdb02..0103d6f4847b7dcd354548a281b58fac48b1f8f7 100644 (file)
@@ -1143,11 +1143,14 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
                        pa |= 2 << 29;
        }
 
-       /* workaround chip bug 9437 by writing each TID twice
-        * and holding a spinlock around the writes, so they don't
-        * intermix with other TID (eager or expected) writes
-        * Unfortunately, this call can be done from interrupt level
-        * for the port 0 eager TIDs, so we have to use irqsave
+       /*
+        * Workaround chip bug 9437 by writing the scratch register
+        * before and after the TID, and with an io write barrier.
+        * We use a spinlock around the writes, so they can't intermix
+        * with other TID (eager or expected) writes (the chip bug
+        * is triggered by back to back TID writes). Unfortunately, this
+        * call can be done from interrupt level for the port 0 eager TIDs,
+        * so we have to use irqsave locks.
         */
        spin_lock_irqsave(&dd->ipath_tid_lock, flags);
        ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf);
@@ -1273,6 +1276,8 @@ static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
 static int ipath_pe_early_init(struct ipath_devdata *dd)
 {
        dd->ipath_flags |= IPATH_4BYTE_TID;
+       if (ipath_unordered_wc())
+               dd->ipath_flags |= IPATH_PIO_FLUSH_WC;
 
        /*
         * For openfabrics, we need to be able to handle an IB header of
@@ -1343,7 +1348,8 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
        dd = pd->port_dd;
 
 done:
-       kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
+       kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE |
+               IPATH_RUNTIME_FORCE_PIOAVAIL | IPATH_RUNTIME_PIO_REGSWAPPED;
        return 0;
 }
 
index b29fe7e9b11a598686052f3797b8f0b2c1c6f3d1..6a5dd5cd773d13cead689c6ffc16a702af4045e4 100644 (file)
@@ -275,6 +275,16 @@ static char *ib_linkstate(u32 linkstate)
        return ret;
 }
 
+void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
+{
+       struct ib_event event;
+
+       event.device = &dd->verbs_dev->ibdev;
+       event.element.port_num = 1;
+       event.event = ev;
+       ib_dispatch_event(&event);
+}
+
 static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
                                     ipath_err_t errs, int noprint)
 {
@@ -373,6 +383,8 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
        dd->ipath_ibpollcnt = 0;        /* some state other than 2 or 3 */
        ipath_stats.sps_iblink++;
        if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
+               if (dd->ipath_flags & IPATH_LINKACTIVE)
+                       signal_ib_event(dd, IB_EVENT_PORT_ERR);
                dd->ipath_flags |= IPATH_LINKDOWN;
                dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
                                     | IPATH_LINKACTIVE |
@@ -405,7 +417,10 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
                *dd->ipath_statusp |=
                        IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
                dd->ipath_f_setextled(dd, lstate, ltstate);
+               signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
        } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
+               if (dd->ipath_flags & IPATH_LINKACTIVE)
+                       signal_ib_event(dd, IB_EVENT_PORT_ERR);
                /*
                 * set INIT and DOWN.  Down is checked by most of the other
                 * code, but INIT is useful to know in a few places.
@@ -418,6 +433,8 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
                                        | IPATH_STATUS_IB_READY);
                dd->ipath_f_setextled(dd, lstate, ltstate);
        } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) {
+               if (dd->ipath_flags & IPATH_LINKACTIVE)
+                       signal_ib_event(dd, IB_EVENT_PORT_ERR);
                dd->ipath_flags |= IPATH_LINKARMED;
                dd->ipath_flags &=
                        ~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT |
@@ -688,17 +705,9 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
                                        chkerrpkts = 1;
                                dd->ipath_lastrcvhdrqtails[i] = tl;
                                pd->port_hdrqfull++;
-                               if (test_bit(IPATH_PORT_WAITING_OVERFLOW,
-                                            &pd->port_flag)) {
-                                       clear_bit(
-                                         IPATH_PORT_WAITING_OVERFLOW,
-                                         &pd->port_flag);
-                                       set_bit(
-                                         IPATH_PORT_WAITING_OVERFLOW,
-                                         &pd->int_flag);
-                                       wake_up_interruptible(
-                                         &pd->port_wait);
-                               }
+                               /* flush hdrqfull so that poll() sees it */
+                               wmb();
+                               wake_up_interruptible(&pd->port_wait);
                        }
                }
        }
@@ -960,6 +969,8 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
        int i;
        int rcvdint = 0;
 
+       /* test_bit below needs this... */
+       rmb();
        portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
                 dd->ipath_i_rcvavail_mask)
                | ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
@@ -967,22 +978,15 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
        for (i = 1; i < dd->ipath_cfgports; i++) {
                struct ipath_portdata *pd = dd->ipath_pd[i];
                if (portr & (1 << i) && pd && pd->port_cnt) {
-                       if (test_bit(IPATH_PORT_WAITING_RCV,
-                                    &pd->port_flag)) {
-                               clear_bit(IPATH_PORT_WAITING_RCV,
-                                         &pd->port_flag);
-                               set_bit(IPATH_PORT_WAITING_RCV,
-                                       &pd->int_flag);
+                       if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
+                                              &pd->port_flag)) {
                                clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
                                          &dd->ipath_rcvctrl);
                                wake_up_interruptible(&pd->port_wait);
                                rcvdint = 1;
-                       } else if (test_bit(IPATH_PORT_WAITING_URG,
-                                           &pd->port_flag)) {
-                               clear_bit(IPATH_PORT_WAITING_URG,
-                                         &pd->port_flag);
-                               set_bit(IPATH_PORT_WAITING_URG,
-                                       &pd->int_flag);
+                       } else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
+                                                     &pd->port_flag)) {
+                               pd->port_urgent++;
                                wake_up_interruptible(&pd->port_wait);
                        }
                }
@@ -1085,8 +1089,8 @@ irqreturn_t ipath_intr(int irq, void *data)
                 * GPIO_2 indicates (on some HT4xx boards) that a packet
                 *        has arrived for Port 0. Checking for this
                 *        is controlled by flag IPATH_GPIO_INTR.
-                * GPIO_3..5 on IBA6120 Rev2 chips indicate errors
-                *        that we need to count. Checking for this
+                * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
+                *        errors that we need to count. Checking for this
                 *        is controlled by flag IPATH_GPIO_ERRINTRS.
                 */
                u32 gpiostatus;
@@ -1137,10 +1141,8 @@ irqreturn_t ipath_intr(int irq, void *data)
                        /*
                         * Some unexpected bits remain. If they could have
                         * caused the interrupt, complain and clear.
-                        * MEA: this is almost certainly non-ideal.
-                        * we should look into auto-disable of unexpected
-                        * GPIO interrupts, possibly on a "three strikes"
-                        * basis.
+                        * To avoid repetition of this condition, also clear
+                        * the mask. It is almost certainly due to error.
                         */
                        const u32 mask = (u32) dd->ipath_gpio_mask;
 
@@ -1148,6 +1150,10 @@ irqreturn_t ipath_intr(int irq, void *data)
                                ipath_dbg("Unexpected GPIO IRQ bits %x\n",
                                  gpiostatus & mask);
                                to_clear |= (gpiostatus & mask);
+                               dd->ipath_gpio_mask &= ~(gpiostatus & mask);
+                               ipath_write_kreg(dd,
+                                       dd->ipath_kregs->kr_gpio_mask,
+                                       dd->ipath_gpio_mask);
                        }
                }
                if (to_clear) {
index 7a7966f7e4fff96cc5a2027072fb15893126d927..8786dd7922e4e3916170af24d8cb8752793ea7ba 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <asm/io.h>
+#include <rdma/ib_verbs.h>
 
 #include "ipath_common.h"
 #include "ipath_debug.h"
@@ -139,6 +140,12 @@ struct ipath_portdata {
        u32 port_pionowait;
        /* total number of rcvhdrqfull errors */
        u32 port_hdrqfull;
+       /* saved total number of rcvhdrqfull errors for poll edge trigger */
+       u32 port_hdrqfull_poll;
+       /* total number of polled urgent packets */
+       u32 port_urgent;
+       /* saved total number of polled urgent packets for poll edge trigger */
+       u32 port_urgent_poll;
        /* pid of process using this port */
        pid_t port_pid;
        /* same size as task_struct .comm[] */
@@ -724,6 +731,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 #define IPATH_LINKACTIVE    0x200
                /* link current state is unknown */
 #define IPATH_LINKUNK       0x400
+               /* Write combining flush needed for PIO */
+#define IPATH_PIO_FLUSH_WC  0x1000
                /* no IB cable, or no device on IB cable */
 #define IPATH_NOCABLE       0x4000
                /* Supports port zero per packet receive interrupts via
@@ -755,8 +764,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 #define IPATH_PORT_MASTER_UNINIT 4
                /* waiting for an urgent packet to arrive */
 #define IPATH_PORT_WAITING_URG 5
-               /* waiting for a header overflow */
-#define IPATH_PORT_WAITING_OVERFLOW 6
 
 /* free up any allocated data at closes */
 void ipath_free_data(struct ipath_portdata *dd);
@@ -769,6 +776,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
 int ipath_update_eeprom_log(struct ipath_devdata *dd);
 void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
+void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
 
 /*
  * Set LED override, only the two LSBs have "public" meaning, but
index d61c030445451076f1d3fae4d01cde99f034cb44..3d1432d1e3f40dbe7a3da11e7dbde429c21c7e19 100644 (file)
@@ -245,7 +245,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 
        /* Only return the mkey if the protection field allows it. */
        if (smp->method == IB_MGMT_METHOD_SET || dev->mkey == smp->mkey ||
-           (dev->mkeyprot_resv_lmc >> 6) == 0)
+           dev->mkeyprot == 0)
                pip->mkey = dev->mkey;
        pip->gid_prefix = dev->gid_prefix;
        lid = dev->dd->ipath_lid;
@@ -264,7 +264,7 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
        pip->portphysstate_linkdown =
                (ipath_cvt_physportstate[ibcstat & 0xf] << 4) |
                (get_linkdowndefaultstate(dev->dd) ? 1 : 2);
-       pip->mkeyprot_resv_lmc = dev->mkeyprot_resv_lmc;
+       pip->mkeyprot_resv_lmc = (dev->mkeyprot << 6) | dev->dd->ipath_lmc;
        pip->linkspeedactive_enabled = 0x11;    /* 2.5Gbps, 2.5Gbps */
        switch (dev->dd->ipath_ibmtu) {
        case 4096:
@@ -401,7 +401,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
        struct ib_port_info *pip = (struct ib_port_info *)smp->data;
        struct ib_event event;
        struct ipath_ibdev *dev;
-       u32 flags;
+       struct ipath_devdata *dd;
        char clientrereg = 0;
        u16 lid, smlid;
        u8 lwe;
@@ -415,6 +415,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
                goto err;
 
        dev = to_idev(ibdev);
+       dd = dev->dd;
        event.device = ibdev;
        event.element.port_num = port;
 
@@ -423,11 +424,12 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
        dev->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
 
        lid = be16_to_cpu(pip->lid);
-       if (lid != dev->dd->ipath_lid) {
+       if (dd->ipath_lid != lid ||
+           dd->ipath_lmc != (pip->mkeyprot_resv_lmc & 7)) {
                /* Must be a valid unicast LID address. */
                if (lid == 0 || lid >= IPATH_MULTICAST_LID_BASE)
                        goto err;
-               ipath_set_lid(dev->dd, lid, pip->mkeyprot_resv_lmc & 7);
+               ipath_set_lid(dd, lid, pip->mkeyprot_resv_lmc & 7);
                event.event = IB_EVENT_LID_CHANGE;
                ib_dispatch_event(&event);
        }
@@ -461,18 +463,18 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
        case 0: /* NOP */
                break;
        case 1: /* SLEEP */
-               if (set_linkdowndefaultstate(dev->dd, 1))
+               if (set_linkdowndefaultstate(dd, 1))
                        goto err;
                break;
        case 2: /* POLL */
-               if (set_linkdowndefaultstate(dev->dd, 0))
+               if (set_linkdowndefaultstate(dd, 0))
                        goto err;
                break;
        default:
                goto err;
        }
 
-       dev->mkeyprot_resv_lmc = pip->mkeyprot_resv_lmc;
+       dev->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
        dev->vl_high_limit = pip->vl_high_limit;
 
        switch ((pip->neighbormtu_mastersmsl >> 4) & 0xF) {
@@ -495,7 +497,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
                /* XXX We have already partially updated our state! */
                goto err;
        }
-       ipath_set_mtu(dev->dd, mtu);
+       ipath_set_mtu(dd, mtu);
 
        dev->sm_sl = pip->neighbormtu_mastersmsl & 0xF;
 
@@ -511,16 +513,16 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
         * later.
         */
        if (pip->pkey_violations == 0)
-               dev->z_pkey_violations = ipath_get_cr_errpkey(dev->dd);
+               dev->z_pkey_violations = ipath_get_cr_errpkey(dd);
 
        if (pip->qkey_violations == 0)
                dev->qkey_violations = 0;
 
        ore = pip->localphyerrors_overrunerrors;
-       if (set_phyerrthreshold(dev->dd, (ore >> 4) & 0xF))
+       if (set_phyerrthreshold(dd, (ore >> 4) & 0xF))
                goto err;
 
-       if (set_overrunthreshold(dev->dd, (ore & 0xF)))
+       if (set_overrunthreshold(dd, (ore & 0xF)))
                goto err;
 
        dev->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
@@ -538,7 +540,6 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
         * is down or is being set to down.
         */
        state = pip->linkspeed_portstate & 0xF;
-       flags = dev->dd->ipath_flags;
        lstate = (pip->portphysstate_linkdown >> 4) & 0xF;
        if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP))
                goto err;
@@ -554,7 +555,7 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
                /* FALLTHROUGH */
        case IB_PORT_DOWN:
                if (lstate == 0)
-                       if (get_linkdowndefaultstate(dev->dd))
+                       if (get_linkdowndefaultstate(dd))
                                lstate = IPATH_IB_LINKDOWN_SLEEP;
                        else
                                lstate = IPATH_IB_LINKDOWN;
@@ -566,27 +567,13 @@ static int recv_subn_set_portinfo(struct ib_smp *smp,
                        lstate = IPATH_IB_LINKDOWN_DISABLE;
                else
                        goto err;
-               ipath_set_linkstate(dev->dd, lstate);
-               if (flags & IPATH_LINKACTIVE) {
-                       event.event = IB_EVENT_PORT_ERR;
-                       ib_dispatch_event(&event);
-               }
+               ipath_set_linkstate(dd, lstate);
                break;
        case IB_PORT_ARMED:
-               if (!(flags & (IPATH_LINKINIT | IPATH_LINKACTIVE)))
-                       break;
-               ipath_set_linkstate(dev->dd, IPATH_IB_LINKARM);
-               if (flags & IPATH_LINKACTIVE) {
-                       event.event = IB_EVENT_PORT_ERR;
-                       ib_dispatch_event(&event);
-               }
+               ipath_set_linkstate(dd, IPATH_IB_LINKARM);
                break;
        case IB_PORT_ACTIVE:
-               if (!(flags & IPATH_LINKARMED))
-                       break;
-               ipath_set_linkstate(dev->dd, IPATH_IB_LINKACTIVE);
-               event.event = IB_EVENT_PORT_ACTIVE;
-               ib_dispatch_event(&event);
+               ipath_set_linkstate(dd, IPATH_IB_LINKACTIVE);
                break;
        default:
                /* XXX We have already partially updated our state! */
@@ -1350,7 +1337,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
        if (dev->mkey_lease_timeout && jiffies >= dev->mkey_lease_timeout) {
                /* Clear timeout and mkey protection field. */
                dev->mkey_lease_timeout = 0;
-               dev->mkeyprot_resv_lmc &= 0x3F;
+               dev->mkeyprot = 0;
        }
 
        /*
@@ -1361,7 +1348,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
            dev->mkey != smp->mkey &&
            (smp->method == IB_MGMT_METHOD_SET ||
             (smp->method == IB_MGMT_METHOD_GET &&
-             (dev->mkeyprot_resv_lmc >> 7) != 0))) {
+             dev->mkeyprot >= 2))) {
                if (dev->mkey_violations != 0xFFFF)
                        ++dev->mkey_violations;
                if (dev->mkey_lease_timeout ||
index 1324b35ff1f855b460e1a62723cd8f95a1023838..6a41fdbc8e57dfca779759de14137fe33b811015 100644 (file)
@@ -338,6 +338,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
        qp->s_busy = 0;
        qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
        qp->s_hdrwords = 0;
+       qp->s_wqe = NULL;
        qp->s_psn = 0;
        qp->r_psn = 0;
        qp->r_msn = 0;
@@ -376,13 +377,15 @@ static void ipath_reset_qp(struct ipath_qp *qp)
  * @err: the receive completion error to signal if a RWQE is active
  *
  * Flushes both send and receive work queues.
+ * Returns true if last WQE event should be generated.
  * The QP s_lock should be held and interrupts disabled.
  */
 
-void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
+int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 {
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
        struct ib_wc wc;
+       int ret = 0;
 
        ipath_dbg("QP%d/%d in error state\n",
                  qp->ibqp.qp_num, qp->remote_qpn);
@@ -453,7 +456,10 @@ void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
                wq->tail = tail;
 
                spin_unlock(&qp->r_rq.lock);
-       }
+       } else if (qp->ibqp.event_handler)
+               ret = 1;
+
+       return ret;
 }
 
 /**
@@ -472,6 +478,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        struct ipath_qp *qp = to_iqp(ibqp);
        enum ib_qp_state cur_state, new_state;
        unsigned long flags;
+       int lastwqe = 0;
        int ret;
 
        spin_lock_irqsave(&qp->s_lock, flags);
@@ -531,7 +538,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                break;
 
        case IB_QPS_ERR:
-               ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+               lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
                break;
 
        default:
@@ -590,6 +597,14 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        qp->state = new_state;
        spin_unlock_irqrestore(&qp->s_lock, flags);
 
+       if (lastwqe) {
+               struct ib_event ev;
+
+               ev.device = qp->ibqp.device;
+               ev.element.qp = &qp->ibqp;
+               ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+               qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+       }
        ret = 0;
        goto bail;
 
@@ -751,6 +766,9 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
        switch (init_attr->qp_type) {
        case IB_QPT_UC:
        case IB_QPT_RC:
+       case IB_QPT_UD:
+       case IB_QPT_SMI:
+       case IB_QPT_GSI:
                sz = sizeof(struct ipath_sge) *
                        init_attr->cap.max_send_sge +
                        sizeof(struct ipath_swqe);
@@ -759,10 +777,6 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
                        ret = ERR_PTR(-ENOMEM);
                        goto bail;
                }
-               /* FALLTHROUGH */
-       case IB_QPT_UD:
-       case IB_QPT_SMI:
-       case IB_QPT_GSI:
                sz = sizeof(*qp);
                if (init_attr->srq) {
                        struct ipath_srq *srq = to_isrq(init_attr->srq);
@@ -805,8 +819,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
                spin_lock_init(&qp->r_rq.lock);
                atomic_set(&qp->refcount, 0);
                init_waitqueue_head(&qp->wait);
-               tasklet_init(&qp->s_task, ipath_do_ruc_send,
-                            (unsigned long)qp);
+               tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
                INIT_LIST_HEAD(&qp->piowait);
                INIT_LIST_HEAD(&qp->timerwait);
                qp->state = IB_QPS_RESET;
index 46744ea2babdb336510b3772ed7dfd9fb6d8608c..5c29b2bfea17b7c6236181f214b4224fbf4cb60d 100644 (file)
@@ -81,9 +81,8 @@ static void ipath_init_restart(struct ipath_qp *qp, struct ipath_swqe *wqe)
  * Note that we are in the responder's side of the QP context.
  * Note the QP s_lock must be held.
  */
-static int ipath_make_rc_ack(struct ipath_qp *qp,
-                            struct ipath_other_headers *ohdr,
-                            u32 pmtu, u32 *bth0p, u32 *bth2p)
+static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
+                            struct ipath_other_headers *ohdr, u32 pmtu)
 {
        struct ipath_ack_entry *e;
        u32 hwords;
@@ -192,8 +191,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
        }
        qp->s_hdrwords = hwords;
        qp->s_cur_size = len;
-       *bth0p = bth0 | (1 << 22); /* Set M bit */
-       *bth2p = bth2;
+       ipath_make_ruc_header(dev, qp, ohdr, bth0, bth2);
        return 1;
 
 bail:
@@ -203,32 +201,39 @@ bail:
 /**
  * ipath_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  * @qp: a pointer to the QP
- * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
- * @bth0p: pointer to the BTH opcode word
- * @bth2p: pointer to the BTH PSN word
  *
  * Return 1 if constructed; otherwise, return 0.
- * Note the QP s_lock must be held and interrupts disabled.
  */
-int ipath_make_rc_req(struct ipath_qp *qp,
-                     struct ipath_other_headers *ohdr,
-                     u32 pmtu, u32 *bth0p, u32 *bth2p)
+int ipath_make_rc_req(struct ipath_qp *qp)
 {
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+       struct ipath_other_headers *ohdr;
        struct ipath_sge_state *ss;
        struct ipath_swqe *wqe;
        u32 hwords;
        u32 len;
        u32 bth0;
        u32 bth2;
+       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
        char newreq;
+       unsigned long flags;
+       int ret = 0;
+
+       ohdr = &qp->s_hdr.u.oth;
+       if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+               ohdr = &qp->s_hdr.u.l.oth;
+
+       /*
+        * The lock is needed to synchronize between the sending tasklet,
+        * the receive interrupt handler, and timeout resends.
+        */
+       spin_lock_irqsave(&qp->s_lock, flags);
 
        /* Sending responses has higher priority over sending requests. */
        if ((qp->r_head_ack_queue != qp->s_tail_ack_queue ||
             (qp->s_flags & IPATH_S_ACK_PENDING) ||
             qp->s_ack_state != OP(ACKNOWLEDGE)) &&
-           ipath_make_rc_ack(qp, ohdr, pmtu, bth0p, bth2p))
+           ipath_make_rc_ack(dev, qp, ohdr, pmtu))
                goto done;
 
        if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) ||
@@ -560,13 +565,12 @@ int ipath_make_rc_req(struct ipath_qp *qp,
        qp->s_hdrwords = hwords;
        qp->s_cur_sge = ss;
        qp->s_cur_size = len;
-       *bth0p = bth0 | (qp->s_state << 24);
-       *bth2p = bth2;
+       ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
 done:
-       return 1;
-
+       ret = 1;
 bail:
-       return 0;
+       spin_unlock_irqrestore(&qp->s_lock, flags);
+       return ret;
 }
 
 /**
@@ -627,7 +631,7 @@ static void send_rc_ack(struct ipath_qp *qp)
        /*
         * If we can send the ACK, clear the ACK state.
         */
-       if (ipath_verbs_send(dev->dd, hwords, (u32 *) &hdr, 0, NULL) == 0) {
+       if (ipath_verbs_send(qp, &hdr, hwords, NULL, 0) == 0) {
                dev->n_unicast_xmit++;
                goto done;
        }
@@ -757,7 +761,9 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
                wc->vendor_err = 0;
                wc->byte_len = 0;
                wc->qp = &qp->ibqp;
+               wc->imm_data = 0;
                wc->src_qp = qp->remote_qpn;
+               wc->wc_flags = 0;
                wc->pkey_index = 0;
                wc->slid = qp->remote_ah_attr.dlid;
                wc->sl = qp->remote_ah_attr.sl;
@@ -1041,7 +1047,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
                        wc.vendor_err = 0;
                        wc.byte_len = 0;
                        wc.qp = &qp->ibqp;
+                       wc.imm_data = 0;
                        wc.src_qp = qp->remote_qpn;
+                       wc.wc_flags = 0;
                        wc.pkey_index = 0;
                        wc.slid = qp->remote_ah_attr.dlid;
                        wc.sl = qp->remote_ah_attr.sl;
@@ -1453,6 +1461,19 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
                        qp->r_ack_psn = qp->r_psn - 1;
                        goto send_ack;
                }
+               /*
+                * Try to send a simple ACK to work around a Mellanox bug
+                * which doesn't accept a RDMA read response or atomic
+                * response as an ACK for earlier SENDs or RDMA writes.
+                */
+               if (qp->r_head_ack_queue == qp->s_tail_ack_queue &&
+                   !(qp->s_flags & IPATH_S_ACK_PENDING) &&
+                   qp->s_ack_state == OP(ACKNOWLEDGE)) {
+                       spin_unlock_irqrestore(&qp->s_lock, flags);
+                       qp->r_nak_state = 0;
+                       qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
+                       goto send_ack;
+               }
                /*
                 * Resend the RDMA read or atomic op which
                 * ACKs this duplicate request.
@@ -1476,11 +1497,21 @@ send_ack:
 static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
 {
        unsigned long flags;
+       int lastwqe;
 
        spin_lock_irqsave(&qp->s_lock, flags);
        qp->state = IB_QPS_ERR;
-       ipath_error_qp(qp, err);
+       lastwqe = ipath_error_qp(qp, err);
        spin_unlock_irqrestore(&qp->s_lock, flags);
+
+       if (lastwqe) {
+               struct ib_event ev;
+
+               ev.device = qp->ibqp.device;
+               ev.element.qp = &qp->ibqp;
+               ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+               qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+       }
 }
 
 static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
index c69c2523944339eec85a832edc6a0f596267f9ec..4b6b7ee8e5c14f4dda64e3ae0fb05e784b07136e 100644 (file)
@@ -31,6 +31,8 @@
  * SOFTWARE.
  */
 
+#include <linux/spinlock.h>
+
 #include "ipath_verbs.h"
 #include "ipath_kernel.h"
 
@@ -106,27 +108,30 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
        spin_unlock_irqrestore(&dev->pending_lock, flags);
 }
 
-static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
+/**
+ * ipath_init_sge - Validate a RWQE and fill in the SGE state
+ * @qp: the QP
+ *
+ * Return 1 if OK.
+ */
+int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+                  u32 *lengthp, struct ipath_sge_state *ss)
 {
-       int user = to_ipd(qp->ibqp.pd)->user;
        int i, j, ret;
        struct ib_wc wc;
 
-       qp->r_len = 0;
+       *lengthp = 0;
        for (i = j = 0; i < wqe->num_sge; i++) {
                if (wqe->sg_list[i].length == 0)
                        continue;
                /* Check LKEY */
-               if ((user && wqe->sg_list[i].lkey == 0) ||
-                   !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i],
-                                  IB_ACCESS_LOCAL_WRITE))
+               if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
+                                  &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
                        goto bad_lkey;
-               qp->r_len += wqe->sg_list[i].length;
+               *lengthp += wqe->sg_list[i].length;
                j++;
        }
-       qp->r_sge.sge = qp->r_sg_list[0];
-       qp->r_sge.sg_list = qp->r_sg_list + 1;
-       qp->r_sge.num_sge = j;
+       ss->num_sge = j;
        ret = 1;
        goto bail;
 
@@ -172,6 +177,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
        u32 tail;
        int ret;
 
+       qp->r_sge.sg_list = qp->r_sg_list;
+
        if (qp->ibqp.srq) {
                srq = to_isrq(qp->ibqp.srq);
                handler = srq->ibsrq.event_handler;
@@ -199,7 +206,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
                wqe = get_rwqe_ptr(rq, tail);
                if (++tail >= rq->size)
                        tail = 0;
-       } while (!wr_id_only && !init_sge(qp, wqe));
+       } while (!wr_id_only && !ipath_init_sge(qp, wqe, &qp->r_len,
+                                               &qp->r_sge));
        qp->r_wr_id = wqe->wr_id;
        wq->tail = tail;
 
@@ -239,9 +247,9 @@ bail:
 
 /**
  * ipath_ruc_loopback - handle UC and RC lookback requests
- * @sqp: the loopback QP
+ * @sqp: the sending QP
  *
- * This is called from ipath_do_uc_send() or ipath_do_rc_send() to
+ * This is called from ipath_do_send() to
  * forward a WQE addressed to the same HCA.
  * Note that although we are single threaded due to the tasklet, we still
  * have to protect against post_send().  We don't have to worry about
@@ -450,40 +458,18 @@ again:
        wc.byte_len = wqe->length;
        wc.qp = &qp->ibqp;
        wc.src_qp = qp->remote_qpn;
-       /* XXX do we know which pkey matched? Only needed for GSI. */
        wc.pkey_index = 0;
        wc.slid = qp->remote_ah_attr.dlid;
        wc.sl = qp->remote_ah_attr.sl;
        wc.dlid_path_bits = 0;
+       wc.port_num = 1;
        /* Signal completion event if the solicited bit is set. */
        ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
                       wqe->wr.send_flags & IB_SEND_SOLICITED);
 
 send_comp:
        sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
-
-       if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-           (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-               wc.wr_id = wqe->wr.wr_id;
-               wc.status = IB_WC_SUCCESS;
-               wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-               wc.vendor_err = 0;
-               wc.byte_len = wqe->length;
-               wc.qp = &sqp->ibqp;
-               wc.src_qp = 0;
-               wc.pkey_index = 0;
-               wc.slid = 0;
-               wc.sl = 0;
-               wc.dlid_path_bits = 0;
-               wc.port_num = 0;
-               ipath_cq_enter(to_icq(sqp->ibqp.send_cq), &wc, 0);
-       }
-
-       /* Update s_last now that we are finished with the SWQE */
-       spin_lock_irqsave(&sqp->s_lock, flags);
-       if (++sqp->s_last >= sqp->s_size)
-               sqp->s_last = 0;
-       spin_unlock_irqrestore(&sqp->s_lock, flags);
+       ipath_send_complete(sqp, wqe, IB_WC_SUCCESS);
        goto again;
 
 done:
@@ -491,13 +477,11 @@ done:
                wake_up(&qp->wait);
 }
 
-static int want_buffer(struct ipath_devdata *dd)
+static void want_buffer(struct ipath_devdata *dd)
 {
        set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
        ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
                         dd->ipath_sendctrl);
-
-       return 0;
 }
 
 /**
@@ -507,14 +491,11 @@ static int want_buffer(struct ipath_devdata *dd)
  *
  * Called when we run out of PIO buffers.
  */
-static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
+static void ipath_no_bufs_available(struct ipath_qp *qp,
+                                   struct ipath_ibdev *dev)
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&dev->pending_lock, flags);
-       if (list_empty(&qp->piowait))
-               list_add_tail(&qp->piowait, &dev->piowait);
-       spin_unlock_irqrestore(&dev->pending_lock, flags);
        /*
         * Note that as soon as want_buffer() is called and
         * possibly before it returns, ipath_ib_piobufavail()
@@ -524,100 +505,13 @@ static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev
         * We leave the busy flag set so that another post send doesn't
         * try to put the same QP on the piowait list again.
         */
+       spin_lock_irqsave(&dev->pending_lock, flags);
+       list_add_tail(&qp->piowait, &dev->piowait);
+       spin_unlock_irqrestore(&dev->pending_lock, flags);
        want_buffer(dev->dd);
        dev->n_piowait++;
 }
 
-/**
- * ipath_post_ruc_send - post RC and UC sends
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
-{
-       struct ipath_swqe *wqe;
-       unsigned long flags;
-       u32 next;
-       int i, j;
-       int acc;
-       int ret;
-
-       /*
-        * Don't allow RDMA reads or atomic operations on UC or
-        * undefined operations.
-        * Make sure buffer is large enough to hold the result for atomics.
-        */
-       if (qp->ibqp.qp_type == IB_QPT_UC) {
-               if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-       } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
-               ret = -EINVAL;
-               goto bail;
-       } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-                  (wr->num_sge == 0 ||
-                   wr->sg_list[0].length < sizeof(u64) ||
-                   wr->sg_list[0].addr & (sizeof(u64) - 1))) {
-               ret = -EINVAL;
-               goto bail;
-       } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       /* IB spec says that num_sge == 0 is OK. */
-       if (wr->num_sge > qp->s_max_sge) {
-               ret = -ENOMEM;
-               goto bail;
-       }
-       spin_lock_irqsave(&qp->s_lock, flags);
-       next = qp->s_head + 1;
-       if (next >= qp->s_size)
-               next = 0;
-       if (next == qp->s_last) {
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       wqe = get_swqe_ptr(qp, qp->s_head);
-       wqe->wr = *wr;
-       wqe->ssn = qp->s_ssn++;
-       wqe->sg_list[0].mr = NULL;
-       wqe->sg_list[0].vaddr = NULL;
-       wqe->sg_list[0].length = 0;
-       wqe->sg_list[0].sge_length = 0;
-       wqe->length = 0;
-       acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0;
-       for (i = 0, j = 0; i < wr->num_sge; i++) {
-               if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               if (wr->sg_list[i].length == 0)
-                       continue;
-               if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i],
-                                  acc)) {
-                       spin_unlock_irqrestore(&qp->s_lock, flags);
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               wqe->length += wr->sg_list[i].length;
-               j++;
-       }
-       wqe->wr.num_sge = j;
-       qp->s_head = next;
-       spin_unlock_irqrestore(&qp->s_lock, flags);
-
-       ipath_do_ruc_send((unsigned long) qp);
-
-       ret = 0;
-
-bail:
-       return ret;
-}
-
 /**
  * ipath_make_grh - construct a GRH header
  * @dev: a pointer to the ipath device
@@ -648,39 +542,66 @@ u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
        return sizeof(struct ib_grh) / sizeof(u32);
 }
 
+void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
+                          struct ipath_other_headers *ohdr,
+                          u32 bth0, u32 bth2)
+{
+       u16 lrh0;
+       u32 nwords;
+       u32 extra_bytes;
+
+       /* Construct the header. */
+       extra_bytes = -qp->s_cur_size & 3;
+       nwords = (qp->s_cur_size + extra_bytes) >> 2;
+       lrh0 = IPATH_LRH_BTH;
+       if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+               qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
+                                                &qp->remote_ah_attr.grh,
+                                                qp->s_hdrwords, nwords);
+               lrh0 = IPATH_LRH_GRH;
+       }
+       lrh0 |= qp->remote_ah_attr.sl << 4;
+       qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
+       qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+       qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+       qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
+       bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
+       bth0 |= extra_bytes << 20;
+       ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
+       ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
+       ohdr->bth[2] = cpu_to_be32(bth2);
+}
+
 /**
- * ipath_do_ruc_send - perform a send on an RC or UC QP
+ * ipath_do_send - perform a send on a QP
  * @data: contains a pointer to the QP
  *
  * Process entries in the send work queue until credit or queue is
  * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
- * Otherwise, after we drop the QP s_lock, two threads could send
- * packets out of order.
+ * Otherwise, two threads could send packets out of order.
  */
-void ipath_do_ruc_send(unsigned long data)
+void ipath_do_send(unsigned long data)
 {
        struct ipath_qp *qp = (struct ipath_qp *)data;
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-       unsigned long flags;
-       u16 lrh0;
-       u32 nwords;
-       u32 extra_bytes;
-       u32 bth0;
-       u32 bth2;
-       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-       struct ipath_other_headers *ohdr;
+       int (*make_req)(struct ipath_qp *qp);
 
        if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
                goto bail;
 
-       if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
+       if ((qp->ibqp.qp_type == IB_QPT_RC ||
+            qp->ibqp.qp_type == IB_QPT_UC) &&
+           qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
                ipath_ruc_loopback(qp);
                goto clear;
        }
 
-       ohdr = &qp->s_hdr.u.oth;
-       if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-               ohdr = &qp->s_hdr.u.l.oth;
+       if (qp->ibqp.qp_type == IB_QPT_RC)
+              make_req = ipath_make_rc_req;
+       else if (qp->ibqp.qp_type == IB_QPT_UC)
+              make_req = ipath_make_uc_req;
+       else
+              make_req = ipath_make_ud_req;
 
 again:
        /* Check for a constructed packet to be sent. */
@@ -689,9 +610,8 @@ again:
                 * If no PIO bufs are available, return.  An interrupt will
                 * call ipath_ib_piobufavail() when one is available.
                 */
-               if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
-                                    (u32 *) &qp->s_hdr, qp->s_cur_size,
-                                    qp->s_cur_sge)) {
+               if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
+                                    qp->s_cur_sge, qp->s_cur_size)) {
                        ipath_no_bufs_available(qp, dev);
                        goto bail;
                }
@@ -700,54 +620,42 @@ again:
                qp->s_hdrwords = 0;
        }
 
-       /*
-        * The lock is needed to synchronize between setting
-        * qp->s_ack_state, resend timer, and post_send().
-        */
-       spin_lock_irqsave(&qp->s_lock, flags);
-
-       if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
-              ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
-              ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
-               /*
-                * Clear the busy bit before unlocking to avoid races with
-                * adding new work queue items and then failing to process
-                * them.
-                */
-               clear_bit(IPATH_S_BUSY, &qp->s_busy);
-               spin_unlock_irqrestore(&qp->s_lock, flags);
-               goto bail;
-       }
+       if (make_req(qp))
+               goto again;
+clear:
+       clear_bit(IPATH_S_BUSY, &qp->s_busy);
+bail:;
+}
 
-       spin_unlock_irqrestore(&qp->s_lock, flags);
+void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
+                        enum ib_wc_status status)
+{
+       u32 last = qp->s_last;
 
-       /* Construct the header. */
-       extra_bytes = (4 - qp->s_cur_size) & 3;
-       nwords = (qp->s_cur_size + extra_bytes) >> 2;
-       lrh0 = IPATH_LRH_BTH;
-       if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-               qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
-                                                &qp->remote_ah_attr.grh,
-                                                qp->s_hdrwords, nwords);
-               lrh0 = IPATH_LRH_GRH;
-       }
-       lrh0 |= qp->remote_ah_attr.sl << 4;
-       qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
-       qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-       qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
-                                      SIZE_OF_CRC);
-       qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
-       bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
-       bth0 |= extra_bytes << 20;
-       ohdr->bth[0] = cpu_to_be32(bth0);
-       ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
-       ohdr->bth[2] = cpu_to_be32(bth2);
+       if (++last == qp->s_size)
+               last = 0;
+       qp->s_last = last;
 
-       /* Check for more work to do. */
-       goto again;
+       /* See ch. 11.2.4.1 and 10.7.3.1 */
+       if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
+           (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+           status != IB_WC_SUCCESS) {
+               struct ib_wc wc;
 
-clear:
-       clear_bit(IPATH_S_BUSY, &qp->s_busy);
-bail:
-       return;
+               wc.wr_id = wqe->wr.wr_id;
+               wc.status = status;
+               wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
+               wc.vendor_err = 0;
+               wc.byte_len = wqe->length;
+               wc.imm_data = 0;
+               wc.qp = &qp->ibqp;
+               wc.src_qp = 0;
+               wc.wc_flags = 0;
+               wc.pkey_index = 0;
+               wc.slid = 0;
+               wc.sl = 0;
+               wc.dlid_path_bits = 0;
+               wc.port_num = 0;
+               ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+       }
 }
index bae4f56f7271f05b65660a84bf886a1ed19a1b6f..f0271415cd5b19558f7fae74baaef2330a96490d 100644 (file)
@@ -55,7 +55,6 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
        u64 val64;
        unsigned long t0, t1;
        u64 ret;
-       unsigned long flags;
 
        t0 = jiffies;
        /* If fast increment counters are only 32 bits, snapshot them,
@@ -92,18 +91,12 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
        if (creg == dd->ipath_cregs->cr_wordsendcnt) {
                if (val != dd->ipath_lastsword) {
                        dd->ipath_sword += val - dd->ipath_lastsword;
-                       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-                       dd->ipath_traffic_wds += val - dd->ipath_lastsword;
-                       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
                        dd->ipath_lastsword = val;
                }
                val64 = dd->ipath_sword;
        } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
                if (val != dd->ipath_lastrword) {
                        dd->ipath_rword += val - dd->ipath_lastrword;
-                       spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-                       dd->ipath_traffic_wds += val - dd->ipath_lastrword;
-                       spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
                        dd->ipath_lastrword = val;
                }
                val64 = dd->ipath_rword;
@@ -247,6 +240,7 @@ void ipath_get_faststats(unsigned long opaque)
        u32 val;
        static unsigned cnt;
        unsigned long flags;
+       u64 traffic_wds;
 
        /*
         * don't access the chip while running diags, or memory diags can
@@ -262,12 +256,13 @@ void ipath_get_faststats(unsigned long opaque)
         * exceeding a threshold, so we need to check the word-counts
         * even if they are 64-bit.
         */
-       ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-       ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+       traffic_wds = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt) +
+               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
        spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
-       if (dd->ipath_traffic_wds  >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
+       traffic_wds -= dd->ipath_traffic_wds;
+       dd->ipath_traffic_wds += traffic_wds;
+       if (traffic_wds  >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
                atomic_add(5, &dd->ipath_active_time); /* S/B #define */
-       dd->ipath_traffic_wds = 0;
        spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
 
        if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
index 16238cd3a036ddcf03899b0f37a91fd85fcd2eba..e1ad7cfc21fd66b89e5f46055fb611b730aa94ab 100644 (file)
@@ -163,6 +163,42 @@ static ssize_t show_boardversion(struct device *dev,
        return scnprintf(buf, PAGE_SIZE, "%s", dd->ipath_boardversion);
 }
 
+static ssize_t show_lmc(struct device *dev,
+                       struct device_attribute *attr,
+                       char *buf)
+{
+       struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+       return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_lmc);
+}
+
+static ssize_t store_lmc(struct device *dev,
+                        struct device_attribute *attr,
+                        const char *buf,
+                        size_t count)
+{
+       struct ipath_devdata *dd = dev_get_drvdata(dev);
+       u16 lmc = 0;
+       int ret;
+
+       ret = ipath_parse_ushort(buf, &lmc);
+       if (ret < 0)
+               goto invalid;
+
+       if (lmc > 7) {
+               ret = -EINVAL;
+               goto invalid;
+       }
+
+       ipath_set_lid(dd, dd->ipath_lid, lmc);
+
+       goto bail;
+invalid:
+       ipath_dev_err(dd, "attempt to set invalid LMC %u\n", lmc);
+bail:
+       return ret;
+}
+
 static ssize_t show_lid(struct device *dev,
                        struct device_attribute *attr,
                        char *buf)
@@ -190,7 +226,7 @@ static ssize_t store_lid(struct device *dev,
                goto invalid;
        }
 
-       ipath_set_lid(dd, lid, 0);
+       ipath_set_lid(dd, lid, dd->ipath_lmc);
 
        goto bail;
 invalid:
@@ -648,6 +684,7 @@ static struct attribute_group driver_attr_group = {
 };
 
 static DEVICE_ATTR(guid, S_IWUSR | S_IRUGO, show_guid, store_guid);
+static DEVICE_ATTR(lmc, S_IWUSR | S_IRUGO, show_lmc, store_lmc);
 static DEVICE_ATTR(lid, S_IWUSR | S_IRUGO, show_lid, store_lid);
 static DEVICE_ATTR(link_state, S_IWUSR, NULL, store_link_state);
 static DEVICE_ATTR(mlid, S_IWUSR | S_IRUGO, show_mlid, store_mlid);
@@ -667,6 +704,7 @@ static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
 
 static struct attribute *dev_attributes[] = {
        &dev_attr_guid.attr,
+       &dev_attr_lmc.attr,
        &dev_attr_lid.attr,
        &dev_attr_link_state.attr,
        &dev_attr_mlid.attr,
index 8380fbc50d2cbe3f9f398cc12d9cd617541f2e7e..2dd8de20d221a657f670b6bc63b3706ac94f53c3 100644 (file)
 /* cut down ridiculously long IB macro names */
 #define OP(x) IB_OPCODE_UC_##x
 
-static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
-                              struct ib_wc *wc)
-{
-       if (++qp->s_last == qp->s_size)
-               qp->s_last = 0;
-       if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-           (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-               wc->wr_id = wqe->wr.wr_id;
-               wc->status = IB_WC_SUCCESS;
-               wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-               wc->vendor_err = 0;
-               wc->byte_len = wqe->length;
-               wc->qp = &qp->ibqp;
-               wc->src_qp = qp->remote_qpn;
-               wc->pkey_index = 0;
-               wc->slid = qp->remote_ah_attr.dlid;
-               wc->sl = qp->remote_ah_attr.sl;
-               wc->dlid_path_bits = 0;
-               wc->port_num = 0;
-               ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
-       }
-}
-
 /**
  * ipath_make_uc_req - construct a request packet (SEND, RDMA write)
  * @qp: a pointer to the QP
- * @ohdr: a pointer to the IB header being constructed
- * @pmtu: the path MTU
- * @bth0p: pointer to the BTH opcode word
- * @bth2p: pointer to the BTH PSN word
  *
  * Return 1 if constructed; otherwise, return 0.
- * Note the QP s_lock must be held and interrupts disabled.
  */
-int ipath_make_uc_req(struct ipath_qp *qp,
-                     struct ipath_other_headers *ohdr,
-                     u32 pmtu, u32 *bth0p, u32 *bth2p)
+int ipath_make_uc_req(struct ipath_qp *qp)
 {
+       struct ipath_other_headers *ohdr;
        struct ipath_swqe *wqe;
        u32 hwords;
        u32 bth0;
        u32 len;
-       struct ib_wc wc;
+       u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+       int ret = 0;
 
        if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))
                goto done;
 
+       ohdr = &qp->s_hdr.u.oth;
+       if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+               ohdr = &qp->s_hdr.u.l.oth;
+
        /* header size in 32-bit words LRH+BTH = (8+12)/4. */
        hwords = 5;
        bth0 = 1 << 22; /* Set M bit */
 
        /* Get the next send request. */
-       wqe = get_swqe_ptr(qp, qp->s_last);
+       wqe = get_swqe_ptr(qp, qp->s_cur);
+       qp->s_wqe = NULL;
        switch (qp->s_state) {
        default:
-               /*
-                * Signal the completion of the last send
-                * (if there is one).
-                */
-               if (qp->s_last != qp->s_tail) {
-                       complete_last_send(qp, wqe, &wc);
-                       wqe = get_swqe_ptr(qp, qp->s_last);
-               }
-
                /* Check if send work queue is empty. */
-               if (qp->s_tail == qp->s_head)
+               if (qp->s_cur == qp->s_head)
                        goto done;
                /*
                 * Start a new request.
@@ -131,6 +99,9 @@ int ipath_make_uc_req(struct ipath_qp *qp,
                        }
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                bth0 |= 1 << 23;
+                       qp->s_wqe = wqe;
+                       if (++qp->s_cur >= qp->s_size)
+                               qp->s_cur = 0;
                        break;
 
                case IB_WR_RDMA_WRITE:
@@ -157,13 +128,14 @@ int ipath_make_uc_req(struct ipath_qp *qp,
                                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                        bth0 |= 1 << 23;
                        }
+                       qp->s_wqe = wqe;
+                       if (++qp->s_cur >= qp->s_size)
+                               qp->s_cur = 0;
                        break;
 
                default:
                        goto done;
                }
-               if (++qp->s_tail >= qp->s_size)
-                       qp->s_tail = 0;
                break;
 
        case OP(SEND_FIRST):
@@ -185,6 +157,9 @@ int ipath_make_uc_req(struct ipath_qp *qp,
                }
                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                        bth0 |= 1 << 23;
+               qp->s_wqe = wqe;
+               if (++qp->s_cur >= qp->s_size)
+                       qp->s_cur = 0;
                break;
 
        case OP(RDMA_WRITE_FIRST):
@@ -207,18 +182,22 @@ int ipath_make_uc_req(struct ipath_qp *qp,
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                bth0 |= 1 << 23;
                }
+               qp->s_wqe = wqe;
+               if (++qp->s_cur >= qp->s_size)
+                       qp->s_cur = 0;
                break;
        }
        qp->s_len -= len;
        qp->s_hdrwords = hwords;
        qp->s_cur_sge = &qp->s_sge;
        qp->s_cur_size = len;
-       *bth0p = bth0 | (qp->s_state << 24);
-       *bth2p = qp->s_next_psn++ & IPATH_PSN_MASK;
-       return 1;
+       ipath_make_ruc_header(to_idev(qp->ibqp.device),
+                             qp, ohdr, bth0 | (qp->s_state << 24),
+                             qp->s_next_psn++ & IPATH_PSN_MASK);
+       ret = 1;
 
 done:
-       return 0;
+       return ret;
 }
 
 /**
@@ -485,6 +464,16 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 
        case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
        rdma_last_imm:
+               if (header_in_data) {
+                       wc.imm_data = *(__be32 *) data;
+                       data += sizeof(__be32);
+               } else {
+                       /* Immediate data comes after BTH */
+                       wc.imm_data = ohdr->u.imm_data;
+               }
+               hdrsize += 4;
+               wc.wc_flags = IB_WC_WITH_IMM;
+
                /* Get the number of bytes the message was padded by. */
                pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
                /* Check for invalid length. */
@@ -505,16 +494,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                        dev->n_pkt_drops++;
                        goto done;
                }
-               if (header_in_data) {
-                       wc.imm_data = *(__be32 *) data;
-                       data += sizeof(__be32);
-               } else {
-                       /* Immediate data comes after BTH */
-                       wc.imm_data = ohdr->u.imm_data;
-               }
-               hdrsize += 4;
-               wc.wc_flags = IB_WC_WITH_IMM;
-               wc.byte_len = 0;
+               wc.byte_len = qp->r_len;
                goto last_imm;
 
        case OP(RDMA_WRITE_LAST):
index f9a3338a5fb7aa6e90b57c44e88c3b55fe8de87f..16a2a938b520517ae74c9746450df2a0d436c06a 100644 (file)
 #include "ipath_verbs.h"
 #include "ipath_kernel.h"
 
-static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
-                   u32 *lengthp, struct ipath_sge_state *ss)
-{
-       int user = to_ipd(qp->ibqp.pd)->user;
-       int i, j, ret;
-       struct ib_wc wc;
-
-       *lengthp = 0;
-       for (i = j = 0; i < wqe->num_sge; i++) {
-               if (wqe->sg_list[i].length == 0)
-                       continue;
-               /* Check LKEY */
-               if ((user && wqe->sg_list[i].lkey == 0) ||
-                   !ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
-                                  &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
-                       goto bad_lkey;
-               *lengthp += wqe->sg_list[i].length;
-               j++;
-       }
-       ss->num_sge = j;
-       ret = 1;
-       goto bail;
-
-bad_lkey:
-       wc.wr_id = wqe->wr_id;
-       wc.status = IB_WC_LOC_PROT_ERR;
-       wc.opcode = IB_WC_RECV;
-       wc.vendor_err = 0;
-       wc.byte_len = 0;
-       wc.imm_data = 0;
-       wc.qp = &qp->ibqp;
-       wc.src_qp = 0;
-       wc.wc_flags = 0;
-       wc.pkey_index = 0;
-       wc.slid = 0;
-       wc.sl = 0;
-       wc.dlid_path_bits = 0;
-       wc.port_num = 0;
-       /* Signal solicited completion event. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-       ret = 0;
-bail:
-       return ret;
-}
-
 /**
  * ipath_ud_loopback - handle send on loopback QPs
- * @sqp: the QP
- * @ss: the SGE state
- * @length: the length of the data to send
- * @wr: the work request
- * @wc: the work completion entry
+ * @sqp: the sending QP
+ * @swqe: the send work request
  *
- * This is called from ipath_post_ud_send() to forward a WQE addressed
+ * This is called from ipath_make_ud_req() to forward a WQE addressed
  * to the same HCA.
  * Note that the receive interrupt handler may be calling ipath_ud_rcv()
  * while this is being called.
  */
-static void ipath_ud_loopback(struct ipath_qp *sqp,
-                             struct ipath_sge_state *ss,
-                             u32 length, struct ib_send_wr *wr,
-                             struct ib_wc *wc)
+static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
 {
        struct ipath_ibdev *dev = to_idev(sqp->ibqp.device);
        struct ipath_qp *qp;
@@ -110,12 +59,18 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
        struct ipath_rwq *wq;
        struct ipath_rwqe *wqe;
        void (*handler)(struct ib_event *, void *);
+       struct ib_wc wc;
        u32 tail;
        u32 rlen;
+       u32 length;
 
-       qp = ipath_lookup_qpn(&dev->qp_table, wr->wr.ud.remote_qpn);
-       if (!qp)
-               return;
+       qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
+       if (!qp) {
+               dev->n_pkt_drops++;
+               goto send_comp;
+       }
+
+       rsge.sg_list = NULL;
 
        /*
         * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
@@ -123,39 +78,34 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
         * qkey from the QP context instead of the WR (see 10.2.5).
         */
        if (unlikely(qp->ibqp.qp_num &&
-                    ((int) wr->wr.ud.remote_qkey < 0
-                     ? qp->qkey : wr->wr.ud.remote_qkey) != qp->qkey)) {
+                    ((int) swqe->wr.wr.ud.remote_qkey < 0 ?
+                     sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) {
                /* XXX OK to lose a count once in a while. */
                dev->qkey_violations++;
                dev->n_pkt_drops++;
-               goto done;
+               goto drop;
        }
 
        /*
         * A GRH is expected to preceed the data even if not
         * present on the wire.
         */
-       wc->byte_len = length + sizeof(struct ib_grh);
+       length = swqe->length;
+       wc.byte_len = length + sizeof(struct ib_grh);
 
-       if (wr->opcode == IB_WR_SEND_WITH_IMM) {
-               wc->wc_flags = IB_WC_WITH_IMM;
-               wc->imm_data = wr->imm_data;
+       if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+               wc.wc_flags = IB_WC_WITH_IMM;
+               wc.imm_data = swqe->wr.imm_data;
        } else {
-               wc->wc_flags = 0;
-               wc->imm_data = 0;
+               wc.wc_flags = 0;
+               wc.imm_data = 0;
        }
 
-       if (wr->num_sge > 1) {
-               rsge.sg_list = kmalloc((wr->num_sge - 1) *
-                                       sizeof(struct ipath_sge),
-                                      GFP_ATOMIC);
-       } else
-               rsge.sg_list = NULL;
-
        /*
-        * Get the next work request entry to find where to put the data.
-        * Note that it is safe to drop the lock after changing rq->tail
-        * since ipath_post_receive() won't fill the empty slot.
+        * This would be a lot simpler if we could call ipath_get_rwqe()
+        * but that uses state that the receive interrupt handler uses
+        * so we would need to lock out receive interrupts while doing
+        * local loopback.
         */
        if (qp->ibqp.srq) {
                srq = to_isrq(qp->ibqp.srq);
@@ -167,32 +117,53 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
                rq = &qp->r_rq;
        }
 
+       if (rq->max_sge > 1) {
+               /*
+                * XXX We could use GFP_KERNEL if ipath_do_send()
+                * was always called from the tasklet instead of
+                * from ipath_post_send().
+                */
+               rsge.sg_list = kmalloc((rq->max_sge - 1) *
+                                       sizeof(struct ipath_sge),
+                                      GFP_ATOMIC);
+               if (!rsge.sg_list) {
+                       dev->n_pkt_drops++;
+                       goto drop;
+               }
+       }
+
+       /*
+        * Get the next work request entry to find where to put the data.
+        * Note that it is safe to drop the lock after changing rq->tail
+        * since ipath_post_receive() won't fill the empty slot.
+        */
        spin_lock_irqsave(&rq->lock, flags);
        wq = rq->wq;
        tail = wq->tail;
-       while (1) {
-               if (unlikely(tail == wq->head)) {
-                       spin_unlock_irqrestore(&rq->lock, flags);
-                       dev->n_pkt_drops++;
-                       goto bail_sge;
-               }
-               /* Make sure entry is read after head index is read. */
-               smp_rmb();
-               wqe = get_rwqe_ptr(rq, tail);
-               if (++tail >= rq->size)
-                       tail = 0;
-               if (init_sge(qp, wqe, &rlen, &rsge))
-                       break;
-               wq->tail = tail;
+       /* Validate tail before using it since it is user writable. */
+       if (tail >= rq->size)
+               tail = 0;
+       if (unlikely(tail == wq->head)) {
+               spin_unlock_irqrestore(&rq->lock, flags);
+               dev->n_pkt_drops++;
+               goto drop;
+       }
+       wqe = get_rwqe_ptr(rq, tail);
+       if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
+               spin_unlock_irqrestore(&rq->lock, flags);
+               dev->n_pkt_drops++;
+               goto drop;
        }
        /* Silently drop packets which are too big. */
-       if (wc->byte_len > rlen) {
+       if (wc.byte_len > rlen) {
                spin_unlock_irqrestore(&rq->lock, flags);
                dev->n_pkt_drops++;
-               goto bail_sge;
+               goto drop;
        }
+       if (++tail >= rq->size)
+               tail = 0;
        wq->tail = tail;
-       wc->wr_id = wqe->wr_id;
+       wc.wr_id = wqe->wr_id;
        if (handler) {
                u32 n;
 
@@ -221,13 +192,13 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
        } else
                spin_unlock_irqrestore(&rq->lock, flags);
 
-       ah_attr = &to_iah(wr->wr.ud.ah)->attr;
+       ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
        if (ah_attr->ah_flags & IB_AH_GRH) {
                ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
-               wc->wc_flags |= IB_WC_GRH;
+               wc.wc_flags |= IB_WC_GRH;
        } else
                ipath_skip_sge(&rsge, sizeof(struct ib_grh));
-       sge = &ss->sge;
+       sge = swqe->sg_list;
        while (length) {
                u32 len = sge->length;
 
@@ -241,8 +212,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
                sge->length -= len;
                sge->sge_length -= len;
                if (sge->sge_length == 0) {
-                       if (--ss->num_sge)
-                               *sge = *ss->sg_list++;
+                       if (--swqe->wr.num_sge)
+                               sge++;
                } else if (sge->length == 0 && sge->mr != NULL) {
                        if (++sge->n >= IPATH_SEGSZ) {
                                if (++sge->m >= sge->mr->mapsz)
@@ -256,123 +227,60 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
                }
                length -= len;
        }
-       wc->status = IB_WC_SUCCESS;
-       wc->opcode = IB_WC_RECV;
-       wc->vendor_err = 0;
-       wc->qp = &qp->ibqp;
-       wc->src_qp = sqp->ibqp.qp_num;
+       wc.status = IB_WC_SUCCESS;
+       wc.opcode = IB_WC_RECV;
+       wc.vendor_err = 0;
+       wc.qp = &qp->ibqp;
+       wc.src_qp = sqp->ibqp.qp_num;
        /* XXX do we know which pkey matched? Only needed for GSI. */
-       wc->pkey_index = 0;
-       wc->slid = dev->dd->ipath_lid |
+       wc.pkey_index = 0;
+       wc.slid = dev->dd->ipath_lid |
                (ah_attr->src_path_bits &
-                ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1));
-       wc->sl = ah_attr->sl;
-       wc->dlid_path_bits =
-               ah_attr->dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
+                ((1 << dev->dd->ipath_lmc) - 1));
+       wc.sl = ah_attr->sl;
+       wc.dlid_path_bits =
+               ah_attr->dlid & ((1 << dev->dd->ipath_lmc) - 1);
+       wc.port_num = 1;
        /* Signal completion event if the solicited bit is set. */
-       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), wc,
-                      wr->send_flags & IB_SEND_SOLICITED);
-
-bail_sge:
+       ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+                      swqe->wr.send_flags & IB_SEND_SOLICITED);
+drop:
        kfree(rsge.sg_list);
-done:
        if (atomic_dec_and_test(&qp->refcount))
                wake_up(&qp->wait);
+send_comp:
+       ipath_send_complete(sqp, swqe, IB_WC_SUCCESS);
 }
 
 /**
- * ipath_post_ud_send - post a UD send on QP
+ * ipath_make_ud_req - construct a UD request packet
  * @qp: the QP
- * @wr: the work request
  *
- * Note that we actually send the data as it is posted instead of putting
- * the request into a ring buffer.  If we wanted to use a ring buffer,
- * we would need to save a reference to the destination address in the SWQE.
+ * Return 1 if constructed; otherwise, return 0.
  */
-int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
+int ipath_make_ud_req(struct ipath_qp *qp)
 {
        struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
        struct ipath_other_headers *ohdr;
        struct ib_ah_attr *ah_attr;
-       struct ipath_sge_state ss;
-       struct ipath_sge *sg_list;
-       struct ib_wc wc;
-       u32 hwords;
+       struct ipath_swqe *wqe;
        u32 nwords;
-       u32 len;
        u32 extra_bytes;
        u32 bth0;
        u16 lrh0;
        u16 lid;
-       int i;
-       int ret;
+       int ret = 0;
 
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
-               ret = 0;
+       if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)))
                goto bail;
-       }
 
-       if (wr->wr.ud.ah->pd != qp->ibqp.pd) {
-               ret = -EPERM;
+       if (qp->s_cur == qp->s_head)
                goto bail;
-       }
 
-       /* IB spec says that num_sge == 0 is OK. */
-       if (wr->num_sge > qp->s_max_sge) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       if (wr->num_sge > 1) {
-               sg_list = kmalloc((qp->s_max_sge - 1) * sizeof(*sg_list),
-                                 GFP_ATOMIC);
-               if (!sg_list) {
-                       ret = -ENOMEM;
-                       goto bail;
-               }
-       } else
-               sg_list = NULL;
-
-       /* Check the buffer to send. */
-       ss.sg_list = sg_list;
-       ss.sge.mr = NULL;
-       ss.sge.vaddr = NULL;
-       ss.sge.length = 0;
-       ss.sge.sge_length = 0;
-       ss.num_sge = 0;
-       len = 0;
-       for (i = 0; i < wr->num_sge; i++) {
-               /* Check LKEY */
-               if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-
-               if (wr->sg_list[i].length == 0)
-                       continue;
-               if (!ipath_lkey_ok(qp, ss.num_sge ?
-                                  sg_list + ss.num_sge - 1 : &ss.sge,
-                                  &wr->sg_list[i], 0)) {
-                       ret = -EINVAL;
-                       goto bail;
-               }
-               len += wr->sg_list[i].length;
-               ss.num_sge++;
-       }
-       /* Check for invalid packet size. */
-       if (len > dev->dd->ipath_ibmtu) {
-               ret = -EINVAL;
-               goto bail;
-       }
-       extra_bytes = (4 - len) & 3;
-       nwords = (len + extra_bytes) >> 2;
+       wqe = get_swqe_ptr(qp, qp->s_cur);
 
        /* Construct the header. */
-       ah_attr = &to_iah(wr->wr.ud.ah)->attr;
-       if (ah_attr->dlid == 0) {
-               ret = -EINVAL;
-               goto bail;
-       }
+       ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
        if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
                if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
                        dev->n_multicast_xmit++;
@@ -381,74 +289,63 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
        } else {
                dev->n_unicast_xmit++;
                lid = ah_attr->dlid &
-                       ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
+                       ~((1 << dev->dd->ipath_lmc) - 1);
                if (unlikely(lid == dev->dd->ipath_lid)) {
-                       /*
-                        * Pass in an uninitialized ib_wc to save stack
-                        * space.
-                        */
-                       ipath_ud_loopback(qp, &ss, len, wr, &wc);
+                       ipath_ud_loopback(qp, wqe);
                        goto done;
                }
        }
+
+       extra_bytes = -wqe->length & 3;
+       nwords = (wqe->length + extra_bytes) >> 2;
+
+       /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
+       qp->s_hdrwords = 7;
+       if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
+               qp->s_hdrwords++;
+       qp->s_cur_size = wqe->length;
+       qp->s_cur_sge = &qp->s_sge;
+       qp->s_wqe = wqe;
+       qp->s_sge.sge = wqe->sg_list[0];
+       qp->s_sge.sg_list = wqe->sg_list + 1;
+       qp->s_sge.num_sge = wqe->wr.num_sge;
+
        if (ah_attr->ah_flags & IB_AH_GRH) {
                /* Header size in 32-bit words. */
-               hwords = 17;
+               qp->s_hdrwords += ipath_make_grh(dev, &qp->s_hdr.u.l.grh,
+                                                &ah_attr->grh,
+                                                qp->s_hdrwords, nwords);
                lrh0 = IPATH_LRH_GRH;
                ohdr = &qp->s_hdr.u.l.oth;
-               qp->s_hdr.u.l.grh.version_tclass_flow =
-                       cpu_to_be32((6 << 28) |
-                                   (ah_attr->grh.traffic_class << 20) |
-                                   ah_attr->grh.flow_label);
-               qp->s_hdr.u.l.grh.paylen =
-                       cpu_to_be16(((wr->opcode ==
-                                     IB_WR_SEND_WITH_IMM ? 6 : 5) +
-                                    nwords + SIZE_OF_CRC) << 2);
-               /* next_hdr is defined by C8-7 in ch. 8.4.1 */
-               qp->s_hdr.u.l.grh.next_hdr = 0x1B;
-               qp->s_hdr.u.l.grh.hop_limit = ah_attr->grh.hop_limit;
-               /* The SGID is 32-bit aligned. */
-               qp->s_hdr.u.l.grh.sgid.global.subnet_prefix =
-                       dev->gid_prefix;
-               qp->s_hdr.u.l.grh.sgid.global.interface_id =
-                       dev->dd->ipath_guid;
-               qp->s_hdr.u.l.grh.dgid = ah_attr->grh.dgid;
                /*
                 * Don't worry about sending to locally attached multicast
                 * QPs.  It is unspecified by the spec. what happens.
                 */
        } else {
                /* Header size in 32-bit words. */
-               hwords = 7;
                lrh0 = IPATH_LRH_BTH;
                ohdr = &qp->s_hdr.u.oth;
        }
-       if (wr->opcode == IB_WR_SEND_WITH_IMM) {
-               ohdr->u.ud.imm_data = wr->imm_data;
-               wc.imm_data = wr->imm_data;
-               hwords += 1;
+       if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+               ohdr->u.ud.imm_data = wqe->wr.imm_data;
                bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
-       } else if (wr->opcode == IB_WR_SEND) {
-               wc.imm_data = 0;
+       } else
                bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
-       } else {
-               ret = -EINVAL;
-               goto bail;
-       }
        lrh0 |= ah_attr->sl << 4;
        if (qp->ibqp.qp_type == IB_QPT_SMI)
                lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
        qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
        qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
-       qp->s_hdr.lrh[2] = cpu_to_be16(hwords + nwords + SIZE_OF_CRC);
+       qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
+                                          SIZE_OF_CRC);
        lid = dev->dd->ipath_lid;
        if (lid) {
                lid |= ah_attr->src_path_bits &
-                       ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
+                       ((1 << dev->dd->ipath_lmc) - 1);
                qp->s_hdr.lrh[3] = cpu_to_be16(lid);
        } else
                qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
-       if (wr->send_flags & IB_SEND_SOLICITED)
+       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                bth0 |= 1 << 23;
        bth0 |= extra_bytes << 20;
        bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
@@ -460,38 +357,20 @@ int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr)
        ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
                ah_attr->dlid != IPATH_PERMISSIVE_LID ?
                __constant_cpu_to_be32(IPATH_MULTICAST_QPN) :
-               cpu_to_be32(wr->wr.ud.remote_qpn);
-       /* XXX Could lose a PSN count but not worth locking */
+               cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
        ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
        /*
         * Qkeys with the high order bit set mean use the
         * qkey from the QP context instead of the WR (see 10.2.5).
         */
-       ohdr->u.ud.deth[0] = cpu_to_be32((int)wr->wr.ud.remote_qkey < 0 ?
-                                        qp->qkey : wr->wr.ud.remote_qkey);
+       ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
+                                        qp->qkey : wqe->wr.wr.ud.remote_qkey);
        ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
-       if (ipath_verbs_send(dev->dd, hwords, (u32 *) &qp->s_hdr,
-                            len, &ss))
-               dev->n_no_piobuf++;
 
 done:
-       /* Queue the completion status entry. */
-       if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-           (wr->send_flags & IB_SEND_SIGNALED)) {
-               wc.wr_id = wr->wr_id;
-               wc.status = IB_WC_SUCCESS;
-               wc.vendor_err = 0;
-               wc.opcode = IB_WC_SEND;
-               wc.byte_len = len;
-               wc.qp = &qp->ibqp;
-               wc.src_qp = 0;
-               wc.wc_flags = 0;
-               /* XXX initialize other fields? */
-               ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
-       }
-       kfree(sg_list);
-
-       ret = 0;
+       if (++qp->s_cur >= qp->s_size)
+               qp->s_cur = 0;
+       ret = 1;
 
 bail:
        return ret;
@@ -672,7 +551,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
         * Save the LMC lower bits if the destination LID is a unicast LID.
         */
        wc.dlid_path_bits = dlid >= IPATH_MULTICAST_LID_BASE ? 0 :
-               dlid & ((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
+               dlid & ((1 << dev->dd->ipath_lmc) - 1);
+       wc.port_num = 1;
        /* Signal completion event if the solicited bit is set. */
        ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
                       (ohdr->bth[0] &
index 16aa61fd80856419957fdbe7ec6dcbb0054de026..74f77e7c2c1bb6ffc5632b2d446a35bfca3e803b 100644 (file)
@@ -230,6 +230,121 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
        }
 }
 
+/*
+ * Queue a completion with status IB_WC_WR_FLUSH_ERR for @wr on the send
+ * completion queue of @qp.  All completion fields not set explicitly
+ * below are left zeroed.
+ */
+static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr)
+{
+       struct ib_wc cqe;
+
+       memset(&cqe, 0, sizeof(cqe));
+       cqe.qp = &qp->ibqp;
+       /* Map the work request opcode to its completion opcode. */
+       cqe.opcode = ib_ipath_wc_opcode[wr->opcode];
+       cqe.status = IB_WC_WR_FLUSH_ERR;
+       cqe.wr_id = wr->wr_id;
+       ipath_cq_enter(to_icq(qp->ibqp.send_cq), &cqe, 1);
+}
+
+/**
+ * ipath_post_one_send - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ *
+ * Validates @wr and copies it into the slot at the head of the send
+ * queue while holding qp->s_lock.  Nothing is sent from here; the head
+ * index is only advanced once the request has passed every check.
+ *
+ * Return 0 if the request was queued (or a flushed completion was
+ * generated because the QP is in the SQE or ERR state), -ENOMEM if the
+ * send queue is full, or -EINVAL if the request is not valid.
+ */
+static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
+{
+       struct ipath_swqe *wqe;
+       u32 next;
+       int i;
+       int j;
+       int acc;
+       int ret;
+       unsigned long flags;
+
+       spin_lock_irqsave(&qp->s_lock, flags);
+
+       /* Check that state is OK to post send. */
+       if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) {
+               if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR)
+                       goto bail_inval;
+               /* C10-96 says generate a flushed completion entry. */
+               ipath_flush_wqe(qp, wr);
+               ret = 0;
+               goto bail;
+       }
+
+       /* IB spec says that num_sge == 0 is OK. */
+       if (wr->num_sge > qp->s_max_sge)
+               goto bail_inval;
+
+       /*
+        * Don't allow RDMA reads or atomic operations on UC or
+        * undefined operations.
+        * Make sure buffer is large enough to hold the result for atomics.
+        */
+       if (qp->ibqp.qp_type == IB_QPT_UC) {
+               if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
+                       goto bail_inval;
+       } else if (qp->ibqp.qp_type == IB_QPT_UD) {
+               /* Check UD opcode */
+               if (wr->opcode != IB_WR_SEND &&
+                   wr->opcode != IB_WR_SEND_WITH_IMM)
+                       goto bail_inval;
+               /* Check UD destination address PD */
+               if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+                       goto bail_inval;
+       } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
+               goto bail_inval;
+       else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+                  (wr->num_sge == 0 ||
+                   wr->sg_list[0].length < sizeof(u64) ||
+                   wr->sg_list[0].addr & (sizeof(u64) - 1)))
+               goto bail_inval;
+       else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
+               goto bail_inval;
+
+       next = qp->s_head + 1;
+       if (next >= qp->s_size)
+               next = 0;
+       /*
+        * The queue is full when advancing the head would reach s_last.
+        * Report that as -ENOMEM so the caller can tell a full queue
+        * apart from a malformed work request.
+        */
+       if (next == qp->s_last) {
+               ret = -ENOMEM;
+               goto bail;
+       }
+
+       wqe = get_swqe_ptr(qp, qp->s_head);
+       wqe->wr = *wr;
+       wqe->length = 0;
+       if (wr->num_sge) {
+               /* Opcodes from RDMA read upwards need a writable buffer. */
+               acc = wr->opcode >= IB_WR_RDMA_READ ?
+                       IB_ACCESS_LOCAL_WRITE : 0;
+               /* Zero length entries are skipped; j counts the ones kept. */
+               for (i = 0, j = 0; i < wr->num_sge; i++) {
+                       u32 length = wr->sg_list[i].length;
+                       int ok;
+
+                       if (length == 0)
+                               continue;
+                       ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
+                                          &wr->sg_list[i], acc);
+                       if (!ok)
+                               goto bail_inval;
+                       wqe->length += length;
+                       j++;
+               }
+               wqe->wr.num_sge = j;
+       }
+       if (qp->ibqp.qp_type == IB_QPT_UC ||
+           qp->ibqp.qp_type == IB_QPT_RC) {
+               if (wqe->length > 0x80000000U)
+                       goto bail_inval;
+       } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
+               goto bail_inval;
+       /*
+        * Only consume a send sequence number once the request is known
+        * to be good, so a rejected request does not leave a gap.
+        */
+       wqe->ssn = qp->s_ssn++;
+       qp->s_head = next;
+
+       ret = 0;
+       goto bail;
+
+bail_inval:
+       ret = -EINVAL;
+bail:
+       spin_unlock_irqrestore(&qp->s_lock, flags);
+       return ret;
+}
+
 /**
  * ipath_post_send - post a send on a QP
  * @ibqp: the QP to post the send on
@@ -244,35 +359,17 @@ static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        struct ipath_qp *qp = to_iqp(ibqp);
        int err = 0;
 
-       /* Check that state is OK to post send. */
-       if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)) {
-               *bad_wr = wr;
-               err = -EINVAL;
-               goto bail;
-       }
-
        for (; wr; wr = wr->next) {
-               switch (qp->ibqp.qp_type) {
-               case IB_QPT_UC:
-               case IB_QPT_RC:
-                       err = ipath_post_ruc_send(qp, wr);
-                       break;
-
-               case IB_QPT_SMI:
-               case IB_QPT_GSI:
-               case IB_QPT_UD:
-                       err = ipath_post_ud_send(qp, wr);
-                       break;
-
-               default:
-                       err = -EINVAL;
-               }
+               err = ipath_post_one_send(qp, wr);
                if (err) {
                        *bad_wr = wr;
-                       break;
+                       goto bail;
                }
        }
 
+       /* Try to do the send work in the caller's context. */
+       ipath_do_send((unsigned long) qp);
+
 bail:
        return err;
 }
@@ -416,7 +513,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
        /* Check for a valid destination LID (see ch. 7.11.1). */
        lid = be16_to_cpu(hdr->lrh[1]);
        if (lid < IPATH_MULTICAST_LID_BASE) {
-               lid &= ~((1 << (dev->mkeyprot_resv_lmc & 7)) - 1);
+               lid &= ~((1 << dev->dd->ipath_lmc) - 1);
                if (unlikely(lid != dev->dd->ipath_lid)) {
                        dev->rcv_errors++;
                        goto bail;
@@ -631,7 +728,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 #endif
 
 static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
-                   u32 length)
+                   u32 length, unsigned flush_wc)
 {
        u32 extra = 0;
        u32 data = 0;
@@ -641,11 +738,11 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
                u32 len = ss->sge.length;
                u32 off;
 
-               BUG_ON(len == 0);
                if (len > length)
                        len = length;
                if (len > ss->sge.sge_length)
                        len = ss->sge.sge_length;
+               BUG_ON(len == 0);
                /* If the source address is not aligned, try to align it. */
                off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
                if (off) {
@@ -757,36 +854,25 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
        }
        /* Update address before sending packet. */
        update_sge(ss, length);
-       /* must flush early everything before trigger word */
-       ipath_flush_wc();
-       __raw_writel(last, piobuf);
-       /* be sure trigger word is written */
-       ipath_flush_wc();
+       if (flush_wc) {
+               /* must flush early everything before trigger word */
+               ipath_flush_wc();
+               __raw_writel(last, piobuf);
+               /* be sure trigger word is written */
+               ipath_flush_wc();
+       } else
+               __raw_writel(last, piobuf);
 }
 
-/**
- * ipath_verbs_send - send a packet
- * @dd: the infinipath device
- * @hdrwords: the number of words in the header
- * @hdr: the packet header
- * @len: the length of the packet in bytes
- * @ss: the SGE to send
- */
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
-                    u32 *hdr, u32 len, struct ipath_sge_state *ss)
+static int ipath_verbs_send_pio(struct ipath_qp *qp, u32 *hdr, u32 hdrwords,
+                               struct ipath_sge_state *ss, u32 len,
+                               u32 plen, u32 dwords)
 {
+       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
        u32 __iomem *piobuf;
-       u32 plen;
+       unsigned flush_wc;
        int ret;
 
-       /* +1 is for the qword padding of pbc */
-       plen = hdrwords + ((len + 3) >> 2) + 1;
-       if (unlikely((plen << 2) > dd->ipath_ibmaxlen)) {
-               ret = -EINVAL;
-               goto bail;
-       }
-
-       /* Get a PIO buffer to use. */
        piobuf = ipath_getpiobuf(dd, NULL);
        if (unlikely(piobuf == NULL)) {
                ret = -EBUSY;
@@ -799,51 +885,90 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
         * or WC buffer can be written out of order.
         */
        writeq(plen, piobuf);
-       ipath_flush_wc();
        piobuf += 2;
+
+       flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
        if (len == 0) {
                /*
                 * If there is just the header portion, must flush before
                 * writing last word of header for correctness, and after
                 * the last header word (trigger word).
                 */
-               __iowrite32_copy(piobuf, hdr, hdrwords - 1);
-               ipath_flush_wc();
-               __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
-               ipath_flush_wc();
-               ret = 0;
-               goto bail;
+               if (flush_wc) {
+                       ipath_flush_wc();
+                       __iowrite32_copy(piobuf, hdr, hdrwords - 1);
+                       ipath_flush_wc();
+                       __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+                       ipath_flush_wc();
+               } else
+                       __iowrite32_copy(piobuf, hdr, hdrwords);
+               goto done;
        }
 
+       if (flush_wc)
+               ipath_flush_wc();
        __iowrite32_copy(piobuf, hdr, hdrwords);
        piobuf += hdrwords;
 
        /* The common case is aligned and contained in one segment. */
        if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
                   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
-               u32 w;
                u32 *addr = (u32 *) ss->sge.vaddr;
 
                /* Update address before sending packet. */
                update_sge(ss, len);
-               /* Need to round up for the last dword in the packet. */
-               w = (len + 3) >> 2;
-               __iowrite32_copy(piobuf, addr, w - 1);
-               /* must flush early everything before trigger word */
-               ipath_flush_wc();
-               __raw_writel(addr[w - 1], piobuf + w - 1);
-               /* be sure trigger word is written */
-               ipath_flush_wc();
-               ret = 0;
-               goto bail;
+               if (flush_wc) {
+                       __iowrite32_copy(piobuf, addr, dwords - 1);
+                       /* must flush early everything before trigger word */
+                       ipath_flush_wc();
+                       __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
+                       /* be sure trigger word is written */
+                       ipath_flush_wc();
+               } else
+                       __iowrite32_copy(piobuf, addr, dwords);
+               goto done;
        }
-       copy_io(piobuf, ss, len);
+       copy_io(piobuf, ss, len, flush_wc);
+done:
+       if (qp->s_wqe)
+               ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
        ret = 0;
-
 bail:
        return ret;
 }
 
+/**
+ * ipath_verbs_send - send a packet
+ * @qp: the QP to send on
+ * @hdr: the packet header
+ * @hdrwords: the number of words in the header
+ * @ss: the SGE to send
+ * @len: the length of the packet in bytes
+ *
+ * Returns 0 when the packet is dropped because the link is not active;
+ * otherwise returns the result of ipath_verbs_send_pio().
+ */
+int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
+                    u32 hdrwords, struct ipath_sge_state *ss, u32 len)
+{
+       struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
+       /* Payload length rounded up to whole 32-bit words. */
+       u32 dwords = (len + 3) >> 2;
+       /* +1 is for the qword padding of pbc */
+       u32 plen = hdrwords + dwords + 1;
+
+       /* VL15 (SMI) packets go out whatever the link state is. */
+       if ((dd->ipath_flags & IPATH_LINKACTIVE) ||
+           qp->ibqp.qp_type == IB_QPT_SMI)
+               return ipath_verbs_send_pio(qp, (u32 *) hdr, hdrwords,
+                                           ss, len, plen, dwords);
+
+       /*
+        * Link is not active: drop the packet, but still complete the
+        * current work request (if there is one) with a success status.
+        */
+       if (qp->s_wqe)
+               ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+       return 0;
+}
+
 int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
                            u64 *rwords, u64 *spkts, u64 *rpkts,
                            u64 *xmit_wait)
@@ -852,7 +977,6 @@ int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
 
        if (!(dd->ipath_flags & IPATH_INITTED)) {
                /* no hardware, freeze, etc. */
-               ipath_dbg("unit %u not usable\n", dd->ipath_unit);
                ret = -EINVAL;
                goto bail;
        }
@@ -878,48 +1002,44 @@ bail:
 int ipath_get_counters(struct ipath_devdata *dd,
                       struct ipath_verbs_counters *cntrs)
 {
+       struct ipath_cregs const *crp = dd->ipath_cregs;
        int ret;
 
        if (!(dd->ipath_flags & IPATH_INITTED)) {
                /* no hardware, freeze, etc. */
-               ipath_dbg("unit %u not usable\n", dd->ipath_unit);
                ret = -EINVAL;
                goto bail;
        }
        cntrs->symbol_error_counter =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+               ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
        cntrs->link_error_recovery_counter =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+               ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
        /*
         * The link downed counter counts when the other side downs the
         * connection.  We add in the number of times we downed the link
         * due to local link integrity errors to compensate.
         */
        cntrs->link_downed_counter =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_iblinkdowncnt);
+               ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
        cntrs->port_rcv_errors =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_rxdroppktcnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvovflcnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_portovflcnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_err_rlencnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_invalidrlencnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) +
+               ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
+               ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
+               ipath_snap_cntr(dd, crp->cr_portovflcnt) +
+               ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
+               ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
+               ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
+               ipath_snap_cntr(dd, crp->cr_erricrccnt) +
+               ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
+               ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
+               ipath_snap_cntr(dd, crp->cr_badformatcnt) +
                dd->ipath_rxfc_unsupvl_errs;
        cntrs->port_rcv_remphys_errors =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
-       cntrs->port_xmit_discards =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_unsupvlcnt);
-       cntrs->port_xmit_data =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-       cntrs->port_rcv_data =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
-       cntrs->port_xmit_packets =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
-       cntrs->port_rcv_packets =
-               ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
+               ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
+       cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
+       cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
+       cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
+       cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
+       cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
        cntrs->local_link_integrity_errors =
                (dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
                dd->ipath_lli_errs : dd->ipath_lli_errors;
@@ -1033,25 +1153,26 @@ static int ipath_query_port(struct ib_device *ibdev,
                            u8 port, struct ib_port_attr *props)
 {
        struct ipath_ibdev *dev = to_idev(ibdev);
+       struct ipath_devdata *dd = dev->dd;
        enum ib_mtu mtu;
-       u16 lid = dev->dd->ipath_lid;
+       u16 lid = dd->ipath_lid;
        u64 ibcstat;
 
        memset(props, 0, sizeof(*props));
        props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE);
-       props->lmc = dev->mkeyprot_resv_lmc & 7;
+       props->lmc = dd->ipath_lmc;
        props->sm_lid = dev->sm_lid;
        props->sm_sl = dev->sm_sl;
-       ibcstat = dev->dd->ipath_lastibcstat;
+       ibcstat = dd->ipath_lastibcstat;
        props->state = ((ibcstat >> 4) & 0x3) + 1;
        /* See phys_state_show() */
        props->phys_state = ipath_cvt_physportstate[
-               dev->dd->ipath_lastibcstat & 0xf];
+               dd->ipath_lastibcstat & 0xf];
        props->port_cap_flags = dev->port_cap_flags;
        props->gid_tbl_len = 1;
        props->max_msg_sz = 0x80000000;
-       props->pkey_tbl_len = ipath_get_npkeys(dev->dd);
-       props->bad_pkey_cntr = ipath_get_cr_errpkey(dev->dd) -
+       props->pkey_tbl_len = ipath_get_npkeys(dd);
+       props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
                dev->z_pkey_violations;
        props->qkey_viol_cntr = dev->qkey_violations;
        props->active_width = IB_WIDTH_4X;
@@ -1061,12 +1182,12 @@ static int ipath_query_port(struct ib_device *ibdev,
        props->init_type_reply = 0;
 
        /*
-        * Note: the chips support a maximum MTU of 4096, but the driver
+        * Note: the chip supports a maximum MTU of 4096, but the driver
         * hasn't implemented this feature yet, so set the maximum value
         * to 2048.
         */
        props->max_mtu = IB_MTU_2048;
-       switch (dev->dd->ipath_ibmtu) {
+       switch (dd->ipath_ibmtu) {
        case 4096:
                mtu = IB_MTU_4096;
                break;
@@ -1415,9 +1536,7 @@ static int disable_timer(struct ipath_devdata *dd)
 {
        /* Disable GPIO bit 2 interrupt */
        if (dd->ipath_flags & IPATH_GPIO_INTR) {
-                u64 val;
                 /* Disable GPIO bit 2 interrupt */
-                val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
                dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
                ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
                                 dd->ipath_gpio_mask);
index 1a24c6a4a8143deea93b6efbaa69314d9e6f4953..6ccb54f104a3fff2ddc6e08746949a29f1cb5a1e 100644 (file)
@@ -42,6 +42,8 @@
 #include <rdma/ib_pack.h>
 #include <rdma/ib_user_verbs.h>
 
+#include "ipath_kernel.h"
+
 #define IPATH_MAX_RDMA_ATOMIC  4
 
 #define QPN_MAX                 (1 << 24)
@@ -59,6 +61,7 @@
  */
 #define IB_CQ_NONE     (IB_CQ_NEXT_COMP + 1)
 
+/* AETH NAK opcode values */
 #define IB_RNR_NAK                     0x20
 #define IB_NAK_PSN_ERROR               0x60
 #define IB_NAK_INVALID_REQUEST         0x61
@@ -66,6 +69,7 @@
 #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
 #define IB_NAK_INVALID_RD_REQUEST      0x64
 
+/* Flags for checking QP state (see ib_ipath_state_ops[]) */
 #define IPATH_POST_SEND_OK             0x01
 #define IPATH_POST_RECV_OK             0x02
 #define IPATH_PROCESS_RECV_OK          0x04
@@ -187,7 +191,11 @@ struct ipath_mmap_info {
 struct ipath_cq_wc {
        u32 head;               /* index of next entry to fill */
        u32 tail;               /* index of next ib_poll_cq() entry */
-       struct ib_uverbs_wc queue[1]; /* this is actually size ibcq.cqe + 1 */
+       union {
+               /* these are actually size ibcq.cqe + 1 */
+               struct ib_uverbs_wc uqueue[0];
+               struct ib_wc kqueue[0];
+       };
 };
 
 /*
@@ -239,7 +247,7 @@ struct ipath_mregion {
  */
 struct ipath_sge {
        struct ipath_mregion *mr;
-       void *vaddr;            /* current pointer into the segment */
+       void *vaddr;            /* kernel virtual address of segment */
        u32 sge_length;         /* length of the SGE */
        u32 length;             /* remaining length of the segment */
        u16 m;                  /* current index: mr->map[m] */
@@ -407,6 +415,7 @@ struct ipath_qp {
        u32 s_ssn;              /* SSN of tail entry */
        u32 s_lsn;              /* limit sequence number (credit) */
        struct ipath_swqe *s_wq;        /* send work queue */
+       struct ipath_swqe *s_wqe;
        struct ipath_rq r_rq;           /* receive work queue */
        struct ipath_sge r_sg_list[0];  /* verified SGEs */
 };
@@ -492,7 +501,7 @@ struct ipath_ibdev {
        int ib_unit;            /* This is the device number */
        u16 sm_lid;             /* in host order */
        u8 sm_sl;
-       u8 mkeyprot_resv_lmc;
+       u8 mkeyprot;
        /* non-zero when timer is set */
        unsigned long mkey_lease_timeout;
 
@@ -667,7 +676,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 
 int ipath_destroy_qp(struct ib_qp *ibqp);
 
-void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
+int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
 
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                    int attr_mask, struct ib_udata *udata);
@@ -683,8 +692,8 @@ void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
 
 void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
 
-int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
-                    u32 *hdr, u32 len, struct ipath_sge_state *ss);
+int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
+                    u32 hdrwords, struct ipath_sge_state *ss, u32 len);
 
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
 
@@ -692,8 +701,6 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
 
 void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
 
-int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr);
-
 void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
                  int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
 
@@ -733,6 +740,8 @@ int ipath_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
 
 int ipath_destroy_srq(struct ib_srq *ibsrq);
 
+void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
+
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
 
 struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries, int comp_vector,
@@ -782,18 +791,28 @@ int ipath_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
 
 void ipath_insert_rnr_queue(struct ipath_qp *qp);
 
+int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+                  u32 *lengthp, struct ipath_sge_state *ss);
+
 int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only);
 
 u32 ipath_make_grh(struct ipath_ibdev *dev, struct ib_grh *hdr,
                   struct ib_global_route *grh, u32 hwords, u32 nwords);
 
-void ipath_do_ruc_send(unsigned long data);
+void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
+                          struct ipath_other_headers *ohdr,
+                          u32 bth0, u32 bth2);
+
+void ipath_do_send(unsigned long data);
+
+void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
+                        enum ib_wc_status status);
+
+int ipath_make_rc_req(struct ipath_qp *qp);
 
-int ipath_make_rc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
-                     u32 pmtu, u32 *bth0p, u32 *bth2p);
+int ipath_make_uc_req(struct ipath_qp *qp);
 
-int ipath_make_uc_req(struct ipath_qp *qp, struct ipath_other_headers *ohdr,
-                     u32 pmtu, u32 *bth0p, u32 *bth2p);
+int ipath_make_ud_req(struct ipath_qp *qp);
 
 int ipath_register_ib_device(struct ipath_devdata *);
 
index dde8fe9af47e3f24cf13574c363f873a1297fec3..d8287d9db41e5a8eb0bb3167db75635c266b4960 100644 (file)
@@ -476,9 +476,48 @@ out:
        return err;
 }
 
+static ssize_t show_hca(struct class_device *cdev, char *buf)
+{
+       struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);
+       return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
+}
+
+static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
+{
+       struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);
+       return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32),
+                      (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
+                      (int) dev->dev->caps.fw_ver & 0xffff);
+}
+
+static ssize_t show_rev(struct class_device *cdev, char *buf)
+{
+       struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);
+       return sprintf(buf, "%x\n", dev->dev->rev_id);
+}
+
+static ssize_t show_board(struct class_device *cdev, char *buf)
+{
+       struct mlx4_ib_dev *dev = container_of(cdev, struct mlx4_ib_dev, ib_dev.class_dev);
+       return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id);
+}
+
+static CLASS_DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
+static CLASS_DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
+static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
+static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
+
+static struct class_device_attribute *mlx4_class_attributes[] = {
+       &class_device_attr_hw_rev,
+       &class_device_attr_fw_ver,
+       &class_device_attr_hca_type,
+       &class_device_attr_board_id
+};
+
 static void *mlx4_ib_add(struct mlx4_dev *dev)
 {
        struct mlx4_ib_dev *ibdev;
+       int i;
 
        ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
        if (!ibdev) {
@@ -568,6 +607,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        ibdev->ib_dev.detach_mcast      = mlx4_ib_mcg_detach;
        ibdev->ib_dev.process_mad       = mlx4_ib_process_mad;
 
+       ibdev->ib_dev.alloc_fmr         = mlx4_ib_fmr_alloc;
+       ibdev->ib_dev.map_phys_fmr      = mlx4_ib_map_phys_fmr;
+       ibdev->ib_dev.unmap_fmr         = mlx4_ib_unmap_fmr;
+       ibdev->ib_dev.dealloc_fmr       = mlx4_ib_fmr_dealloc;
+
        if (init_node_data(ibdev))
                goto err_map;
 
@@ -580,6 +624,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        if (mlx4_ib_mad_init(ibdev))
                goto err_reg;
 
+       for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) {
+               if (class_device_create_file(&ibdev->ib_dev.class_dev,
+                                              mlx4_class_attributes[i]))
+                       goto err_reg;
+       }
+
        return ibdev;
 
 err_reg:
index 705ff2fa237e85e73d3ddf8a6e0d8911f737a9e1..28697653a370f18fac368f3ecaed4c070517601c 100644 (file)
@@ -93,6 +93,11 @@ struct mlx4_ib_mr {
        struct ib_umem         *umem;
 };
 
+struct mlx4_ib_fmr {
+       struct ib_fmr           ibfmr;
+       struct mlx4_fmr         mfmr;
+};
+
 struct mlx4_ib_wq {
        u64                    *wrid;
        spinlock_t              lock;
@@ -199,6 +204,10 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
        return container_of(ibmr, struct mlx4_ib_mr, ibmr);
 }
 
+static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
+{
+       return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
+}
 static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
 {
        return container_of(ibqp, struct mlx4_ib_qp, ibqp);
@@ -284,6 +293,13 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags,    u8 port_num,
 int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
 void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
 
+struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int mr_access_flags,
+                                 struct ib_fmr_attr *fmr_attr);
+int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
+                        u64 iova);
+int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
+int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
+
 static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
 {
        return !!(ah->av.g_slid & 0x80);
index 85ae906f1d12815d1f4d207db071f1c4432dc3f5..7dc91a3e712ddb812304984ad5b5f24403709e68 100644 (file)
@@ -96,11 +96,10 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
                                pages[i++] = sg_dma_address(&chunk->page_list[j]) +
                                        umem->page_size * k;
                                /*
-                                * Be friendly to WRITE_MTT firmware
-                                * command, and pass it chunks of
-                                * appropriate size.
+                                * Be friendly to mlx4_write_mtt() and
+                                * pass it chunks of appropriate size.
                                 */
-                               if (i == PAGE_SIZE / sizeof (u64) - 2) {
+                               if (i == PAGE_SIZE / sizeof (u64)) {
                                        err = mlx4_write_mtt(dev->dev, mtt, n,
                                                             i, pages);
                                        if (err)
@@ -182,3 +181,123 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
 
        return 0;
 }
+
+/*
+ * Allocate and enable a fast memory region (FMR) in the protection domain
+ * @pd.  @acc is translated with convert_access(); max_pages, max_maps and
+ * page_shift are taken from @fmr_attr.
+ *
+ * Returns the embedded struct ib_fmr on success, or an ERR_PTR() carrying
+ * the error code on failure.
+ */
+struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
+                                struct ib_fmr_attr *fmr_attr)
+{
+       struct mlx4_ib_dev *dev = to_mdev(pd->device);
+       struct mlx4_ib_fmr *fmr;
+       int err;
+
+       fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
+       if (!fmr)
+               return ERR_PTR(-ENOMEM);
+
+       err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
+                            fmr_attr->max_pages, fmr_attr->max_maps,
+                            fmr_attr->page_shift, &fmr->mfmr);
+       if (err)
+               goto err_free;
+
+       err = mlx4_mr_enable(dev->dev, &fmr->mfmr.mr);
+       if (err)
+               goto err_mr;
+
+       /* Seed both keys from the underlying MR's key. */
+       fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;
+
+       return &fmr->ibfmr;
+
+err_mr:
+       mlx4_mr_free(dev->dev, &fmr->mfmr.mr);
+
+err_free:
+       kfree(fmr);
+
+       return ERR_PTR(err);
+}
+
+/*
+ * Map @npages pages from @page_list into the FMR at address @iova.
+ *
+ * Thin wrapper around mlx4_map_phys_fmr(), which is also handed pointers to
+ * the FMR's lkey and rkey.  Returns whatever mlx4_map_phys_fmr() returns.
+ */
+int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+                     int npages, u64 iova)
+{
+       struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
+       struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);
+
+       return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
+                                &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
+}
+
+/*
+ * Unmap every FMR on @fmr_list.
+ *
+ * All entries must belong to the same mlx4 device, otherwise -EINVAL is
+ * returned before anything is unmapped.  An empty list is a no-op.  A
+ * SYNC_TPT failure is only logged; the function still returns 0.
+ */
+int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
+{
+       struct ib_fmr *ibfmr;
+       int err;
+       struct mlx4_dev *mdev = NULL;
+
+       list_for_each_entry(ibfmr, fmr_list, list) {
+               if (mdev && to_mdev(ibfmr->device)->dev != mdev)
+                       return -EINVAL;
+               mdev = to_mdev(ibfmr->device)->dev;
+       }
+
+       if (!mdev)
+               return 0;
+
+       list_for_each_entry(ibfmr, fmr_list, list) {
+               struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
+
+               mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
+       }
+
+       /*
+        * Make sure all MPT status updates are visible before issuing
+        * SYNC_TPT firmware command.
+        */
+       wmb();
+
+       err = mlx4_SYNC_TPT(mdev);
+       if (err)
+               printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when "
+                      "unmapping FMRs\n", err);
+
+       return 0;
+}
+
+/*
+ * Free the FMR with mlx4_fmr_free() and, only if that succeeds, kfree() the
+ * wrapper.  Returns the mlx4_fmr_free() result; on failure nothing is freed
+ * here.
+ */
+int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
+{
+       struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
+       struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
+       int err;
+
+       err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);
+
+       if (!err)
+               kfree(ifmr);
+
+       return err;
+}
index 85c51bdc36f1d403130fdf81655c51c7eca6e817..31a480e5b0d03064e0597765e16ae62791402d33 100644 (file)
@@ -1249,6 +1249,13 @@ static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
        dseg->byte_count = cpu_to_be32(sg->length);
 }
 
+static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+       dseg->byte_count = cpu_to_be32(sg->length);
+       dseg->lkey       = cpu_to_be32(sg->lkey);
+       dseg->addr       = cpu_to_be64(sg->addr);
+}
+
 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr)
 {
@@ -1464,11 +1471,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
                scat = get_recv_wqe(qp, ind);
 
-               for (i = 0; i < wr->num_sge; ++i) {
-                       scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
-                       scat[i].lkey       = cpu_to_be32(wr->sg_list[i].lkey);
-                       scat[i].addr       = cpu_to_be64(wr->sg_list[i].addr);
-               }
+               for (i = 0; i < wr->num_sge; ++i)
+                       __set_data_seg(scat + i, wr->sg_list + i);
 
                if (i < qp->rq.max_gs) {
                        scat[i].byte_count = 0;
index 408748fb5285697d24c44b8cd5bebbb19a780f68..e7e9a3d0dac343f5dbd333d87a1839be635f12db 100644 (file)
@@ -251,7 +251,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
        if (ret)
                return ret;
 
-       srq_attr->srq_limit = be16_to_cpu(limit_watermark);
+       srq_attr->srq_limit = limit_watermark;
        srq_attr->max_wr    = srq->msrq.max - 1;
        srq_attr->max_sge   = srq->msrq.max_gs;
 
index acc95892713a02c9da6f479d519db43f4de49e16..6966f943f44036b2fee4c07ccb4f0eda25a0956f 100644 (file)
@@ -290,6 +290,12 @@ static int mthca_cmd_post(struct mthca_dev *dev,
                err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier,
                                         op_modifier, op, token, event);
 
+       /*
+        * Make sure that our HCR writes don't get mixed in with
+        * writes from another CPU starting a FW command.
+        */
+       mmiowb();
+
        mutex_unlock(&dev->cmd.hcr_mutex);
        return err;
 }
index 9bae3cc606039660884a72e97e06cfa7db765c43..15aa32eb78b6b6b093c42e8f419448d042a4596d 100644 (file)
@@ -83,7 +83,7 @@ enum {
        MTHCA_QP_CONTEXT_SIZE = 0x200,
        MTHCA_RDB_ENTRY_SIZE  =  0x20,
        MTHCA_AV_SIZE         =  0x20,
-       MTHCA_MGM_ENTRY_SIZE  =  0x40,
+       MTHCA_MGM_ENTRY_SIZE  = 0x100,
 
        /* Arbel FW gives us these, but we need them for Tavor */
        MTHCA_MPT_ENTRY_SIZE  =  0x40,
index 76fed7545c536334c6bb5567ae87aca4df1df6f9..60de6f93869e7487fa91696950507b82e22b4267 100644 (file)
@@ -61,7 +61,7 @@ MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
 
 #ifdef CONFIG_PCI_MSI
 
-static int msi_x = 0;
+static int msi_x = 1;
 module_param(msi_x, int, 0444);
 MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
 
@@ -137,40 +137,23 @@ static const char mthca_version[] __devinitdata =
 
 static int mthca_tune_pci(struct mthca_dev *mdev)
 {
-       int cap;
-       u16 val;
-
        if (!tune_pci)
                return 0;
 
        /* First try to max out Read Byte Count */
-       cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
-       if (cap) {
-               if (pci_read_config_word(mdev->pdev, cap + PCI_X_CMD, &val)) {
-                       mthca_err(mdev, "Couldn't read PCI-X command register, "
-                                 "aborting.\n");
-                       return -ENODEV;
-               }
-               val = (val & ~PCI_X_CMD_MAX_READ) | (3 << 2);
-               if (pci_write_config_word(mdev->pdev, cap + PCI_X_CMD, val)) {
-                       mthca_err(mdev, "Couldn't write PCI-X command register, "
-                                 "aborting.\n");
+       if (pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX)) {
+               if (pcix_set_mmrbc(mdev->pdev, pcix_get_max_mmrbc(mdev->pdev))) {
+                       mthca_err(mdev, "Couldn't set PCI-X max read count, "
+                               "aborting.\n");
                        return -ENODEV;
                }
        } else if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE))
                mthca_info(mdev, "No PCI-X capability, not setting RBC.\n");
 
-       cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
-       if (cap) {
-               if (pci_read_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, &val)) {
-                       mthca_err(mdev, "Couldn't read PCI Express device control "
-                                 "register, aborting.\n");
-                       return -ENODEV;
-               }
-               val = (val & ~PCI_EXP_DEVCTL_READRQ) | (5 << 12);
-               if (pci_write_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, val)) {
-                       mthca_err(mdev, "Couldn't write PCI Express device control "
-                                 "register, aborting.\n");
+       if (pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP)) {
+               if (pcie_set_readrq(mdev->pdev, 4096)) {
+                       mthca_err(mdev, "Couldn't write PCI Express read request, "
+                               "aborting.\n");
                        return -ENODEV;
                }
        } else if (mdev->mthca_flags & MTHCA_FLAG_PCIE)
@@ -833,14 +816,19 @@ static int mthca_setup_hca(struct mthca_dev *dev)
 
        err = mthca_NOP(dev, &status);
        if (err || status) {
-               mthca_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting.\n",
-                         dev->mthca_flags & MTHCA_FLAG_MSI_X ?
-                         dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector :
-                         dev->pdev->irq);
-               if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))
-                       mthca_err(dev, "Try again with MSI/MSI-X disabled.\n");
-               else
+               if (dev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X)) {
+                       mthca_warn(dev, "NOP command failed to generate interrupt "
+                                  "(IRQ %d).\n",
+                                  dev->mthca_flags & MTHCA_FLAG_MSI_X ?
+                                  dev->eq_table.eq[MTHCA_EQ_CMD].msi_x_vector :
+                                  dev->pdev->irq);
+                       mthca_warn(dev, "Trying again with MSI/MSI-X disabled.\n");
+               } else {
+                       mthca_err(dev, "NOP command failed to generate interrupt "
+                                 "(IRQ %d), aborting.\n",
+                                 dev->pdev->irq);
                        mthca_err(dev, "BIOS or ACPI interrupt routing problem?\n");
+               }
 
                goto err_cmd_poll;
        }
@@ -1115,24 +1103,6 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
                goto err_free_dev;
        }
 
-       if (msi_x && !mthca_enable_msi_x(mdev))
-               mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
-       else if (msi) {
-               static int warned;
-
-               if (!warned) {
-                       printk(KERN_WARNING PFX "WARNING: MSI support will be "
-                              "removed from the ib_mthca driver in January 2008.\n");
-                       printk(KERN_WARNING "    If you are using MSI and cannot "
-                              "switch to MSI-X, please tell "
-                              "<general@lists.openfabrics.org>.\n");
-                       ++warned;
-               }
-
-               if (!pci_enable_msi(pdev))
-                       mdev->mthca_flags |= MTHCA_FLAG_MSI;
-       }
-
        if (mthca_cmd_init(mdev)) {
                mthca_err(mdev, "Failed to init command interface, aborting.\n");
                goto err_free_dev;
@@ -1156,7 +1126,35 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
                mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n");
        }
 
+       if (msi_x && !mthca_enable_msi_x(mdev))
+               mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
+       else if (msi) {
+               static int warned;
+
+               if (!warned) {
+                       printk(KERN_WARNING PFX "WARNING: MSI support will be "
+                              "removed from the ib_mthca driver in January 2008.\n");
+                       printk(KERN_WARNING "    If you are using MSI and cannot "
+                              "switch to MSI-X, please tell "
+                              "<general@lists.openfabrics.org>.\n");
+                       ++warned;
+               }
+
+               if (!pci_enable_msi(pdev))
+                       mdev->mthca_flags |= MTHCA_FLAG_MSI;
+       }
+
        err = mthca_setup_hca(mdev);
+       if (err == -EBUSY && (mdev->mthca_flags & (MTHCA_FLAG_MSI | MTHCA_FLAG_MSI_X))) {
+               if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
+                       pci_disable_msix(pdev);
+               if (mdev->mthca_flags & MTHCA_FLAG_MSI)
+                       pci_disable_msi(pdev);
+               mdev->mthca_flags &= ~(MTHCA_FLAG_MSI_X | MTHCA_FLAG_MSI);
+
+               err = mthca_setup_hca(mdev);
+       }
+
        if (err)
                goto err_close;
 
@@ -1192,17 +1190,17 @@ err_cleanup:
        mthca_cleanup_uar_table(mdev);
 
 err_close:
+       if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
+               pci_disable_msix(pdev);
+       if (mdev->mthca_flags & MTHCA_FLAG_MSI)
+               pci_disable_msi(pdev);
+
        mthca_close_hca(mdev);
 
 err_cmd:
        mthca_cmd_cleanup(mdev);
 
 err_free_dev:
-       if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
-               pci_disable_msix(pdev);
-       if (mdev->mthca_flags & MTHCA_FLAG_MSI)
-               pci_disable_msi(pdev);
-
        ib_dealloc_device(&mdev->ib_dev);
 
 err_free_res:
index 88d219e730ad5a2d773a6206512fabae40bd07a2..3f58c11a62b77f2618714e682ad8aea7bddf710a 100644 (file)
@@ -509,7 +509,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
        for (nreq = 0; wr; wr = wr->next) {
                ind = srq->first_free;
 
-               if (ind < 0) {
+               if (unlikely(ind < 0)) {
                        mthca_err(dev, "SRQ %06x full\n", srq->srqn);
                        err = -ENOMEM;
                        *bad_wr = wr;
@@ -519,7 +519,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
                wqe       = get_wqe(srq, ind);
                next_ind  = *wqe_to_link(wqe);
 
-               if (next_ind < 0) {
+               if (unlikely(next_ind < 0)) {
                        mthca_err(dev, "SRQ %06x full\n", srq->srqn);
                        err = -ENOMEM;
                        *bad_wr = wr;
@@ -623,7 +623,7 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
                ind = srq->first_free;
 
-               if (ind < 0) {
+               if (unlikely(ind < 0)) {
                        mthca_err(dev, "SRQ %06x full\n", srq->srqn);
                        err = -ENOMEM;
                        *bad_wr = wr;
@@ -633,7 +633,7 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
                wqe       = get_wqe(srq, ind);
                next_ind  = *wqe_to_link(wqe);
 
-               if (next_ind < 0) {
+               if (unlikely(next_ind < 0)) {
                        mthca_err(dev, "SRQ %06x full\n", srq->srqn);
                        err = -ENOMEM;
                        *bad_wr = wr;
index 34c6128d2a34836902307260ec6d158eea24e30b..6545fa798b12664e96be153eaaec305249c2c9d8 100644 (file)
@@ -86,6 +86,7 @@ enum {
        IPOIB_MCAST_STARTED       = 8,
        IPOIB_FLAG_NETIF_STOPPED  = 9,
        IPOIB_FLAG_ADMIN_CM       = 10,
+       IPOIB_FLAG_UMCAST         = 11,
 
        IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -113,7 +114,27 @@ struct ipoib_pseudoheader {
        u8  hwaddr[INFINIBAND_ALEN];
 };
 
-struct ipoib_mcast;
+/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
+struct ipoib_mcast {
+       struct ib_sa_mcmember_rec mcmember;
+       struct ib_sa_multicast   *mc;
+       struct ipoib_ah          *ah;
+
+       struct rb_node    rb_node;
+       struct list_head  list;
+
+       unsigned long created;
+       unsigned long backoff;
+
+       unsigned long flags;
+       unsigned char logcount;
+
+       struct list_head  neigh_list;
+
+       struct sk_buff_head pkt_queue;
+
+       struct net_device *dev;
+};
 
 struct ipoib_rx_buf {
        struct sk_buff *skb;
@@ -364,6 +385,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
 
 int ipoib_open(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
+int ipoib_add_umcast_attr(struct net_device *dev);
 
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
                struct ipoib_ah *address, u32 qpn);
index 1afd93cdd6bbb759bafd3878cbb962a0798efce3..0a0dcb8fdfd1ac2ca54ec326974341a6ff187baa 100644 (file)
@@ -810,14 +810,16 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ib_qp_init_attr attr = {};
-       attr.recv_cq = priv->cq;
-       attr.srq = priv->cm.srq;
-       attr.cap.max_send_wr = ipoib_sendq_size;
-       attr.cap.max_send_sge = 1;
-       attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-       attr.qp_type = IB_QPT_RC;
-       attr.send_cq = cq;
+       struct ib_qp_init_attr attr = {
+               .send_cq                = cq,
+               .recv_cq                = priv->cq,
+               .srq                    = priv->cm.srq,
+               .cap.max_send_wr        = ipoib_sendq_size,
+               .cap.max_send_sge       = 1,
+               .sq_sig_type            = IB_SIGNAL_ALL_WR,
+               .qp_type                = IB_QPT_RC,
+        };
+
        return ib_create_qp(priv->pd, &attr);
 }
 
index 0ec28c302fbf154c999541f4f14f477668530b3e..1a77e79f6b432748accdbb9a813f9a257aa4c1c6 100644 (file)
@@ -553,6 +553,14 @@ void ipoib_drain_cq(struct net_device *dev)
        do {
                n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
                for (i = 0; i < n; ++i) {
+                       /*
+                        * Convert any successful completions to flush
+                        * errors to avoid passing packets up the
+                        * stack after bringing the device down.
+                        */
+                       if (priv->ibwc[i].status == IB_WC_SUCCESS)
+                               priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
+
                        if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
                                ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
                        else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
index 855c9deca8b716569d7014a0117ad220d00b6c73..e072f3c32ce6f307aa3bdaf7935557b4bc5a53d0 100644 (file)
@@ -473,9 +473,10 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
        INIT_LIST_HEAD(&path->neigh_list);
 
        memcpy(path->pathrec.dgid.raw, gid, sizeof (union ib_gid));
-       path->pathrec.sgid      = priv->local_gid;
-       path->pathrec.pkey      = cpu_to_be16(priv->pkey);
-       path->pathrec.numb_path = 1;
+       path->pathrec.sgid          = priv->local_gid;
+       path->pathrec.pkey          = cpu_to_be16(priv->pkey);
+       path->pathrec.numb_path     = 1;
+       path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
 
        return path;
 }
@@ -496,6 +497,7 @@ static int path_rec_start(struct net_device *dev,
                                   IB_SA_PATH_REC_DGID          |
                                   IB_SA_PATH_REC_SGID          |
                                   IB_SA_PATH_REC_NUMB_PATH     |
+                                  IB_SA_PATH_REC_TRAFFIC_CLASS |
                                   IB_SA_PATH_REC_PKEY,
                                   1000, GFP_ATOMIC,
                                   path_rec_completion,
@@ -1015,6 +1017,37 @@ static ssize_t show_pkey(struct device *dev,
 }
 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
 
+static ssize_t show_umcast(struct device *dev,
+                          struct device_attribute *attr, char *buf)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+
+       return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
+}
+
+static ssize_t set_umcast(struct device *dev,
+                         struct device_attribute *attr,
+                         const char *buf, size_t count)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
+       unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
+
+       if (umcast_val > 0) {
+               set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
+               ipoib_warn(priv, "ignoring multicast groups joined directly "
+                               "by userspace\n");
+       } else
+               clear_bit(IPOIB_FLAG_UMCAST, &priv->flags);
+
+       return count;
+}
+static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast);
+
+int ipoib_add_umcast_attr(struct net_device *dev)
+{
+       return device_create_file(&dev->dev, &dev_attr_umcast);
+}
+
 static ssize_t create_child(struct device *dev,
                            struct device_attribute *attr,
                            const char *buf, size_t count)
@@ -1081,7 +1114,7 @@ static struct net_device *ipoib_add_port(const char *format,
        if (result) {
                printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
                       hca->name, port, result);
-               goto alloc_mem_failed;
+               goto device_init_failed;
        }
 
        /*
@@ -1097,7 +1130,7 @@ static struct net_device *ipoib_add_port(const char *format,
        if (result) {
                printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
                       hca->name, port, result);
-               goto alloc_mem_failed;
+               goto device_init_failed;
        } else
                memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
 
@@ -1132,6 +1165,8 @@ static struct net_device *ipoib_add_port(const char *format,
                goto sysfs_failed;
        if (ipoib_add_pkey_attr(priv->dev))
                goto sysfs_failed;
+       if (ipoib_add_umcast_attr(priv->dev))
+               goto sysfs_failed;
        if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
                goto sysfs_failed;
        if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
index 98e904a7f3e8f3226f6119a98eb8a22cdf7bd77f..827820ec66d1f3db03cb3a23698ef0244176d9e2 100644 (file)
@@ -57,28 +57,6 @@ MODULE_PARM_DESC(mcast_debug_level,
 
 static DEFINE_MUTEX(mcast_mutex);
 
-/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
-struct ipoib_mcast {
-       struct ib_sa_mcmember_rec mcmember;
-       struct ib_sa_multicast   *mc;
-       struct ipoib_ah          *ah;
-
-       struct rb_node    rb_node;
-       struct list_head  list;
-
-       unsigned long created;
-       unsigned long backoff;
-
-       unsigned long flags;
-       unsigned char logcount;
-
-       struct list_head  neigh_list;
-
-       struct sk_buff_head pkt_queue;
-
-       struct net_device *dev;
-};
-
 struct ipoib_mcast_iter {
        struct net_device *dev;
        union ib_gid       mgid;
@@ -783,6 +761,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
        struct ipoib_mcast *mcast, *tmcast;
        LIST_HEAD(remove_list);
        unsigned long flags;
+       struct ib_sa_mcmember_rec rec;
 
        ipoib_dbg_mcast(priv, "restarting multicast task\n");
 
@@ -816,6 +795,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
                if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
                        struct ipoib_mcast *nmcast;
 
+                       /* ignore group which is directly joined by userspace */
+                       if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
+                           !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
+                               ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid "
+                                               IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));
+                               continue;
+                       }
+
                        /* Not found or send-only group, let's add a new entry */
                        ipoib_dbg_mcast(priv, "adding multicast entry for mgid "
                                        IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));
index 563aeacf9e14088cc9054030f418d4c0bf717756..3c6e45db0ab5079b3c36c39ec0bb34ce597a6055 100644 (file)
@@ -185,7 +185,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
                goto out_free_cq;
 
        init_attr.send_cq = priv->cq;
-       init_attr.recv_cq = priv->cq,
+       init_attr.recv_cq = priv->cq;
 
        priv->qp = ib_create_qp(priv->pd, &init_attr);
        if (IS_ERR(priv->qp)) {
index 6762988439d1f6fcd96b82aad8a25f51e7bf5a86..293f5b892e3f5f20d33ababa2a50f1688254f891 100644 (file)
@@ -119,6 +119,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
                goto sysfs_failed;
        if (ipoib_add_pkey_attr(priv->dev))
                goto sysfs_failed;
+       if (ipoib_add_umcast_attr(priv->dev))
+               goto sysfs_failed;
 
        if (device_create_file(&priv->dev->dev, &dev_attr_parent))
                goto sysfs_failed;
index 9ea5b9aaba7c679059f3b0c8d298506a152facc7..a6f2303ed14a40016fe09bd1e13757fe6b49467b 100644 (file)
@@ -34,8 +34,6 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-#include <asm/io.h>
-#include <asm/scatterlist.h>
 #include <linux/scatterlist.h>
 #include <linux/kfifo.h>
 #include <scsi/scsi_cmnd.h>
index 36cdf77ae92abd79d1bb34818052034c92e16648..e05690e3592ffeefc154e69009053588d361d434 100644 (file)
@@ -36,8 +36,6 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
-#include <asm/io.h>
-#include <asm/scatterlist.h>
 #include <linux/scatterlist.h>
 
 #include "iscsi_iser.h"
index d42ec0156eec7bb215317ea649a8140da451785a..654a4dce0236ac8dfd65ceacb946c0d684891e88 100644 (file)
@@ -32,7 +32,6 @@
  *
  * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $
  */
-#include <asm/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/delay.h>
index f6a05142814447fb3106fb7037e0b541d0a908c7..9ccc63886d92904911a46a1630747478df12693d 100644 (file)
@@ -285,6 +285,7 @@ static int srp_lookup_path(struct srp_target_port *target)
                                                   target->srp_host->dev->dev,
                                                   target->srp_host->port,
                                                   &target->path,
+                                                  IB_SA_PATH_REC_SERVICE_ID    |
                                                   IB_SA_PATH_REC_DGID          |
                                                   IB_SA_PATH_REC_SGID          |
                                                   IB_SA_PATH_REC_NUMB_PATH     |
@@ -1692,6 +1693,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
                                goto out;
                        }
                        target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
+                       target->path.service_id = target->service_id;
                        kfree(p);
                        break;
 
index a9f31753661acc66a54ca7930b78781524187273..db49051b97b168b7a004e52c865bef94311ce56a 100644 (file)
@@ -95,7 +95,7 @@ enum {
 };
 
 enum {
-       GO_BIT_TIMEOUT          = 10000
+       GO_BIT_TIMEOUT_MSECS    = 10000
 };
 
 struct mlx4_cmd_context {
@@ -155,7 +155,7 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
 
        end = jiffies;
        if (event)
-               end += HZ * 10;
+               end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS);
 
        while (cmd_pending(dev)) {
                if (time_after_eq(jiffies, end))
@@ -184,6 +184,13 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param,
                                               (event ? (1 << HCR_E_BIT) : 0)   |
                                               (op_modifier << HCR_OPMOD_SHIFT) |
                                               op),                       hcr + 6);
+
+       /*
+        * Make sure that our HCR writes don't get mixed in with
+        * writes from another CPU starting a FW command.
+        */
+       mmiowb();
+
        cmd->toggle = cmd->toggle ^ 1;
 
        ret = 0;
index 39253d0c1590eca709e54f5bab833990926e0860..d4441fee3d80b9dc44fffd6a48b9e3f4a37e71e0 100644 (file)
@@ -231,7 +231,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
 }
 EXPORT_SYMBOL_GPL(mlx4_cq_free);
 
-int __devinit mlx4_init_cq_table(struct mlx4_dev *dev)
+int mlx4_init_cq_table(struct mlx4_dev *dev)
 {
        struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
        int err;
index 2095c843fa15067dd25e67a82119d89e53c15368..9c36c2034030afbfd5250e7817f865968d02e73b 100644 (file)
@@ -300,8 +300,7 @@ static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
                            MLX4_CMD_TIME_CLASS_A);
 }
 
-static void __devinit __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev,
-                                              struct mlx4_eq *eq)
+static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        int index;
@@ -323,8 +322,8 @@ static void __devinit __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev,
        return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4);
 }
 
-static int __devinit mlx4_create_eq(struct mlx4_dev *dev, int nent,
-                                   u8 intr, struct mlx4_eq *eq)
+static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
+                         u8 intr, struct mlx4_eq *eq)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_cmd_mailbox *mailbox;
@@ -485,7 +484,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)
                        free_irq(eq_table->eq[i].irq, eq_table->eq + i);
 }
 
-static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev)
+static int mlx4_map_clr_int(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
 
@@ -506,7 +505,7 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
        iounmap(priv->clr_base);
 }
 
-int __devinit mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
+int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        int ret;
@@ -548,7 +547,7 @@ void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
        __free_page(priv->eq_table.icm_page);
 }
 
-int __devinit mlx4_init_eq_table(struct mlx4_dev *dev)
+int mlx4_init_eq_table(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;
index c45cbe43a0c483830891e109beb306d11dc51d2a..6471d33afb7d8509b927105bcb89280aa4e76ce2 100644 (file)
@@ -76,7 +76,7 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u32 flags)
                [ 0] = "RC transport",
                [ 1] = "UC transport",
                [ 2] = "UD transport",
-               [ 3] = "SRC transport",
+               [ 3] = "XRC transport",
                [ 4] = "reliable multicast",
                [ 5] = "FCoIB support",
                [ 6] = "SRQ support",
index b7a4aa8476fb48b095819276b4e6732fe7e83ac4..4b3c109d5eaee2d6f7c73a5a31db57bca749c053 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
+#include <linux/scatterlist.h>
 
 #include <linux/mlx4/cmd.h>
 
@@ -50,19 +51,41 @@ enum {
        MLX4_TABLE_CHUNK_SIZE   = 1 << 18
 };
 
-void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm)
+static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
 {
-       struct mlx4_icm_chunk *chunk, *tmp;
        int i;
 
-       list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
-               if (chunk->nsg > 0)
-                       pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
-                                    PCI_DMA_BIDIRECTIONAL);
+       if (chunk->nsg > 0)
+               pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
+                            PCI_DMA_BIDIRECTIONAL);
+
+       for (i = 0; i < chunk->npages; ++i)
+               __free_pages(chunk->mem[i].page,
+                            get_order(chunk->mem[i].length));
+}
+
+static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
+{
+       int i;
+
+       for (i = 0; i < chunk->npages; ++i)
+               dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
+                                 lowmem_page_address(chunk->mem[i].page),
+                                 sg_dma_address(&chunk->mem[i]));
+}
+
+void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
+{
+       struct mlx4_icm_chunk *chunk, *tmp;
 
-               for (i = 0; i < chunk->npages; ++i)
-                       __free_pages(chunk->mem[i].page,
-                                    get_order(chunk->mem[i].length));
+       if (!icm)
+               return;
+
+       list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
+               if (coherent)
+                       mlx4_free_icm_coherent(dev, chunk);
+               else
+                       mlx4_free_icm_pages(dev, chunk);
 
                kfree(chunk);
        }
@@ -70,16 +93,45 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm)
        kfree(icm);
 }
 
+static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
+{
+       mem->page = alloc_pages(gfp_mask, order);
+       if (!mem->page)
+               return -ENOMEM;
+
+       mem->length = PAGE_SIZE << order;
+       mem->offset = 0;
+       return 0;
+}
+
+static int mlx4_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
+                                   int order, gfp_t gfp_mask)
+{
+       void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order,
+                                      &sg_dma_address(mem), gfp_mask);
+       if (!buf)
+               return -ENOMEM;
+
+       sg_set_buf(mem, buf, PAGE_SIZE << order);
+       BUG_ON(mem->offset);
+       sg_dma_len(mem) = PAGE_SIZE << order;
+       return 0;
+}
+
 struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
-                               gfp_t gfp_mask)
+                               gfp_t gfp_mask, int coherent)
 {
        struct mlx4_icm *icm;
        struct mlx4_icm_chunk *chunk = NULL;
        int cur_order;
+       int ret;
+
+       /* We use sg_set_buf for coherent allocs, which assumes low memory */
+       BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
 
        icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
        if (!icm)
-               return icm;
+               return NULL;
 
        icm->refcount = 0;
        INIT_LIST_HEAD(&icm->chunk_list);
@@ -101,12 +153,20 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
                while (1 << cur_order > npages)
                        --cur_order;
 
-               chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order);
-               if (chunk->mem[chunk->npages].page) {
-                       chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order;
-                       chunk->mem[chunk->npages].offset = 0;
+               if (coherent)
+                       ret = mlx4_alloc_icm_coherent(&dev->pdev->dev,
+                                                     &chunk->mem[chunk->npages],
+                                                     cur_order, gfp_mask);
+               else
+                       ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages],
+                                                  cur_order, gfp_mask);
+
+               if (!ret) {
+                       ++chunk->npages;
 
-                       if (++chunk->npages == MLX4_ICM_CHUNK_LEN) {
+                       if (coherent)
+                               ++chunk->nsg;
+                       else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
                                chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
                                                        chunk->npages,
                                                        PCI_DMA_BIDIRECTIONAL);
@@ -125,7 +185,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
                }
        }
 
-       if (chunk) {
+       if (!coherent && chunk) {
                chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
                                        chunk->npages,
                                        PCI_DMA_BIDIRECTIONAL);
@@ -137,7 +197,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
        return icm;
 
 fail:
-       mlx4_free_icm(dev, icm);
+       mlx4_free_icm(dev, icm, coherent);
        return NULL;
 }
 
@@ -202,7 +262,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
 
        table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
                                       (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
-                                      __GFP_NOWARN);
+                                      __GFP_NOWARN, table->coherent);
        if (!table->icm[i]) {
                ret = -ENOMEM;
                goto out;
@@ -210,7 +270,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
 
        if (mlx4_MAP_ICM(dev, table->icm[i], table->virt +
                         (u64) i * MLX4_TABLE_CHUNK_SIZE)) {
-               mlx4_free_icm(dev, table->icm[i]);
+               mlx4_free_icm(dev, table->icm[i], table->coherent);
                table->icm[i] = NULL;
                ret = -ENOMEM;
                goto out;
@@ -234,16 +294,16 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
        if (--table->icm[i]->refcount == 0) {
                mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
                               MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
-               mlx4_free_icm(dev, table->icm[i]);
+               mlx4_free_icm(dev, table->icm[i], table->coherent);
                table->icm[i] = NULL;
        }
 
        mutex_unlock(&table->mutex);
 }
 
-void *mlx4_table_find(struct mlx4_icm_table *table, int obj)
+void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle)
 {
-       int idx, offset, i;
+       int idx, offset, dma_offset, i;
        struct mlx4_icm_chunk *chunk;
        struct mlx4_icm *icm;
        struct page *page = NULL;
@@ -253,15 +313,26 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj)
 
        mutex_lock(&table->mutex);
 
-       idx = obj & (table->num_obj - 1);
-       icm = table->icm[idx / (MLX4_TABLE_CHUNK_SIZE / table->obj_size)];
-       offset = idx % (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
+       idx = (obj & (table->num_obj - 1)) * table->obj_size;
+       icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE];
+       dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE;
 
        if (!icm)
                goto out;
 
        list_for_each_entry(chunk, &icm->chunk_list, list) {
                for (i = 0; i < chunk->npages; ++i) {
+                       if (dma_handle && dma_offset >= 0) {
+                               if (sg_dma_len(&chunk->mem[i]) > dma_offset)
+                                       *dma_handle = sg_dma_address(&chunk->mem[i]) +
+                                               dma_offset;
+                               dma_offset -= sg_dma_len(&chunk->mem[i]);
+                       }
+                       /*
+                        * DMA mapping can merge pages but not split them,
+                        * so if we found the page, dma_handle has already
+                        * been assigned to.
+                        */
                        if (chunk->mem[i].length > offset) {
                                page = chunk->mem[i].page;
                                goto out;
@@ -309,7 +380,7 @@ void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 
 int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
                        u64 virt, int obj_size, int nobj, int reserved,
-                       int use_lowmem)
+                       int use_lowmem, int use_coherent)
 {
        int obj_per_chunk;
        int num_icm;
@@ -327,6 +398,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
        table->num_obj  = nobj;
        table->obj_size = obj_size;
        table->lowmem   = use_lowmem;
+       table->coherent = use_coherent;
        mutex_init(&table->mutex);
 
        for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
@@ -336,11 +408,11 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 
                table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
                                               (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
-                                              __GFP_NOWARN);
+                                              __GFP_NOWARN, use_coherent);
                if (!table->icm[i])
                        goto err;
                if (mlx4_MAP_ICM(dev, table->icm[i], virt + i * MLX4_TABLE_CHUNK_SIZE)) {
-                       mlx4_free_icm(dev, table->icm[i]);
+                       mlx4_free_icm(dev, table->icm[i], use_coherent);
                        table->icm[i] = NULL;
                        goto err;
                }
@@ -359,7 +431,7 @@ err:
                if (table->icm[i]) {
                        mlx4_UNMAP_ICM(dev, virt + i * MLX4_TABLE_CHUNK_SIZE,
                                       MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
-                       mlx4_free_icm(dev, table->icm[i]);
+                       mlx4_free_icm(dev, table->icm[i], use_coherent);
                }
 
        return -ENOMEM;
@@ -373,7 +445,7 @@ void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table)
                if (table->icm[i]) {
                        mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
                                       MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
-                       mlx4_free_icm(dev, table->icm[i]);
+                       mlx4_free_icm(dev, table->icm[i], table->coherent);
                }
 
        kfree(table->icm);
index bea223d879a59c4f09d86ac6549876fb1bef0a14..6c44edf35847e07c1de518f2bc78d4dbe70947b7 100644 (file)
@@ -67,8 +67,9 @@ struct mlx4_icm_iter {
 
 struct mlx4_dev;
 
-struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, gfp_t gfp_mask);
-void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm);
+struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
+                               gfp_t gfp_mask, int coherent);
+void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
 
 int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
 void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
@@ -78,11 +79,11 @@ void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
                          int start, int end);
 int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
                        u64 virt, int obj_size, int nobj, int reserved,
-                       int use_lowmem);
+                       int use_lowmem, int use_coherent);
 void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
 int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
 void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
-void *mlx4_table_find(struct mlx4_icm_table *table, int obj);
+void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle);
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
                         int start, int end);
 void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
index 4dc9dc19b7167a22bc69e8750aaccec6ccb0417e..e029b8afbd370d8077dc21086ec742192159b180 100644 (file)
@@ -61,7 +61,7 @@ MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
 
 #ifdef CONFIG_PCI_MSI
 
-static int msi_x;
+static int msi_x = 1;
 module_param(msi_x, int, 0444);
 MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
 
@@ -85,7 +85,7 @@ static struct mlx4_profile default_profile = {
        .num_mtt        = 1 << 20,
 };
 
-static int __devinit mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
+static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 {
        int err;
        int i;
@@ -149,7 +149,8 @@ static int __devinit mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev
        dev->caps.max_cqes           = dev_cap->max_cq_sz - 1;
        dev->caps.reserved_cqs       = dev_cap->reserved_cqs;
        dev->caps.reserved_eqs       = dev_cap->reserved_eqs;
-       dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
+       dev->caps.reserved_mtts      = DIV_ROUND_UP(dev_cap->reserved_mtts,
+                                                   MLX4_MTT_ENTRY_PER_SEG);
        dev->caps.reserved_mrws      = dev_cap->reserved_mrws;
        dev->caps.reserved_uars      = dev_cap->reserved_uars;
        dev->caps.reserved_pds       = dev_cap->reserved_pds;
@@ -168,7 +169,7 @@ static int __devinit mlx4_load_fw(struct mlx4_dev *dev)
        int err;
 
        priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
-                                        GFP_HIGHUSER | __GFP_NOWARN);
+                                        GFP_HIGHUSER | __GFP_NOWARN, 0);
        if (!priv->fw.fw_icm) {
                mlx4_err(dev, "Couldn't allocate FW area, aborting.\n");
                return -ENOMEM;
@@ -192,7 +193,7 @@ err_unmap_fa:
        mlx4_UNMAP_FA(dev);
 
 err_free:
-       mlx4_free_icm(dev, priv->fw.fw_icm);
+       mlx4_free_icm(dev, priv->fw.fw_icm, 0);
        return err;
 }
 
@@ -207,7 +208,7 @@ static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
                                  ((u64) (MLX4_CMPT_TYPE_QP *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_qps,
-                                 dev->caps.reserved_qps, 0);
+                                 dev->caps.reserved_qps, 0, 0);
        if (err)
                goto err;
 
@@ -216,7 +217,7 @@ static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
                                  ((u64) (MLX4_CMPT_TYPE_SRQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_srqs,
-                                 dev->caps.reserved_srqs, 0);
+                                 dev->caps.reserved_srqs, 0, 0);
        if (err)
                goto err_qp;
 
@@ -225,7 +226,7 @@ static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
                                  ((u64) (MLX4_CMPT_TYPE_CQ *
                                          cmpt_entry_sz) << MLX4_CMPT_SHIFT),
                                  cmpt_entry_sz, dev->caps.num_cqs,
-                                 dev->caps.reserved_cqs, 0);
+                                 dev->caps.reserved_cqs, 0, 0);
        if (err)
                goto err_srq;
 
@@ -236,7 +237,7 @@ static int __devinit mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
                                  cmpt_entry_sz,
                                  roundup_pow_of_two(MLX4_NUM_EQ +
                                                     dev->caps.reserved_eqs),
-                                 MLX4_NUM_EQ + dev->caps.reserved_eqs, 0);
+                                 MLX4_NUM_EQ + dev->caps.reserved_eqs, 0, 0);
        if (err)
                goto err_cq;
 
@@ -255,10 +256,8 @@ err:
        return err;
 }
 
-static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
-                                  struct mlx4_dev_cap *dev_cap,
-                                  struct mlx4_init_hca_param *init_hca,
-                                  u64 icm_size)
+static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
+                        struct mlx4_init_hca_param *init_hca, u64 icm_size)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        u64 aux_pages;
@@ -275,7 +274,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
                 (unsigned long long) aux_pages << 2);
 
        priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
-                                         GFP_HIGHUSER | __GFP_NOWARN);
+                                         GFP_HIGHUSER | __GFP_NOWARN, 0);
        if (!priv->fw.aux_icm) {
                mlx4_err(dev, "Couldn't allocate aux memory, aborting.\n");
                return -ENOMEM;
@@ -299,11 +298,22 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
                goto err_unmap_cmpt;
        }
 
+       /*
+        * Reserved MTT entries must be aligned up to a cacheline
+        * boundary, since the FW will write to them, while the driver
+        * writes to all other MTT entries. (The variable
+        * dev->caps.mtt_entry_sz below is really the MTT segment
+        * size, not the raw entry size)
+        */
+       dev->caps.reserved_mtts =
+               ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
+                     dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
+
        err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
                                  init_hca->mtt_base,
                                  dev->caps.mtt_entry_sz,
                                  dev->caps.num_mtt_segs,
-                                 dev->caps.reserved_mtts, 1);
+                                 dev->caps.reserved_mtts, 1, 0);
        if (err) {
                mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
                goto err_unmap_eq;
@@ -313,7 +323,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
                                  init_hca->dmpt_base,
                                  dev_cap->dmpt_entry_sz,
                                  dev->caps.num_mpts,
-                                 dev->caps.reserved_mrws, 1);
+                                 dev->caps.reserved_mrws, 1, 1);
        if (err) {
                mlx4_err(dev, "Failed to map dMPT context memory, aborting.\n");
                goto err_unmap_mtt;
@@ -323,7 +333,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
                                  init_hca->qpc_base,
                                  dev_cap->qpc_entry_sz,
                                  dev->caps.num_qps,
-                                 dev->caps.reserved_qps, 0);
+                                 dev->caps.reserved_qps, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map QP context memory, aborting.\n");
                goto err_unmap_dmpt;
@@ -333,7 +343,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
                                  init_hca->auxc_base,
                                  dev_cap->aux_entry_sz,
                                  dev->caps.num_qps,
-                                 dev->caps.reserved_qps, 0);
+                                 dev->caps.reserved_qps, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map AUXC context memory, aborting.\n");
                goto err_unmap_qp;
@@ -343,7 +353,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
                                  init_hca->altc_base,
                                  dev_cap->altc_entry_sz,
                                  dev->caps.num_qps,
-                                 dev->caps.reserved_qps, 0);
+                                 dev->caps.reserved_qps, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map ALTC context memory, aborting.\n");
                goto err_unmap_auxc;
@@ -353,7 +363,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
                                  init_hca->rdmarc_base,
                                  dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
                                  dev->caps.num_qps,
-                                 dev->caps.reserved_qps, 0);
+                                 dev->caps.reserved_qps, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
                goto err_unmap_altc;
@@ -363,7 +373,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
                                  init_hca->cqc_base,
                                  dev_cap->cqc_entry_sz,
                                  dev->caps.num_cqs,
-                                 dev->caps.reserved_cqs, 0);
+                                 dev->caps.reserved_cqs, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map CQ context memory, aborting.\n");
                goto err_unmap_rdmarc;
@@ -373,7 +383,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
                                  init_hca->srqc_base,
                                  dev_cap->srq_entry_sz,
                                  dev->caps.num_srqs,
-                                 dev->caps.reserved_srqs, 0);
+                                 dev->caps.reserved_srqs, 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map SRQ context memory, aborting.\n");
                goto err_unmap_cq;
@@ -388,7 +398,7 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
                                  init_hca->mc_base, MLX4_MGM_ENTRY_SIZE,
                                  dev->caps.num_mgms + dev->caps.num_amgms,
                                  dev->caps.num_mgms + dev->caps.num_amgms,
-                                 0);
+                                 0, 0);
        if (err) {
                mlx4_err(dev, "Failed to map MCG context memory, aborting.\n");
                goto err_unmap_srq;
@@ -433,7 +443,7 @@ err_unmap_aux:
        mlx4_UNMAP_ICM_AUX(dev);
 
 err_free_aux:
-       mlx4_free_icm(dev, priv->fw.aux_icm);
+       mlx4_free_icm(dev, priv->fw.aux_icm, 0);
 
        return err;
 }
@@ -458,7 +468,7 @@ static void mlx4_free_icms(struct mlx4_dev *dev)
        mlx4_unmap_eq_icm(dev);
 
        mlx4_UNMAP_ICM_AUX(dev);
-       mlx4_free_icm(dev, priv->fw.aux_icm);
+       mlx4_free_icm(dev, priv->fw.aux_icm, 0);
 }
 
 static void mlx4_close_hca(struct mlx4_dev *dev)
@@ -466,10 +476,10 @@ static void mlx4_close_hca(struct mlx4_dev *dev)
        mlx4_CLOSE_HCA(dev, 0);
        mlx4_free_icms(dev);
        mlx4_UNMAP_FA(dev);
-       mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm);
+       mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
 }
 
-static int __devinit mlx4_init_hca(struct mlx4_dev *dev)
+static int mlx4_init_hca(struct mlx4_dev *dev)
 {
        struct mlx4_priv          *priv = mlx4_priv(dev);
        struct mlx4_adapter        adapter;
@@ -524,8 +534,8 @@ static int __devinit mlx4_init_hca(struct mlx4_dev *dev)
        }
 
        priv->eq_table.inta_pin = adapter.inta_pin;
-       priv->rev_id            = adapter.revision_id;
-       memcpy(priv->board_id, adapter.board_id, sizeof priv->board_id);
+       dev->rev_id             = adapter.revision_id;
+       memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id);
 
        return 0;
 
@@ -537,12 +547,12 @@ err_free_icm:
 
 err_stop_fw:
        mlx4_UNMAP_FA(dev);
-       mlx4_free_icm(dev, priv->fw.fw_icm);
+       mlx4_free_icm(dev, priv->fw.fw_icm, 0);
 
        return err;
 }
 
-static int __devinit mlx4_setup_hca(struct mlx4_dev *dev)
+static int mlx4_setup_hca(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;
@@ -599,13 +609,17 @@ static int __devinit mlx4_setup_hca(struct mlx4_dev *dev)
 
        err = mlx4_NOP(dev);
        if (err) {
-               mlx4_err(dev, "NOP command failed to generate interrupt "
-                        "(IRQ %d), aborting.\n",
-                        priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
-               if (dev->flags & MLX4_FLAG_MSI_X)
-                       mlx4_err(dev, "Try again with MSI-X disabled.\n");
-               else
+               if (dev->flags & MLX4_FLAG_MSI_X) {
+                       mlx4_warn(dev, "NOP command failed to generate MSI-X "
+                                 "interrupt IRQ %d).\n",
+                                 priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+                       mlx4_warn(dev, "Trying again without MSI-X.\n");
+               } else {
+                       mlx4_err(dev, "NOP command failed to generate interrupt "
+                                "(IRQ %d), aborting.\n",
+                                priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
                        mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
+               }
 
                goto err_cmd_poll;
        }
@@ -705,19 +719,12 @@ no_msi:
                priv->eq_table.eq[i].irq = dev->pdev->irq;
 }
 
-static int __devinit mlx4_init_one(struct pci_dev *pdev,
-                                  const struct pci_device_id *id)
+static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-       static int mlx4_version_printed;
        struct mlx4_priv *priv;
        struct mlx4_dev *dev;
        int err;
 
-       if (!mlx4_version_printed) {
-               printk(KERN_INFO "%s", mlx4_version);
-               ++mlx4_version_printed;
-       }
-
        printk(KERN_INFO PFX "Initializing %s\n",
               pci_name(pdev));
 
@@ -803,8 +810,6 @@ static int __devinit mlx4_init_one(struct pci_dev *pdev,
                goto err_free_dev;
        }
 
-       mlx4_enable_msi_x(dev);
-
        if (mlx4_cmd_init(dev)) {
                mlx4_err(dev, "Failed to init command interface, aborting.\n");
                goto err_free_dev;
@@ -814,7 +819,15 @@ static int __devinit mlx4_init_one(struct pci_dev *pdev,
        if (err)
                goto err_cmd;
 
+       mlx4_enable_msi_x(dev);
+
        err = mlx4_setup_hca(dev);
+       if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) {
+               dev->flags &= ~MLX4_FLAG_MSI_X;
+               pci_disable_msix(pdev);
+               err = mlx4_setup_hca(dev);
+       }
+
        if (err)
                goto err_close;
 
@@ -838,15 +851,15 @@ err_cleanup:
        mlx4_cleanup_uar_table(dev);
 
 err_close:
+       if (dev->flags & MLX4_FLAG_MSI_X)
+               pci_disable_msix(pdev);
+
        mlx4_close_hca(dev);
 
 err_cmd:
        mlx4_cmd_cleanup(dev);
 
 err_free_dev:
-       if (dev->flags & MLX4_FLAG_MSI_X)
-               pci_disable_msix(pdev);
-
        kfree(priv);
 
 err_release_bar2:
@@ -861,7 +874,20 @@ err_disable_pdev:
        return err;
 }
 
-static void __devexit mlx4_remove_one(struct pci_dev *pdev)
+static int __devinit mlx4_init_one(struct pci_dev *pdev,
+                                  const struct pci_device_id *id)
+{
+       static int mlx4_version_printed;
+
+       if (!mlx4_version_printed) {
+               printk(KERN_INFO "%s", mlx4_version);
+               ++mlx4_version_printed;
+       }
+
+       return __mlx4_init_one(pdev, id);
+}
+
+static void mlx4_remove_one(struct pci_dev *pdev)
 {
        struct mlx4_dev  *dev  = pci_get_drvdata(pdev);
        struct mlx4_priv *priv = mlx4_priv(dev);
@@ -902,7 +928,7 @@ static void __devexit mlx4_remove_one(struct pci_dev *pdev)
 int mlx4_restart_one(struct pci_dev *pdev)
 {
        mlx4_remove_one(pdev);
-       return mlx4_init_one(pdev, NULL);
+       return __mlx4_init_one(pdev, NULL);
 }
 
 static struct pci_device_id mlx4_pci_table[] = {
index 672024a0ee71e4053237f3af7124a6ebb290d104..a99e7729d3333ac00ec982ab4f68c214bc751aea 100644 (file)
@@ -359,7 +359,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
 
-int __devinit mlx4_init_mcg_table(struct mlx4_dev *dev)
+int mlx4_init_mcg_table(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        int err;
index be304a7c2c9163bee2178c24463bdd57b955045c..53a1cdddfc13b1e4a40a37abfa630b60a942301f 100644 (file)
@@ -56,11 +56,7 @@ enum {
 };
 
 enum {
-       MLX4_BOARD_ID_LEN       = 64
-};
-
-enum {
-       MLX4_MGM_ENTRY_SIZE     =  0x40,
+       MLX4_MGM_ENTRY_SIZE     =  0x100,
        MLX4_QP_PER_MGM         = 4 * (MLX4_MGM_ENTRY_SIZE / 16 - 2),
        MLX4_MTT_ENTRY_PER_SEG  = 8
 };
@@ -133,6 +129,7 @@ struct mlx4_icm_table {
        int                     num_obj;
        int                     obj_size;
        int                     lowmem;
+       int                     coherent;
        struct mutex            mutex;
        struct mlx4_icm       **icm;
 };
@@ -277,9 +274,6 @@ struct mlx4_priv {
 
        struct mlx4_uar         driver_uar;
        void __iomem           *kar;
-
-       u32                     rev_id;
-       char                    board_id[MLX4_BOARD_ID_LEN];
 };
 
 static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
index 5b87183e62ce47c292f2b95234d5d4896369503d..0c05a10bae3bfcd290b2797b006a2b33df09ab11 100644 (file)
@@ -68,6 +68,9 @@ struct mlx4_mpt_entry {
 
 #define MLX4_MTT_FLAG_PRESENT          1
 
+#define MLX4_MPT_STATUS_SW             0xF0
+#define MLX4_MPT_STATUS_HW             0x00
+
 static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
 {
        int o;
@@ -349,58 +352,57 @@ err_table:
 }
 EXPORT_SYMBOL_GPL(mlx4_mr_enable);
 
-static int mlx4_WRITE_MTT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
-                         int num_mtt)
+static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
+                               int start_index, int npages, u64 *page_list)
 {
-       return mlx4_cmd(dev, mailbox->dma, num_mtt, 0, MLX4_CMD_WRITE_MTT,
-                       MLX4_CMD_TIME_CLASS_B);
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       __be64 *mtts;
+       dma_addr_t dma_handle;
+       int i;
+       int s = start_index * sizeof (u64);
+
+       /* All MTTs must fit in the same page */
+       if (start_index / (PAGE_SIZE / sizeof (u64)) !=
+           (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
+               return -EINVAL;
+
+       if (start_index & (MLX4_MTT_ENTRY_PER_SEG - 1))
+               return -EINVAL;
+
+       mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
+                               s / dev->caps.mtt_entry_sz, &dma_handle);
+       if (!mtts)
+               return -ENOMEM;
+
+       for (i = 0; i < npages; ++i)
+               mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
+
+       dma_sync_single(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE);
+
+       return 0;
 }
 
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
                   int start_index, int npages, u64 *page_list)
 {
-       struct mlx4_cmd_mailbox *mailbox;
-       __be64 *mtt_entry;
-       int i;
-       int err = 0;
+       int chunk;
+       int err;
 
        if (mtt->order < 0)
                return -EINVAL;
 
-       mailbox = mlx4_alloc_cmd_mailbox(dev);
-       if (IS_ERR(mailbox))
-               return PTR_ERR(mailbox);
-
-       mtt_entry = mailbox->buf;
-
        while (npages > 0) {
-               mtt_entry[0] = cpu_to_be64(mlx4_mtt_addr(dev, mtt) + start_index * 8);
-               mtt_entry[1] = 0;
-
-               for (i = 0; i < npages && i < MLX4_MAILBOX_SIZE / 8 - 2; ++i)
-                       mtt_entry[i + 2] = cpu_to_be64(page_list[i] |
-                                                      MLX4_MTT_FLAG_PRESENT);
-
-               /*
-                * If we have an odd number of entries to write, add
-                * one more dummy entry for firmware efficiency.
-                */
-               if (i & 1)
-                       mtt_entry[i + 2] = 0;
-
-               err = mlx4_WRITE_MTT(dev, mailbox, (i + 1) & ~1);
+               chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
+               err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
                if (err)
-                       goto out;
+                       return err;
 
-               npages      -= i;
-               start_index += i;
-               page_list   += i;
+               npages      -= chunk;
+               start_index += chunk;
+               page_list   += chunk;
        }
 
-out:
-       mlx4_free_cmd_mailbox(dev, mailbox);
-
-       return err;
+       return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_write_mtt);
 
@@ -428,7 +430,7 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 }
 EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);
 
-int __devinit mlx4_init_mr_table(struct mlx4_dev *dev)
+int mlx4_init_mr_table(struct mlx4_dev *dev)
 {
        struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
        int err;
@@ -444,7 +446,7 @@ int __devinit mlx4_init_mr_table(struct mlx4_dev *dev)
                goto err_buddy;
 
        if (dev->caps.reserved_mtts) {
-               if (mlx4_alloc_mtt_range(dev, ilog2(dev->caps.reserved_mtts)) == -1) {
+               if (mlx4_alloc_mtt_range(dev, fls(dev->caps.reserved_mtts - 1)) == -1) {
                        mlx4_warn(dev, "MTT table of order %d is too small.\n",
                                  mr_table->mtt_buddy.max_order);
                        err = -ENOMEM;
@@ -470,3 +472,165 @@ void mlx4_cleanup_mr_table(struct mlx4_dev *dev)
        mlx4_buddy_cleanup(&mr_table->mtt_buddy);
        mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
 }
+
+static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list,
+                                 int npages, u64 iova)
+{
+       int i, page_mask;
+
+       if (npages > fmr->max_pages)
+               return -EINVAL;
+
+       page_mask = (1 << fmr->page_shift) - 1;
+
+       /* We are getting page lists, so va must be page aligned. */
+       if (iova & page_mask)
+               return -EINVAL;
+
+       /* Trust the user not to pass misaligned data in page_list */
+       if (0)
+               for (i = 0; i < npages; ++i) {
+                       if (page_list[i] & ~page_mask)
+                               return -EINVAL;
+               }
+
+       if (fmr->maps >= fmr->max_maps)
+               return -EINVAL;
+
+       return 0;
+}
+
+int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
+                     int npages, u64 iova, u32 *lkey, u32 *rkey)
+{
+       u32 key;
+       int i, err;
+
+       err = mlx4_check_fmr(fmr, page_list, npages, iova);
+       if (err)
+               return err;
+
+       ++fmr->maps;
+
+       key = key_to_hw_index(fmr->mr.key);
+       key += dev->caps.num_mpts;
+       *lkey = *rkey = fmr->mr.key = hw_index_to_key(key);
+
+       *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
+
+       /* Make sure MPT status is visible before writing MTT entries */
+       wmb();
+
+       for (i = 0; i < npages; ++i)
+               fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
+
+       dma_sync_single(&dev->pdev->dev, fmr->dma_handle,
+                       npages * sizeof(u64), DMA_TO_DEVICE);
+
+       fmr->mpt->key    = cpu_to_be32(key);
+       fmr->mpt->lkey   = cpu_to_be32(key);
+       fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift));
+       fmr->mpt->start  = cpu_to_be64(iova);
+
+       /* Make sure MTT entries are visible before setting MPT status */
+       wmb();
+
+       *(u8 *) fmr->mpt = MLX4_MPT_STATUS_HW;
+
+       /* Make sure MPT status is visible before consumer can use FMR */
+       wmb();
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr);
+
+int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
+                  int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       u64 mtt_seg;
+       int err = -ENOMEM;
+
+       if (page_shift < 12 || page_shift >= 32)
+               return -EINVAL;
+
+       /* All MTTs must fit in the same page */
+       if (max_pages * sizeof *fmr->mtts > PAGE_SIZE)
+               return -EINVAL;
+
+       fmr->page_shift = page_shift;
+       fmr->max_pages  = max_pages;
+       fmr->max_maps   = max_maps;
+       fmr->maps = 0;
+
+       err = mlx4_mr_alloc(dev, pd, 0, 0, access, max_pages,
+                           page_shift, &fmr->mr);
+       if (err)
+               return err;
+
+       mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
+
+       fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
+                                   fmr->mr.mtt.first_seg,
+                                   &fmr->dma_handle);
+       if (!fmr->mtts) {
+               err = -ENOMEM;
+               goto err_free;
+       }
+
+       fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table,
+                                   key_to_hw_index(fmr->mr.key), NULL);
+       if (!fmr->mpt) {
+               err = -ENOMEM;
+               goto err_free;
+       }
+
+       return 0;
+
+err_free:
+       mlx4_mr_free(dev, &fmr->mr);
+       return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);
+
+int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
+{
+       return mlx4_mr_enable(dev, &fmr->mr);
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_enable);
+
+void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
+                   u32 *lkey, u32 *rkey)
+{
+       u32 key;
+
+       if (!fmr->maps)
+               return;
+
+       key = key_to_hw_index(fmr->mr.key);
+       key &= dev->caps.num_mpts - 1;
+       *lkey = *rkey = fmr->mr.key = hw_index_to_key(key);
+
+       fmr->maps = 0;
+
+       *(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_unmap);
+
+int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
+{
+       if (fmr->maps)
+               return -EBUSY;
+
+       fmr->mr.enabled = 0;
+       mlx4_mr_free(dev, &fmr->mr);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_fmr_free);
+
+int mlx4_SYNC_TPT(struct mlx4_dev *dev)
+{
+       return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000);
+}
+EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT);
index 23dea1ee775046b92790b4071d1030d4a707c4f4..3a93c5f0f7ab8cc0ef33b8559d598b6e0fa82f5c 100644 (file)
@@ -57,7 +57,7 @@ void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn)
 }
 EXPORT_SYMBOL_GPL(mlx4_pd_free);
 
-int __devinit mlx4_init_pd_table(struct mlx4_dev *dev)
+int mlx4_init_pd_table(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
 
index 19b48c71cf7f2e315eb31ba8a47af6b824c2d181..cc4b1be182196c8b11836def9387215272d4bba8 100644 (file)
@@ -240,7 +240,8 @@ void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
        mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
        mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
 
-       mlx4_bitmap_free(&qp_table->bitmap, qp->qpn);
+       if (qp->qpn < dev->caps.sqp_start + 8)
+               mlx4_bitmap_free(&qp_table->bitmap, qp->qpn);
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_free);
 
@@ -250,7 +251,7 @@ static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
                        MLX4_CMD_TIME_CLASS_B);
 }
 
-int __devinit mlx4_init_qp_table(struct mlx4_dev *dev)
+int mlx4_init_qp_table(struct mlx4_dev *dev)
 {
        struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
        int err;
index b061c86d6839aa3bd72aa0a7828f98c477b4bec9..d23f46d692ef5daf02ad7076842b4859c558bd82 100644 (file)
@@ -227,7 +227,7 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm
        err = mlx4_QUERY_SRQ(dev, mailbox, srq->srqn);
        if (err)
                goto err_out;
-       *limit_watermark = srq_context->limit_watermark;
+       *limit_watermark = be16_to_cpu(srq_context->limit_watermark);
 
 err_out:
        mlx4_free_cmd_mailbox(dev, mailbox);
@@ -235,7 +235,7 @@ err_out:
 }
 EXPORT_SYMBOL_GPL(mlx4_srq_query);
 
-int __devinit mlx4_init_srq_table(struct mlx4_dev *dev)
+int mlx4_init_srq_table(struct mlx4_dev *dev)
 {
        struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
        int err;
index cfb78fb2c0460812b1a56cbb78e5a801a8168647..222815d91c40108f87d8c47b3a0f5541d7b0c5ae 100644 (file)
@@ -48,6 +48,10 @@ enum {
        MLX4_MAX_PORTS          = 2
 };
 
+enum {
+       MLX4_BOARD_ID_LEN = 64
+};
+
 enum {
        MLX4_DEV_CAP_FLAG_RC            = 1 <<  0,
        MLX4_DEV_CAP_FLAG_UC            = 1 <<  1,
@@ -210,6 +214,17 @@ struct mlx4_mr {
        int                     enabled;
 };
 
+struct mlx4_fmr {
+       struct mlx4_mr          mr;
+       struct mlx4_mpt_entry  *mpt;
+       __be64                 *mtts;
+       dma_addr_t              dma_handle;
+       int                     max_pages;
+       int                     max_maps;
+       int                     maps;
+       u8                      page_shift;
+};
+
 struct mlx4_uar {
        unsigned long           pfn;
        int                     index;
@@ -272,6 +287,8 @@ struct mlx4_dev {
        unsigned long           flags;
        struct mlx4_caps        caps;
        struct radix_tree_root  qp_table_tree;
+       u32                     rev_id;
+       char                    board_id[MLX4_BOARD_ID_LEN];
 };
 
 struct mlx4_init_port_param {
@@ -331,4 +348,14 @@ int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
 int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
 int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]);
 
+int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
+                     int npages, u64 iova, u32 *lkey, u32 *rkey);
+int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
+                  int max_maps, u8 page_shift, struct mlx4_fmr *fmr);
+int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
+void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
+                   u32 *lkey, u32 *rkey);
+int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
+int mlx4_SYNC_TPT(struct mlx4_dev *dev);
+
 #endif /* MLX4_DEVICE_H */
index 12243e80c70615676b224310ed90d1785128662b..a627c8682d2fbd49a3847245ad29ac3398e2a9b6 100644 (file)
@@ -477,12 +477,15 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
                   const void *private_data,
                   u8 private_data_len);
 
+#define IB_CM_MRA_FLAG_DELAY 0x80  /* Send MRA only after a duplicate msg */
+
 /**
  * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection
  *   message.
  * @cm_id: Connection identifier associated with the connection message.
- * @service_timeout: The maximum time required for the sender to reply to
- *   to the connection message.
+ * @service_timeout: The lower 5 bits specify the maximum time required for
+ *   the sender to reply to the connection message.  The upper 3 bits
+ *   specify additional control flags.
  * @private_data: Optional user-defined private data sent with the
  *   message receipt acknowledgement.
  * @private_data_len: Size of the private data buffer, in bytes.
index 5e26b2f53f86e198f1c8893c72d460d624a4c419..942692b0b92e2063664ce0af9e0550f16ab38507 100644 (file)
@@ -109,8 +109,8 @@ enum ib_sa_selector {
  * Reserved rows are indicated with comments to help maintainability.
  */
 
-/* reserved:                                                            0 */
-/* reserved:                                                            1 */
+#define IB_SA_PATH_REC_SERVICE_ID                     (IB_SA_COMP_MASK( 0) |\
+                                                       IB_SA_COMP_MASK( 1))
 #define IB_SA_PATH_REC_DGID                            IB_SA_COMP_MASK( 2)
 #define IB_SA_PATH_REC_SGID                            IB_SA_COMP_MASK( 3)
 #define IB_SA_PATH_REC_DLID                            IB_SA_COMP_MASK( 4)
@@ -123,7 +123,7 @@ enum ib_sa_selector {
 #define IB_SA_PATH_REC_REVERSIBLE                      IB_SA_COMP_MASK(11)
 #define IB_SA_PATH_REC_NUMB_PATH                       IB_SA_COMP_MASK(12)
 #define IB_SA_PATH_REC_PKEY                            IB_SA_COMP_MASK(13)
-/* reserved:                                                           14 */
+#define IB_SA_PATH_REC_QOS_CLASS                       IB_SA_COMP_MASK(14)
 #define IB_SA_PATH_REC_SL                              IB_SA_COMP_MASK(15)
 #define IB_SA_PATH_REC_MTU_SELECTOR                    IB_SA_COMP_MASK(16)
 #define IB_SA_PATH_REC_MTU                             IB_SA_COMP_MASK(17)
@@ -134,8 +134,7 @@ enum ib_sa_selector {
 #define IB_SA_PATH_REC_PREFERENCE                      IB_SA_COMP_MASK(22)
 
 struct ib_sa_path_rec {
-       /* reserved */
-       /* reserved */
+       __be64       service_id;
        union ib_gid dgid;
        union ib_gid sgid;
        __be16       dlid;
@@ -148,7 +147,7 @@ struct ib_sa_path_rec {
        int          reversible;
        u8           numb_path;
        __be16       pkey;
-       /* reserved */
+       __be16       qos_class;
        u8           sl;
        u8           mtu_selector;
        u8           mtu;
index c533d6c7903f0990ffa489581d0011bc5f094e1f..22298423cf0b3d0e597e347de0ab376b8bbcaf24 100644 (file)
@@ -45,6 +45,7 @@ struct ib_umem {
        int                     offset;
        int                     page_size;
        int                     writable;
+       int                     hugetlb;
        struct list_head        chunk_list;
        struct work_struct      work;
        struct mm_struct       *mm;
index d66b15ea82c4ffaaa92540ee6a0a2b5747e66598..29d2c7205a90f17d76928dd1c01ae9940866005c 100644 (file)
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
  */
 
+/**
+ * ib_user_mad_hdr_old - Old version of MAD packet header without pkey_index
+ * @id - ID of agent MAD received with/to be sent with
+ * @status - 0 on successful receive, ETIMEDOUT if no response
+ *   received (transaction ID in data[] will be set to TID of original
+ *   request) (ignored on send)
+ * @timeout_ms - Milliseconds to wait for response (unset on receive)
+ * @retries - Number of automatic retries to attempt
+ * @qpn - Remote QP number received from/to be sent to
+ * @qkey - Remote Q_Key to be sent with (unset on receive)
+ * @lid - Remote lid received from/to be sent to
+ * @sl - Service level received with/to be sent with
+ * @path_bits - Local path bits received with/to be sent with
+ * @grh_present - If set, GRH was received/should be sent
+ * @gid_index - Local GID index to send with (unset on receive)
+ * @hop_limit - Hop limit in GRH
+ * @traffic_class - Traffic class in GRH
+ * @gid - Remote GID in GRH
+ * @flow_label - Flow label in GRH
+ */
+struct ib_user_mad_hdr_old {
+       __u32   id;
+       __u32   status;
+       __u32   timeout_ms;
+       __u32   retries;
+       __u32   length;
+       __be32  qpn;
+       __be32  qkey;
+       __be16  lid;
+       __u8    sl;
+       __u8    path_bits;
+       __u8    grh_present;
+       __u8    gid_index;
+       __u8    hop_limit;
+       __u8    traffic_class;
+       __u8    gid[16];
+       __be32  flow_label;
+};
+
 /**
  * ib_user_mad_hdr - MAD packet header
+ *   This layout allows specifying/receiving the P_Key index.  To use
+ *   this capability, an application must call the
+ *   IB_USER_MAD_ENABLE_PKEY ioctl on the user MAD file handle before
+ *   any other actions with the file handle.
  * @id - ID of agent MAD received with/to be sent with
  * @status - 0 on successful receive, ETIMEDOUT if no response
  *   received (transaction ID in data[] will be set to TID of original
  * @traffic_class - Traffic class in GRH
  * @gid - Remote GID in GRH
  * @flow_label - Flow label in GRH
+ * @pkey_index - P_Key index
  */
 struct ib_user_mad_hdr {
        __u32   id;
@@ -88,6 +132,8 @@ struct ib_user_mad_hdr {
        __u8    traffic_class;
        __u8    gid[16];
        __be32  flow_label;
+       __u16   pkey_index;
+       __u8    reserved[6];
 };
 
 /**
@@ -101,6 +147,26 @@ struct ib_user_mad {
        __u64   data[0];
 };
 
+/*
+ * Earlier versions of this interface definition declared the
+ * method_mask[] member as an array of __u32 but treated it as a
+ * bitmap made up of longs in the kernel.  This ambiguity meant that
+ * 32-bit big-endian applications that can run on both 32-bit and
+ * 64-bit kernels had no consistent ABI to rely on, and 64-bit
+ * big-endian applications that treated method_mask as being made up
+ * of 32-bit words would have their bitmap misinterpreted.
+ *
+ * To clear up this confusion, we change the declaration of
+ * method_mask[] to use unsigned long and handle the conversion from
+ * 32-bit userspace to 64-bit kernel for big-endian systems in the
+ * compat_ioctl method.  Unfortunately, to keep the structure layout
+ * the same, we need the method_mask[] array to be aligned only to 4
+ * bytes even when long is 64 bits, which forces us into this ugly
+ * typedef.
+ */
+typedef unsigned long __attribute__((aligned(4))) packed_ulong;
+#define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof (long)))
+
 /**
  * ib_user_mad_reg_req - MAD registration request
  * @id - Set by the kernel; used to identify agent in future requests.
@@ -119,7 +185,7 @@ struct ib_user_mad {
  */
 struct ib_user_mad_reg_req {
        __u32   id;
-       __u32   method_mask[4];
+       packed_ulong method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK];
        __u8    qpn;
        __u8    mgmt_class;
        __u8    mgmt_class_version;
@@ -134,4 +200,6 @@ struct ib_user_mad_reg_req {
 
 #define IB_USER_MAD_UNREGISTER_AGENT   _IOW(IB_IOCTL_MAGIC, 2, __u32)
 
+#define IB_USER_MAD_ENABLE_PKEY                _IO(IB_IOCTL_MAGIC, 3)
+
 #endif /* IB_USER_MAD_H */
index 2d6a7705eae7fbafbbaab4a8a9d8d6c9fde8ceac..010f876f41d8b559804fee1d0e3e42f7dec1a1d6 100644 (file)
@@ -314,4 +314,18 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
  */
 void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
 
+/**
+ * rdma_set_service_type - Set the type of service associated with a
+ *   connection identifier.
+ * @id: Communication identifier to associate with the service type.
+ * @tos: Type of service.
+ *
+ * The type of service is interpreted as a differentiated service
+ * field (RFC 2474).  The service type should be specified before
+ * performing route resolution, as existing communication on the
+ * connection identifier may be unaffected.  The type of service
+ * requested may not be supported by the network to all destinations.
+ */
+void rdma_set_service_type(struct rdma_cm_id *id, int tos);
+
 #endif /* RDMA_CM_H */
index f632b0c007c9808eb0f50e60bdab3b30c0775e38..9749c1b34d008e34e353c956bc9ac1f401d92acb 100644 (file)
@@ -212,4 +212,22 @@ struct rdma_ucm_event_resp {
        } param;
 };
 
+/* Option levels */
+enum {
+       RDMA_OPTION_ID          = 0
+};
+
+/* Option details */
+enum {
+       RDMA_OPTION_ID_TOS      = 0
+};
+
+struct rdma_ucm_set_option {
+       __u64 optval;
+       __u32 id;
+       __u32 level;
+       __u32 optname;
+       __u32 optlen;
+};
+
 #endif /* RDMA_USER_CM_H */