/*
 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "iwch_provider.h"
#include "iwch.h"
#include "iwch_cm.h"
#include "cxio_hal.h"
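/*
 * The iwch_build_*() helpers below translate an ib_*_wr from the verbs
 * layer into a T3 work request image, built directly in a WQE slot of
 * the shared host work queue.  WQE sizes are reported to the firmware
 * in "flits" (8-byte units), hence the sizeof(...) >> 3 and the
 * "4 + 2 * num_sge" style arithmetic on *flit_cnt: each SGL entry is
 * 16 bytes, i.e. two flits.
 */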
static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
                                u8 *flit_cnt)
{
        int i;
        u32 plen;

        switch (wr->opcode) {
        case IB_WR_SEND:
                if (wr->send_flags & IB_SEND_SOLICITED)
                        wqe->send.rdmaop = T3_SEND_WITH_SE;
                else
                        wqe->send.rdmaop = T3_SEND;
                wqe->send.rem_stag = 0;
                break;
        case IB_WR_SEND_WITH_INV:
                if (wr->send_flags & IB_SEND_SOLICITED)
                        wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
                else
                        wqe->send.rdmaop = T3_SEND_WITH_INV;
                wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey);
                break;
        default:
                return -EINVAL;
        }
        if (wr->num_sge > T3_MAX_SGE)
                return -EINVAL;
        wqe->send.reserved[0] = 0;
        wqe->send.reserved[1] = 0;
        wqe->send.reserved[2] = 0;
        plen = 0;
        for (i = 0; i < wr->num_sge; i++) {
                /* reject SGLs whose total length overflows 32 bits */
                if ((plen + wr->sg_list[i].length) < plen)
                        return -EMSGSIZE;
                plen += wr->sg_list[i].length;
                wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
                wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
                wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
        }
        wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
        *flit_cnt = 4 + ((wr->num_sge) << 1);
        wqe->send.plen = cpu_to_be32(plen);
        return 0;
}
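/*
 * An RDMA WRITE carries its source SGL inline in the WQE.  For
 * IB_WR_RDMA_WRITE_WITH_IMM the immediate data rides in sgl[0].stag
 * with num_sgle set to 0, and plen is the 4-byte immediate length.
 */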
static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
                                 u8 *flit_cnt)
{
        int i;
        u32 plen;

        if (wr->num_sge > T3_MAX_SGE)
                return -EINVAL;
        wqe->write.rdmaop = T3_RDMA_WRITE;
        wqe->write.reserved[0] = 0;
        wqe->write.reserved[1] = 0;
        wqe->write.reserved[2] = 0;
        wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
        wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);

        if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
                plen = 4;
                wqe->write.sgl[0].stag = wr->ex.imm_data;
                wqe->write.sgl[0].len = __constant_cpu_to_be32(0);
                wqe->write.num_sgle = __constant_cpu_to_be32(0);
                *flit_cnt = 6;
        } else {
                plen = 0;
                for (i = 0; i < wr->num_sge; i++) {
                        /* reject SGLs whose total length overflows 32 bits */
                        if ((plen + wr->sg_list[i].length) < plen) {
                                return -EMSGSIZE;
                        }
                        plen += wr->sg_list[i].length;
                        wqe->write.sgl[i].stag =
                            cpu_to_be32(wr->sg_list[i].lkey);
                        wqe->write.sgl[i].len =
                            cpu_to_be32(wr->sg_list[i].length);
                        wqe->write.sgl[i].to =
                            cpu_to_be64(wr->sg_list[i].addr);
                }
                wqe->write.num_sgle = cpu_to_be32(wr->num_sge);
                *flit_cnt = 5 + ((wr->num_sge) << 1);
        }
        wqe->write.plen = cpu_to_be32(plen);
        return 0;
}
static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
                                u8 *flit_cnt)
{
        if (wr->num_sge > 1)
                return -EINVAL;
        wqe->read.rdmaop = T3_READ_REQ;
        if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
                wqe->read.local_inv = 1;
        else
                wqe->read.local_inv = 0;
        wqe->read.reserved[0] = 0;
        wqe->read.reserved[1] = 0;
        wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
        wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
        wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
        wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
        wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
        *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
        return 0;
}
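/*
 * A fast-register WR writes the stag, length, base VA and permissions,
 * followed by the PBL entries.  If the page list does not fit in one
 * WQE (more than T3_MAX_FASTREG_FRAG entries), the remainder spills
 * into a second WQE slot carrying only PBL fragments, and *wr_cnt
 * tells the caller how many queue slots were consumed.
 */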
static int iwch_build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
                              u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
{
        int i;
        __be64 *p;

        if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH)
                return -EINVAL;
        *wr_cnt = 1;
        wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
        wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length);
        wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
        wqe->fastreg.va_base_lo_fbo =
                cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff);
        wqe->fastreg.page_type_perms = cpu_to_be32(
                V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) |
                V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) |
                V_FR_TYPE(TPT_VATO) |
                V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags)));
        p = &wqe->fastreg.pbl_addrs[0];
        for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) {

                /* If we need a 2nd WR, then set it up */
                if (i == T3_MAX_FASTREG_FRAG) {
                        *wr_cnt = 2;
                        wqe = (union t3_wr *)(wq->queue +
                                Q_PTR2IDX((wq->wptr+1), wq->size_log2));
                        build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
                               Q_GENBIT(wq->wptr + 1, wq->size_log2),
                               0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG,
                               T3_EOP);

                        p = &wqe->pbl_frag.pbl_addrs[0];
                }
                *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]);
        }
        *flit_cnt = 5 + wr->wr.fast_reg.page_list_len;
        if (*flit_cnt > 15)
                *flit_cnt = 15;
        return 0;
}
static int iwch_build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr,
                               u8 *flit_cnt)
{
        wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
        wqe->local_inv.reserved = 0;
        *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3;
        return 0;
}
/*
 * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
 */
static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
                            u32 num_sgle, u32 *pbl_addr, u8 *page_size)
{
        int i;
        struct iwch_mr *mhp;
        u64 offset;

        for (i = 0; i < num_sgle; i++) {

                mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
                if (!mhp) {
                        PDBG("%s %d\n", __func__, __LINE__);
                        return -EIO;
                }
                if (!mhp->attr.state) {
                        PDBG("%s %d\n", __func__, __LINE__);
                        return -EIO;
                }
                if (mhp->attr.zbva) {
                        PDBG("%s %d\n", __func__, __LINE__);
                        return -EIO;
                }

                if (sg_list[i].addr < mhp->attr.va_fbo) {
                        PDBG("%s %d\n", __func__, __LINE__);
                        return -EINVAL;
                }
                if (sg_list[i].addr + ((u64) sg_list[i].length) <
                    sg_list[i].addr) {
                        PDBG("%s %d\n", __func__, __LINE__);
                        return -EINVAL;
                }
                if (sg_list[i].addr + ((u64) sg_list[i].length) >
                    mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
                        PDBG("%s %d\n", __func__, __LINE__);
                        return -EINVAL;
                }
                offset = sg_list[i].addr - mhp->attr.va_fbo;
                offset += ((u32) mhp->attr.va_fbo) %
                    (1UL << (12 + mhp->attr.page_size));
                pbl_addr[i] = ((mhp->attr.pbl_addr -
                        rhp->rdev.rnic_info.pbl_base) >> 3) +
                        (offset >> (12 + mhp->attr.page_size));
                page_size[i] = mhp->attr.page_size;
        }
        return 0;
}
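/*
 * Receive WQEs always carry T3_MAX_SGE SGL slots; unused trailing
 * slots are cleared so no stale stag from a previous WR can be
 * referenced by the hardware.
 */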
static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe,
                                struct ib_recv_wr *wr)
{
        int i;

        if (wr->num_sge > T3_MAX_SGE)
                return -EINVAL;
        wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
        for (i = 0; i < wr->num_sge; i++) {
                wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
                wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
                wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
        }
        for (; i < T3_MAX_SGE; i++) {
                wqe->recv.sgl[i].stag = 0;
                wqe->recv.sgl[i].len = 0;
                wqe->recv.sgl[i].to = 0;
        }
        return 0;
}
int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                   struct ib_send_wr **bad_wr)
{
        int err = 0;
        u8 uninitialized_var(t3_wr_flit_cnt);
        enum t3_wr_opcode t3_wr_opcode = 0;
        enum t3_wr_flags t3_wr_flags;
        struct iwch_qp *qhp;
        u32 idx;
        union t3_wr *wqe;
        u32 num_wrs;
        unsigned long flag;
        struct t3_swsq *sqp;
        int wr_cnt = 1;

        qhp = to_iwch_qp(ibqp);
        spin_lock_irqsave(&qhp->lock, flag);
        if (qhp->attr.state > IWCH_QP_STATE_RTS) {
                spin_unlock_irqrestore(&qhp->lock, flag);
                return -EINVAL;
        }
        num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
                            qhp->wq.sq_size_log2);
        if (num_wrs <= 0) {
                spin_unlock_irqrestore(&qhp->lock, flag);
                return -ENOMEM;
        }
        while (wr) {
                if (num_wrs == 0) {
                        err = -ENOMEM;
                        *bad_wr = wr;
                        break;
                }
                idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
                wqe = (union t3_wr *) (qhp->wq.queue + idx);
                t3_wr_flags = 0;
                if (wr->send_flags & IB_SEND_SOLICITED)
                        t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
                if (wr->send_flags & IB_SEND_SIGNALED)
                        t3_wr_flags |= T3_COMPLETION_FLAG;
                sqp = qhp->wq.sq +
                      Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
                switch (wr->opcode) {
                case IB_WR_SEND:
                case IB_WR_SEND_WITH_INV:
                        if (wr->send_flags & IB_SEND_FENCE)
                                t3_wr_flags |= T3_READ_FENCE_FLAG;
                        t3_wr_opcode = T3_WR_SEND;
                        err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
                        break;
                case IB_WR_RDMA_WRITE:
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        t3_wr_opcode = T3_WR_WRITE;
                        err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
                        break;
                case IB_WR_RDMA_READ:
                case IB_WR_RDMA_READ_WITH_INV:
                        t3_wr_opcode = T3_WR_READ;
                        t3_wr_flags = 0; /* T3 reads are always signaled */
                        err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
                        if (err)
                                break;
                        sqp->read_len = wqe->read.local_len;
                        if (!qhp->wq.oldest_read)
                                qhp->wq.oldest_read = sqp;
                        break;
                case IB_WR_FAST_REG_MR:
                        t3_wr_opcode = T3_WR_FASTREG;
                        err = iwch_build_fastreg(wqe, wr, &t3_wr_flit_cnt,
                                                 &wr_cnt, &qhp->wq);
                        break;
                case IB_WR_LOCAL_INV:
                        if (wr->send_flags & IB_SEND_FENCE)
                                t3_wr_flags |= T3_LOCAL_FENCE_FLAG;
                        t3_wr_opcode = T3_WR_INV_STAG;
                        err = iwch_build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
                        break;
                default:
                        PDBG("%s post of type=%d TBD!\n", __func__,
                             wr->opcode);
                        err = -EINVAL;
                }
                if (err) {
                        *bad_wr = wr;
                        break;
                }
                wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
                sqp->wr_id = wr->wr_id;
                sqp->opcode = wr2opcode(t3_wr_opcode);
                sqp->sq_wptr = qhp->wq.sq_wptr;
                sqp->complete = 0;
                sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);

                build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
                               Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
                               0, t3_wr_flit_cnt,
                               (wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
                PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
                     __func__, (unsigned long long) wr->wr_id, idx,
                     Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
                     sqp->opcode);
                wr = wr->next;
                num_wrs--;
                qhp->wq.wptr += wr_cnt;
                ++(qhp->wq.sq_wptr);
        }
        spin_unlock_irqrestore(&qhp->lock, flag);
        ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
        return err;
}
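/*
 * iwch_post_receive() is the post_recv verb.  Unlike sends, every
 * receive is built with T3_COMPLETION_FLAG set, since each consumed
 * RQE must generate a CQE; the wr_id is stashed in the software RQ so
 * the poll code can hand it back at completion time.
 */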
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                      struct ib_recv_wr **bad_wr)
{
        int err = 0;
        struct iwch_qp *qhp;
        u32 idx, num_wrs;
        union t3_wr *wqe;
        unsigned long flag;

        qhp = to_iwch_qp(ibqp);
        spin_lock_irqsave(&qhp->lock, flag);
        if (qhp->attr.state > IWCH_QP_STATE_RTS) {
                spin_unlock_irqrestore(&qhp->lock, flag);
                return -EINVAL;
        }
        num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
                            qhp->wq.rq_size_log2) - 1;
        if (!wr) {
                spin_unlock_irqrestore(&qhp->lock, flag);
                return -EINVAL;
        }
        while (wr) {
                idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
                wqe = (union t3_wr *) (qhp->wq.queue + idx);
                if (num_wrs)
                        err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
                else
                        err = -ENOMEM;
                if (err) {
                        *bad_wr = wr;
                        break;
                }
                qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
                        wr->wr_id;
                build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
                               Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
                               0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
                PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rq_rptr 0x%x "
                     "wqe %p\n", __func__, (unsigned long long) wr->wr_id,
                     idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
                ++(qhp->wq.rq_wptr);
                ++(qhp->wq.wptr);
                wr = wr->next;
                num_wrs--;
        }
        spin_unlock_irqrestore(&qhp->lock, flag);
        ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
        return err;
}
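/*
 * iwch_bind_mw() posts a T3_WR_BIND to associate a memory window with
 * a range of an underlying MR.  The MR range is run through
 * iwch_sgl2pbl_map() so the adapter can validate the bind against the
 * parent region's PBL.
 */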
int iwch_bind_mw(struct ib_qp *qp,
                 struct ib_mw *mw,
                 struct ib_mw_bind *mw_bind)
{
        struct iwch_dev *rhp;
        struct iwch_mw *mhp;
        struct iwch_qp *qhp;
        union t3_wr *wqe;
        u32 pbl_addr;
        u8 page_size;
        u32 num_wrs;
        unsigned long flag;
        struct ib_sge sgl;
        int err = 0;
        enum t3_wr_flags t3_wr_flags;
        u32 idx;
        struct t3_swsq *sqp;

        qhp = to_iwch_qp(qp);
        mhp = to_iwch_mw(mw);
        rhp = mhp->rhp;

        spin_lock_irqsave(&qhp->lock, flag);
        if (qhp->attr.state > IWCH_QP_STATE_RTS) {
                spin_unlock_irqrestore(&qhp->lock, flag);
                return -EINVAL;
        }
        num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
                            qhp->wq.sq_size_log2);
        if ((num_wrs) <= 0) {
                spin_unlock_irqrestore(&qhp->lock, flag);
                return -ENOMEM;
        }
        idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
        PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx,
             mw, mw_bind);
        wqe = (union t3_wr *) (qhp->wq.queue + idx);

        t3_wr_flags = 0;
        if (mw_bind->send_flags & IB_SEND_SIGNALED)
                t3_wr_flags = T3_COMPLETION_FLAG;

        sgl.addr = mw_bind->addr;
        sgl.lkey = mw_bind->mr->lkey;
        sgl.length = mw_bind->length;
        wqe->bind.reserved = 0;
        wqe->bind.type = TPT_VATO;

        /* TBD: check perms */
        wqe->bind.perms = iwch_ib_to_tpt_access(mw_bind->mw_access_flags);
        wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
        wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
        wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
        wqe->bind.mw_va = cpu_to_be64(mw_bind->addr);
        err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
        if (err) {
                spin_unlock_irqrestore(&qhp->lock, flag);
                return err;
        }
        wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
        sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
        sqp->wr_id = mw_bind->wr_id;
        sqp->opcode = T3_BIND_MW;
        sqp->sq_wptr = qhp->wq.sq_wptr;
        sqp->complete = 0;
        sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
        wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
        wqe->bind.mr_pagesz = page_size;
        build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
                       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
                       sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
        ++(qhp->wq.wptr);
        ++(qhp->wq.sq_wptr);
        spin_unlock_irqrestore(&qhp->lock, flag);

        ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);

        return err;
}
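/*
 * Map a T3 CQE error status onto the IETF iWARP TERMINATE layer/etype
 * and error code (RDMAP, DDP tagged/untagged, or MPA), taking into
 * account whether the failed operation was tagged and whether it was
 * a send-with-invalidate variant.
 */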
static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
                                    u8 *layer_type, u8 *ecode)
{
        int status = TPT_ERR_INTERNAL_ERR;
        int tagged = 0;
        int opcode = -1;
        int rqtype = 0;
        int send_inv = 0;

        if (rsp_msg) {
                status = CQE_STATUS(rsp_msg->cqe);
                opcode = CQE_OPCODE(rsp_msg->cqe);
                rqtype = RQ_TYPE(rsp_msg->cqe);
                send_inv = (opcode == T3_SEND_WITH_INV) ||
                           (opcode == T3_SEND_WITH_SE_INV);
                tagged = (opcode == T3_RDMA_WRITE) ||
                         (rqtype && (opcode == T3_READ_RESP));
        }

        switch (status) {
        case TPT_ERR_STAG:
                if (send_inv) {
                        *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
                        *ecode = RDMAP_CANT_INV_STAG;
                } else {
                        *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                        *ecode = RDMAP_INV_STAG;
                }
                break;
        case TPT_ERR_PDID:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                if ((opcode == T3_SEND_WITH_INV) ||
                    (opcode == T3_SEND_WITH_SE_INV))
                        *ecode = RDMAP_CANT_INV_STAG;
                else
                        *ecode = RDMAP_STAG_NOT_ASSOC;
                break;
        case TPT_ERR_QPID:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                *ecode = RDMAP_STAG_NOT_ASSOC;
                break;
        case TPT_ERR_ACCESS:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                *ecode = RDMAP_ACC_VIOL;
                break;
        case TPT_ERR_WRAP:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                *ecode = RDMAP_TO_WRAP;
                break;
        case TPT_ERR_BOUND:
                if (tagged) {
                        *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
                        *ecode = DDPT_BASE_BOUNDS;
                } else {
                        *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
                        *ecode = RDMAP_BASE_BOUNDS;
                }
                break;
        case TPT_ERR_INVALIDATE_SHARED_MR:
        case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
                *ecode = RDMAP_CANT_INV_STAG;
                break;
        case TPT_ERR_ECC:
        case TPT_ERR_ECC_PSTAG:
        case TPT_ERR_INTERNAL_ERR:
                *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
                *ecode = 0;
                break;
        case TPT_ERR_OUT_OF_RQE:
                *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                *ecode = DDPU_INV_MSN_NOBUF;
                break;
        case TPT_ERR_PBL_ADDR_BOUND:
                *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
                *ecode = DDPT_BASE_BOUNDS;
                break;
        case TPT_ERR_CRC:
                *layer_type = LAYER_MPA|DDP_LLP;
                *ecode = MPA_CRC_ERR;
                break;
        case TPT_ERR_MARKER:
                *layer_type = LAYER_MPA|DDP_LLP;
                *ecode = MPA_MARKER_ERR;
                break;
        case TPT_ERR_PDU_LEN_ERR:
                *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                *ecode = DDPU_MSG_TOOBIG;
                break;
        case TPT_ERR_DDP_VERSION:
                if (tagged) {
                        *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
                        *ecode = DDPT_INV_VERS;
                } else {
                        *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                        *ecode = DDPU_INV_VERS;
                }
                break;
        case TPT_ERR_RDMA_VERSION:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
                *ecode = RDMAP_INV_VERS;
                break;
        case TPT_ERR_OPCODE:
                *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
                *ecode = RDMAP_INV_OPCODE;
                break;
        case TPT_ERR_DDP_QUEUE_NUM:
                *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                *ecode = DDPU_INV_QN;
                break;
        case TPT_ERR_MSN:
        case TPT_ERR_MSN_GAP:
        case TPT_ERR_MSN_RANGE:
        case TPT_ERR_IRD_OVERFLOW:
                *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                *ecode = DDPU_INV_MSN_RANGE;
                break;
        case TPT_ERR_TBIT:
                *layer_type = LAYER_DDP|DDP_LOCAL_CATA;
                *ecode = 0;
                break;
        case TPT_ERR_MO:
                *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
                *ecode = DDPU_INV_MO;
                break;
        default:
                *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
                *ecode = 0;
                break;
        }
}
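/*
 * Post a zero-byte read: a 0B RDMA READ REQ with dummy stags, built in
 * an skb and handed straight to the offload device rather than going
 * through the host work queue.  It serves as a ready-to-receive
 * message for peer-to-peer connection setup; see the RTR_READ setup
 * in rdma_init() below.
 */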
int iwch_post_zb_read(struct iwch_qp *qhp)
{
        union t3_wr *wqe;
        struct sk_buff *skb;
        u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;

        PDBG("%s enter\n", __func__);
        skb = alloc_skb(40, GFP_KERNEL);
        if (!skb) {
                printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
                return -ENOMEM;
        }
        wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
        memset(wqe, 0, sizeof(struct t3_rdma_read_wr));
        wqe->read.rdmaop = T3_READ_REQ;
        wqe->read.reserved[0] = 0;
        wqe->read.reserved[1] = 0;
        wqe->read.reserved[2] = 0;
        wqe->read.rem_stag = cpu_to_be32(1);
        wqe->read.rem_to = cpu_to_be64(1);
        wqe->read.local_stag = cpu_to_be32(1);
        wqe->read.local_len = cpu_to_be32(0);
        wqe->read.local_to = cpu_to_be64(1);
        wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
        wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)|
                                                V_FW_RIWR_LEN(flit_cnt));
        skb->priority = CPL_PRIORITY_DATA;
        return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
}
/*
 * This posts a TERMINATE with layer=RDMA, type=catastrophic.
 */
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
{
        union t3_wr *wqe;
        struct terminate_message *term;
        struct sk_buff *skb;

        PDBG("%s %d\n", __func__, __LINE__);
        skb = alloc_skb(40, GFP_ATOMIC);
        if (!skb) {
                printk(KERN_ERR "%s cannot send TERMINATE!\n", __func__);
                return -ENOMEM;
        }
        wqe = (union t3_wr *)skb_put(skb, 40);
        memset(wqe, 0, 40);
        wqe->send.rdmaop = T3_TERMINATE;

        /* immediate data length */
        wqe->send.plen = htonl(4);

        /* immediate data starts here. */
        term = (struct terminate_message *)wqe->send.sgl;
        build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
        wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) |
                        V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
        wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
        skb->priority = CPL_PRIORITY_DATA;
        return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
}
/*
 * Assumes qhp lock is held.
 */
static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
{
        struct iwch_cq *rchp, *schp;
        int count;
        int flushed;

        rchp = get_chp(qhp->rhp, qhp->attr.rcq);
        schp = get_chp(qhp->rhp, qhp->attr.scq);

        PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
        /* take a ref on the qhp since we must release the lock */
        atomic_inc(&qhp->refcnt);
        spin_unlock_irqrestore(&qhp->lock, *flag);

        /* locking hierarchy: cq lock first, then qp lock. */
        spin_lock_irqsave(&rchp->lock, *flag);
        spin_lock(&qhp->lock);
        cxio_flush_hw_cq(&rchp->cq);
        cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
        flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
        spin_unlock(&qhp->lock);
        spin_unlock_irqrestore(&rchp->lock, *flag);
        if (flushed)
                (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);

        /* locking hierarchy: cq lock first, then qp lock. */
        spin_lock_irqsave(&schp->lock, *flag);
        spin_lock(&qhp->lock);
        cxio_flush_hw_cq(&schp->cq);
        cxio_count_scqes(&schp->cq, &qhp->wq, &count);
        flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
        spin_unlock(&qhp->lock);
        spin_unlock_irqrestore(&schp->lock, *flag);
        if (flushed)
                (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);

        /* deref */
        if (atomic_dec_and_test(&qhp->refcnt))
                wake_up(&qhp->wait);

        spin_lock_irqsave(&qhp->lock, *flag);
}
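/*
 * For userspace QPs the WQ is simply marked in error and the library
 * flushes from user context; kernel QPs are flushed here directly.
 */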
static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
{
        if (qhp->ibqp.uobject)
                cxio_set_wq_in_error(&qhp->wq);
        else
                __flush_qp(qhp, flag);
}
/*
 * Return count of RECV WRs posted
 */
u16 iwch_rqes_posted(struct iwch_qp *qhp)
{
        union t3_wr *wqe = qhp->wq.queue;
        u16 count = 0;

        while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
                count++;
                wqe++;
        }
        PDBG("%s qhp %p count %u\n", __func__, qhp, count);
        return count;
}
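/*
 * Push the QP's RDMA parameters (queue addresses, MPA attributes,
 * capabilities, ORD/IRD) down to the adapter via cxio_rdma_init()
 * as part of the IDLE->RTS transition.
 */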
static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
                     enum iwch_qp_attr_mask mask,
                     struct iwch_qp_attributes *attrs)
{
        struct t3_rdma_init_attr init_attr;
        int ret;

        init_attr.tid = qhp->ep->hwtid;
        init_attr.qpid = qhp->wq.qpid;
        init_attr.pdid = qhp->attr.pd;
        init_attr.scqid = qhp->attr.scq;
        init_attr.rcqid = qhp->attr.rcq;
        init_attr.rq_addr = qhp->wq.rq_addr;
        init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
        init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
                qhp->attr.mpa_attr.recv_marker_enabled |
                (qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
                (qhp->attr.mpa_attr.crc_enabled << 2);

        /*
         * XXX - The IWCM doesn't quite handle getting these
         * attrs set before going into RTS.  For now, just turn
         * them on always...
         */
#if 0
        init_attr.qpcaps = qhp->attr.enableRdmaRead |
                (qhp->attr.enableRdmaWrite << 1) |
                (qhp->attr.enableBind << 2) |
                (qhp->attr.enable_stag0_fastreg << 3) |
                (qhp->attr.enable_stag0_fastreg << 4);
#else
        init_attr.qpcaps = 0x1f;
#endif
        init_attr.tcp_emss = qhp->ep->emss;
        init_attr.ord = qhp->attr.max_ord;
        init_attr.ird = qhp->attr.max_ird;
        init_attr.qp_dma_addr = qhp->wq.dma_addr;
        init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
        init_attr.rqe_count = iwch_rqes_posted(qhp);
        init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
        init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0;
        if (peer2peer) {
                init_attr.rtr_type = RTR_READ;
                if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
                        init_attr.ord = 1;
                if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
                        init_attr.ird = 1;
        } else
                init_attr.rtr_type = 0;
        init_attr.irs = qhp->ep->rcv_seq;
        PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
             "flags 0x%x qpcaps 0x%x\n", __func__,
             init_attr.rq_addr, init_attr.rq_size,
             init_attr.flags, init_attr.qpcaps);
        ret = cxio_rdma_init(&rhp->rdev, &init_attr);
        PDBG("%s ret %d\n", __func__, ret);
        return ret;
}
int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
                   enum iwch_qp_attr_mask mask,
                   struct iwch_qp_attributes *attrs,
                   int internal)
{
        int ret = 0;
        struct iwch_qp_attributes newattr = qhp->attr;
        unsigned long flag;
        int disconnect = 0;
        int terminate = 0;
        int abort = 0;
        int free = 0;
        struct iwch_ep *ep = NULL;

        PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
             qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
             (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);

        spin_lock_irqsave(&qhp->lock, flag);

        /* Process attr changes if in IDLE */
        if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
                if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
                        ret = -EIO;
                        goto out;
                }
                if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
                        newattr.enable_rdma_read = attrs->enable_rdma_read;
                if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
                        newattr.enable_rdma_write = attrs->enable_rdma_write;
                if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
                        newattr.enable_bind = attrs->enable_bind;
                if (mask & IWCH_QP_ATTR_MAX_ORD) {
                        if (attrs->max_ord >
                            rhp->attr.max_rdma_read_qp_depth) {
                                ret = -EINVAL;
                                goto out;
                        }
                        newattr.max_ord = attrs->max_ord;
                }
                if (mask & IWCH_QP_ATTR_MAX_IRD) {
                        if (attrs->max_ird >
                            rhp->attr.max_rdma_reads_per_qp) {
                                ret = -EINVAL;
                                goto out;
                        }
                        newattr.max_ird = attrs->max_ird;
                }
                qhp->attr = newattr;
        }

        if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
                goto out;
        if (qhp->attr.state == attrs->next_state)
                goto out;

        switch (qhp->attr.state) {
        case IWCH_QP_STATE_IDLE:
                switch (attrs->next_state) {
                case IWCH_QP_STATE_RTS:
                        if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
                                ret = -EINVAL;
                                goto out;
                        }
                        qhp->attr.mpa_attr = attrs->mpa_attr;
                        qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
                        qhp->ep = qhp->attr.llp_stream_handle;
                        qhp->attr.state = IWCH_QP_STATE_RTS;

                        /*
                         * Ref the endpoint here and deref when we
                         * disassociate the endpoint from the QP.  This
                         * happens in CLOSING->IDLE transition or *->ERROR
                         * transition.
                         */
                        get_ep(&qhp->ep->com);
                        spin_unlock_irqrestore(&qhp->lock, flag);
                        ret = rdma_init(rhp, qhp, mask, attrs);
                        spin_lock_irqsave(&qhp->lock, flag);
                        if (ret)
                                goto err;
                        break;
                case IWCH_QP_STATE_ERROR:
                        qhp->attr.state = IWCH_QP_STATE_ERROR;
                        flush_qp(qhp, &flag);
                        break;
                default:
                        ret = -EINVAL;
                        goto out;
                }
                break;
        case IWCH_QP_STATE_RTS:
                switch (attrs->next_state) {
                case IWCH_QP_STATE_CLOSING:
                        BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
                        qhp->attr.state = IWCH_QP_STATE_CLOSING;
                        if (!internal) {
                                abort = 0;
                                disconnect = 1;
                                ep = qhp->ep;
                                get_ep(&ep->com);
                        }
                        break;
                case IWCH_QP_STATE_TERMINATE:
                        qhp->attr.state = IWCH_QP_STATE_TERMINATE;
                        if (qhp->ibqp.uobject)
                                cxio_set_wq_in_error(&qhp->wq);
                        if (!internal)
                                terminate = 1;
                        break;
                case IWCH_QP_STATE_ERROR:
                        qhp->attr.state = IWCH_QP_STATE_ERROR;
                        if (!internal) {
                                abort = 1;
                                disconnect = 1;
                                ep = qhp->ep;
                                get_ep(&ep->com);
                        }
                        goto err;
                        break;
                default:
                        ret = -EINVAL;
                        goto out;
                }
                break;
        case IWCH_QP_STATE_CLOSING:
                if (!internal) {
                        ret = -EINVAL;
                        goto out;
                }
                switch (attrs->next_state) {
                case IWCH_QP_STATE_IDLE:
                        flush_qp(qhp, &flag);
                        qhp->attr.state = IWCH_QP_STATE_IDLE;
                        qhp->attr.llp_stream_handle = NULL;
                        put_ep(&qhp->ep->com);
                        qhp->ep = NULL;
                        wake_up(&qhp->wait);
                        break;
                case IWCH_QP_STATE_ERROR:
                        goto err;
                default:
                        ret = -EINVAL;
                        goto err;
                }
                break;
        case IWCH_QP_STATE_ERROR:
                if (attrs->next_state != IWCH_QP_STATE_IDLE) {
                        ret = -EINVAL;
                        goto out;
                }
                if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
                    !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
                        ret = -EINVAL;
                        goto out;
                }
                qhp->attr.state = IWCH_QP_STATE_IDLE;
                memset(&qhp->attr, 0, sizeof(qhp->attr));
                break;
        case IWCH_QP_STATE_TERMINATE:
                if (!internal) {
                        ret = -EINVAL;
                        goto out;
                }
                goto err;
                break;
        default:
                printk(KERN_ERR "%s in a bad state %d\n",
                       __func__, qhp->attr.state);
                ret = -EINVAL;
                goto err;
                break;
        }
        goto out;
err:
        PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
             qhp->wq.qpid);

        /* disassociate the LLP connection */
        qhp->attr.llp_stream_handle = NULL;
        ep = qhp->ep;
        qhp->ep = NULL;
        qhp->attr.state = IWCH_QP_STATE_ERROR;
        free = 1;
        wake_up(&qhp->wait);
        BUG_ON(!ep);
        flush_qp(qhp, &flag);
out:
        spin_unlock_irqrestore(&qhp->lock, flag);

        if (terminate)
                iwch_post_terminate(qhp, NULL);

        /*
         * If disconnect is 1, then we need to initiate a disconnect
         * on the EP. This can be a normal close (RTS->CLOSING) or
         * an abnormal close (RTS/CLOSING->ERROR).
         */
        if (disconnect) {
                iwch_ep_disconnect(ep, abort, GFP_KERNEL);
                put_ep(&ep->com);
        }

        /*
         * If free is 1, then we've disassociated the EP from the QP
         * and we need to dereference the EP.
         */
        if (free)
                put_ep(&ep->com);

        PDBG("%s exit state %d\n", __func__, qhp->attr.state);
        return ret;
}
static int quiesce_qp(struct iwch_qp *qhp)
{
        spin_lock_irq(&qhp->lock);
        iwch_quiesce_tid(qhp->ep);
        qhp->flags |= QP_QUIESCED;
        spin_unlock_irq(&qhp->lock);
        return 0;
}

static int resume_qp(struct iwch_qp *qhp)
{
        spin_lock_irq(&qhp->lock);
        iwch_resume_tid(qhp->ep);
        qhp->flags &= ~QP_QUIESCED;
        spin_unlock_irq(&qhp->lock);
        return 0;
}
int iwch_quiesce_qps(struct iwch_cq *chp)
{
        int i;
        struct iwch_qp *qhp;

        for (i = 0; i < T3_MAX_NUM_QP; i++) {
                qhp = get_qhp(chp->rhp, i);
                if (!qhp)
                        continue;
                if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
                        quiesce_qp(qhp);
                        continue;
                }
                if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
                        quiesce_qp(qhp);
        }
        return 0;
}

int iwch_resume_qps(struct iwch_cq *chp)
{
        int i;
        struct iwch_qp *qhp;

        for (i = 0; i < T3_MAX_NUM_QP; i++) {
                qhp = get_qhp(chp->rhp, i);
                if (!qhp)
                        continue;
                if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
                        resume_qp(qhp);
                        continue;
                }
                if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
                        resume_qp(qhp);
        }
        return 0;
}