X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fnet%2Fcxgb3%2Fsge.c;h=c15e43a8543b983f82b0517010979d5eb782bbf8;hb=8329d98e480250ef5f5a083f9c3af50510b5e65d;hp=a60ec4d4707c50c2919bf2f99cd5f5051f3d483a;hpb=25243633c29b72c4edd5fe9cfcbd76aa5eef8b36;p=linux-2.6

diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index a60ec4d470..c15e43a854 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -46,23 +46,16 @@
 
 #define SGE_RX_SM_BUF_SIZE 1536
 
-/*
- * If USE_RX_PAGE is defined, the small freelist populated with (partial)
- * pages instead of skbs. Pages are carved up into RX_PAGE_SIZE chunks (must
- * be a multiple of the host page size).
- */
-#define USE_RX_PAGE
-#define RX_PAGE_SIZE 2048
-
-/*
- * skb freelist packets are copied into a new skb (and the freelist one is 
- * reused) if their len is <= 
- */
 #define SGE_RX_COPY_THRES  256
+#define SGE_RX_PULL_LEN    128
 
 /*
- * Minimum number of freelist entries before we start dropping TUNNEL frames.
+ * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
+ * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
+ * directly.
  */
+#define FL0_PG_CHUNK_SIZE  2048
+
 #define SGE_RX_DROP_THRES 16
 
 /*
@@ -86,7 +79,7 @@ enum {
 };
 
 struct tx_desc {
-	u64 flit[TX_DESC_FLITS];
+	__be64 flit[TX_DESC_FLITS];
 };
 
 struct rx_desc {
@@ -100,12 +93,12 @@ struct tx_sw_desc {		/* SW state per Tx descriptor */
 	struct sk_buff *skb;
 };
 
-struct rx_sw_desc {		/* SW state per Rx descriptor */
+struct rx_sw_desc {                /* SW state per Rx descriptor */
 	union {
 		struct sk_buff *skb;
-		struct sge_fl_page page;
-	} t;
-	 DECLARE_PCI_UNMAP_ADDR(dma_addr);
+		struct fl_pg_chunk pg_chunk;
+	};
+	DECLARE_PCI_UNMAP_ADDR(dma_addr);
 };
 
 struct rsp_desc {		/* response queue descriptor */
@@ -351,27 +344,26 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
 
 		pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
 				 q->buf_size, PCI_DMA_FROMDEVICE);
-
-		if (q->buf_size != RX_PAGE_SIZE) {
-			kfree_skb(d->t.skb);
-			d->t.skb = NULL;
+		if (q->use_pages) {
+			put_page(d->pg_chunk.page);
+			d->pg_chunk.page = NULL;
 		} else {
-			if (d->t.page.frag.page)
-				put_page(d->t.page.frag.page);
-			d->t.page.frag.page = NULL;
+			kfree_skb(d->skb);
+			d->skb = NULL;
 		}
 		if (++cidx == q->size)
 			cidx = 0;
 	}
 
-	if (q->page.frag.page)
-		put_page(q->page.frag.page);
-	q->page.frag.page = NULL;
+	if (q->pg_chunk.page) {
+		__free_page(q->pg_chunk.page);
+		q->pg_chunk.page = NULL;
+	}
 }
 
 /**
  *	add_one_rx_buf - add a packet buffer to a free-buffer list
- *	@va: va of the buffer to add
+ *	@va:  buffer start VA
  *	@len: the buffer length
  *	@d: the HW Rx descriptor to write
  *	@sd: the SW Rx descriptor to write
@@ -381,7 +373,7 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
  *	Add a buffer of the given length to the supplied HW and SW Rx
  *	descriptors.
  */
-static inline void add_one_rx_buf(unsigned char *va, unsigned int len,
+static inline void add_one_rx_buf(void *va, unsigned int len,
 				  struct rx_desc *d, struct rx_sw_desc *sd,
 				  unsigned int gen, struct pci_dev *pdev)
 {
@@ -397,6 +389,27 @@ static inline void add_one_rx_buf(unsigned char *va, unsigned int len,
 	d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
 }
 
+static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp)
+{
+	if (!q->pg_chunk.page) {
+		q->pg_chunk.page = alloc_page(gfp);
+		if (unlikely(!q->pg_chunk.page))
+			return -ENOMEM;
+		q->pg_chunk.va = page_address(q->pg_chunk.page);
+		q->pg_chunk.offset = 0;
+	}
+	sd->pg_chunk = q->pg_chunk;
+
+	q->pg_chunk.offset += q->buf_size;
+	if (q->pg_chunk.offset == PAGE_SIZE)
+		q->pg_chunk.page = NULL;
+	else {
+		q->pg_chunk.va += q->buf_size;
+		get_page(q->pg_chunk.page);
+	}
+	return 0;
+}
+
 /**
  *	refill_fl - refill an SGE free-buffer list
  *	@adapter: the adapter
@@ -410,49 +423,29 @@ static inline void add_one_rx_buf(unsigned char *va, unsigned int len,
  */
 static void refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
 {
+	void *buf_start;
 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 	struct rx_desc *d = &q->desc[q->pidx];
-	struct sge_fl_page *p = &q->page;
 
 	while (n--) {
-		unsigned char *va;
-
-		if (unlikely(q->buf_size != RX_PAGE_SIZE)) {
-			struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
-
-			if (!skb) {
-				q->alloc_failed++;
+		if (q->use_pages) {
+			if (unlikely(alloc_pg_chunk(q, sd, gfp))) {
+nomem:				q->alloc_failed++;
 				break;
 			}
-			va = skb->data;
-			sd->t.skb = skb;
+			buf_start = sd->pg_chunk.va;
 		} else {
-			if (!p->frag.page) {
-				p->frag.page = alloc_pages(gfp, 0);
-				if (unlikely(!p->frag.page)) {
-					q->alloc_failed++;
-					break;
-				} else {
-					p->frag.size = RX_PAGE_SIZE;
-					p->frag.page_offset = 0;
-					p->va = page_address(p->frag.page);
-				}
-			}
+			struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
 
-			memcpy(&sd->t, p, sizeof(*p));
-			va = p->va;
+			if (!skb)
+				goto nomem;
 
-			p->frag.page_offset += RX_PAGE_SIZE;
-			BUG_ON(p->frag.page_offset > PAGE_SIZE);
-			p->va += RX_PAGE_SIZE;
-			if (p->frag.page_offset == PAGE_SIZE)
-				p->frag.page = NULL;
-			else
-				get_page(p->frag.page);
+			sd->skb = skb;
+			buf_start = skb->data;
 		}
 
-		add_one_rx_buf(va, q->buf_size, d, sd, q->gen, adap->pdev);
-
+		add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
+			       adap->pdev);
 		d++;
 		sd++;
 		if (++q->pidx == q->size) {
@@ -487,7 +480,7 @@ static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
 	struct rx_desc *from = &q->desc[idx];
 	struct rx_desc *to = &q->desc[q->pidx];
 
-	memcpy(&q->sdesc[q->pidx], &q->sdesc[idx], sizeof(struct rx_sw_desc));
+	q->sdesc[q->pidx] = q->sdesc[idx];
 	to->addr_lo = from->addr_lo;	/* already big endian */
 	to->addr_hi = from->addr_hi;	/* likewise */
 	wmb();
@@ -551,7 +544,7 @@ static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
  *	queue set must be quiesced prior to calling this.
  */
-void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
+static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 {
 	int i;
 	struct pci_dev *pdev = adapter->pdev;
@@ -598,9 +591,6 @@ void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 				  q->rspq.desc, q->rspq.phys_addr);
 	}
 
-	if (q->netdev)
-		q->netdev->atalk_ptr = NULL;
-
 	memset(q, 0, sizeof(*q));
 }
 
@@ -649,6 +639,132 @@ static inline unsigned int flits_to_desc(unsigned int n)
 	return flit_desc_map[n];
 }
 
+/**
+ *	get_packet - return the next ingress packet buffer from a free list
+ *	@adap: the adapter that received the packet
+ *	@fl: the SGE free list holding the packet
+ *	@len: the packet length including any SGE padding
+ *	@drop_thres: # of remaining buffers before we start dropping packets
+ *
+ *	Get the next packet from a free list and complete setup of the
+ *	sk_buff.  If the packet is small we make a copy and recycle the
+ *	original buffer, otherwise we use the original buffer itself.  If a
+ *	positive drop threshold is supplied packets are dropped and their
+ *	buffers recycled if (a) the number of remaining buffers is under the
+ *	threshold and the packet is too big to copy, or (b) the packet should
+ *	be copied but there is no memory for the copy.
+ */
+static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
+				  unsigned int len, unsigned int drop_thres)
+{
+	struct sk_buff *skb = NULL;
+	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
+
+	prefetch(sd->skb->data);
+	fl->credits--;
+
+	if (len <= SGE_RX_COPY_THRES) {
+		skb = alloc_skb(len, GFP_ATOMIC);
+		if (likely(skb != NULL)) {
+			__skb_put(skb, len);
+			pci_dma_sync_single_for_cpu(adap->pdev,
+					    pci_unmap_addr(sd, dma_addr), len,
+					    PCI_DMA_FROMDEVICE);
+			memcpy(skb->data, sd->skb->data, len);
+			pci_dma_sync_single_for_device(adap->pdev,
+					    pci_unmap_addr(sd, dma_addr), len,
+					    PCI_DMA_FROMDEVICE);
+		} else if (!drop_thres)
+			goto use_orig_buf;
+recycle:
+		recycle_rx_buf(adap, fl, fl->cidx);
+		return skb;
+	}
+
+	if (unlikely(fl->credits < drop_thres))
+		goto recycle;
+
+use_orig_buf:
+	pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
+			 fl->buf_size, PCI_DMA_FROMDEVICE);
+	skb = sd->skb;
+	skb_put(skb, len);
+	__refill_fl(adap, fl);
+	return skb;
+}
+
+/**
+ *	get_packet_pg - return the next ingress packet buffer from a free list
+ *	@adap: the adapter that received the packet
+ *	@fl: the SGE free list holding the packet
+ *	@len: the packet length including any SGE padding
+ *	@drop_thres: # of remaining buffers before we start dropping packets
+ *
+ *	Get the next packet from a free list populated with page chunks.
+ *	If the packet is small we make a copy and recycle the original buffer,
+ *	otherwise we attach the original buffer as a page fragment to a fresh
+ *	sk_buff.  If a positive drop threshold is supplied packets are dropped
+ *	and their buffers recycled if (a) the number of remaining buffers is
+ *	under the threshold and the packet is too big to copy, or (b) there's
+ *	no system memory.
+ *
+ * 	Note: this function is similar to @get_packet but deals with Rx buffers
+ * 	that are page chunks rather than sk_buffs.
+ */
+static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
+				     unsigned int len, unsigned int drop_thres)
+{
+	struct sk_buff *skb = NULL;
+	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
+
+	if (len <= SGE_RX_COPY_THRES) {
+		skb = alloc_skb(len, GFP_ATOMIC);
+		if (likely(skb != NULL)) {
+			__skb_put(skb, len);
+			pci_dma_sync_single_for_cpu(adap->pdev,
+					    pci_unmap_addr(sd, dma_addr), len,
+					    PCI_DMA_FROMDEVICE);
+			memcpy(skb->data, sd->pg_chunk.va, len);
+			pci_dma_sync_single_for_device(adap->pdev,
+					    pci_unmap_addr(sd, dma_addr), len,
+					    PCI_DMA_FROMDEVICE);
+		} else if (!drop_thres)
+			return NULL;
+recycle:
+		fl->credits--;
+		recycle_rx_buf(adap, fl, fl->cidx);
+		return skb;
+	}
+
+	if (unlikely(fl->credits <= drop_thres))
+		goto recycle;
+
+	skb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
+	if (unlikely(!skb)) {
+		if (!drop_thres)
+			return NULL;
+		goto recycle;
+	}
+
+	pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
+			 fl->buf_size, PCI_DMA_FROMDEVICE);
+	__skb_put(skb, SGE_RX_PULL_LEN);
+	memcpy(skb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
+	skb_fill_page_desc(skb, 0, sd->pg_chunk.page,
+			   sd->pg_chunk.offset + SGE_RX_PULL_LEN,
+			   len - SGE_RX_PULL_LEN);
+	skb->len = len;
+	skb->data_len = len - SGE_RX_PULL_LEN;
+	skb->truesize += skb->data_len;
+
+	fl->credits--;
+	/*
+	 * We do not refill FLs here, we let the caller do it to overlap a
+	 * prefetch.
+	 */
+	return skb;
+}
+
 /**
  *	get_imm_packet - return the next ingress packet buffer from a response
  *	@resp: the response descriptor containing the packet data
@@ -788,8 +904,8 @@ static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
 			     const struct sge_txq *q,
 			     const struct sg_ent *sgl,
 			     unsigned int flits, unsigned int sgl_flits,
-			     unsigned int gen, unsigned int wr_hi,
-			     unsigned int wr_lo)
+			     unsigned int gen, __be32 wr_hi,
+			     __be32 wr_lo)
 {
 	struct work_request_hdr *wrp = (struct work_request_hdr *)d;
 	struct tx_sw_desc *sd = &q->sdesc[pidx];
@@ -954,8 +1070,8 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	unsigned int ndesc, pidx, credits, gen, compl;
 	const struct port_info *pi = netdev_priv(dev);
-	struct adapter *adap = dev->priv;
-	struct sge_qset *qs = dev2qset(dev);
+	struct adapter *adap = pi->adapter;
+	struct sge_qset *qs = pi->qs;
 	struct sge_txq *q = &qs->txq[TXQ_ETH];
 
 	/*
@@ -1063,8 +1179,8 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
  *
  *	Writes a packet as immediate data into a Tx descriptor.  The packet
  *	contains a work request at its beginning.  We must write the packet
- *	carefully so the SGE doesn't read accidentally before it's written in
- *	its entirety.
+ *	carefully so the SGE doesn't read it accidentally before it's written
+ *	in its entirety.
  */
 static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
 			     unsigned int len, unsigned int gen)
@@ -1072,7 +1188,11 @@ static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
 	struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
 	struct work_request_hdr *to = (struct work_request_hdr *)d;
 
-	memcpy(&to[1], &from[1], len - sizeof(*from));
+	if (likely(!skb->data_len))
+		memcpy(&to[1], &from[1], len - sizeof(*from));
+	else
+		skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
+
 	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
 					V_WR_BCNTLFLT(len & 7));
 	wmb();
@@ -1142,7 +1262,7 @@ static inline void reclaim_completed_tx_imm(struct sge_txq *q)
 
 static inline int immediate(const struct sk_buff *skb)
 {
-	return skb->len <= WR_LEN && !skb->data_len;
+	return skb->len <= WR_LEN;
 }
 
 /**
@@ -1207,12 +1327,12 @@ static void restart_ctrlq(unsigned long data)
 	struct sk_buff *skb;
 	struct sge_qset *qs = (struct sge_qset *)data;
 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
-	struct adapter *adap = qs->netdev->priv;
 
 	spin_lock(&q->lock);
       again:reclaim_completed_tx_imm(q);
 
-	while (q->in_use < q->size && (skb = __skb_dequeue(&q->sendq)) != NULL) {
+	while (q->in_use < q->size &&
+	       (skb = __skb_dequeue(&q->sendq)) != NULL) {
 
 		write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
 
@@ -1234,7 +1354,7 @@ static void restart_ctrlq(unsigned long data)
 	}
 
 	spin_unlock(&q->lock);
-	t3_write_reg(adap, A_SG_KDOORBELL,
+	t3_write_reg(qs->adap, A_SG_KDOORBELL,
 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 }
 
@@ -1348,12 +1468,13 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
  */
 static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
 {
-	unsigned int flits, cnt = skb_shinfo(skb)->nr_frags;
+	unsigned int flits, cnt;
 
-	if (skb->len <= WR_LEN && cnt == 0)
+	if (skb->len <= WR_LEN)
 		return 1;	/* packet fits as immediate data */
 
 	flits = skb_transport_offset(skb) / 8;	/* headers */
+	cnt = skb_shinfo(skb)->nr_frags;
 	if (skb->tail != skb->transport_header)
 		cnt++;
 	return flits_to_desc(flits + sgl_len(cnt));
@@ -1412,7 +1533,8 @@ static void restart_offloadq(unsigned long data)
 	struct sk_buff *skb;
 	struct sge_qset *qs = (struct sge_qset *)data;
 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
-	struct adapter *adap = qs->netdev->priv;
+	const struct port_info *pi = netdev_priv(qs->netdev);
+	struct adapter *adap = pi->adapter;
 
 	spin_lock(&q->lock);
       again:reclaim_completed_tx(adap, q);
@@ -1517,8 +1639,7 @@ static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
 	else {
 		struct sge_qset *qs = rspq_to_qset(q);
 
-		if (__netif_rx_schedule_prep(qs->netdev))
-			__netif_rx_schedule(qs->netdev);
+		napi_schedule(&qs->napi);
 		q->rx_head = skb;
 	}
 	q->rx_tail = skb;
@@ -1554,33 +1675,30 @@ static inline void deliver_partial_bundle(struct t3cdev *tdev,
  *	receive handler.  Batches need to be of modest size as we do prefetches
  *	on the packets in each.
  */
-static int ofld_poll(struct net_device *dev, int *budget)
+static int ofld_poll(struct napi_struct *napi, int budget)
 {
-	struct adapter *adapter = dev->priv;
-	struct sge_qset *qs = dev2qset(dev);
+	struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
 	struct sge_rspq *q = &qs->rspq;
-	int work_done, limit = min(*budget, dev->quota), avail = limit;
+	struct adapter *adapter = qs->adap;
+	int work_done = 0;
 
-	while (avail) {
+	while (work_done < budget) {
 		struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
 		int ngathered;
 
 		spin_lock_irq(&q->lock);
 		head = q->rx_head;
 		if (!head) {
-			work_done = limit - avail;
-			*budget -= work_done;
-			dev->quota -= work_done;
-			__netif_rx_complete(dev);
+			napi_complete(napi);
 			spin_unlock_irq(&q->lock);
-			return 0;
+			return work_done;
 		}
 
 		tail = q->rx_tail;
 		q->rx_head = q->rx_tail = NULL;
 		spin_unlock_irq(&q->lock);
 
-		for (ngathered = 0; avail && head; avail--) {
+		for (ngathered = 0; work_done < budget && head; work_done++) {
 			prefetch(head->data);
 			skbs[ngathered] = head;
 			head = head->next;
@@ -1602,10 +1720,8 @@ static int ofld_poll(struct net_device *dev, int *budget)
 		}
 		deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
 	}
-	work_done = limit - avail;
-	*budget -= work_done;
-	dev->quota -= work_done;
-	return 1;
+
+	return work_done;
 }
 
 /**
@@ -1715,85 +1831,6 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
 		netif_rx(skb);
 }
 
-#define SKB_DATA_SIZE 128
-
-static void skb_data_init(struct sk_buff *skb, struct sge_fl_page *p,
-			  unsigned int len)
-{
-	skb->len = len;
-	if (len <= SKB_DATA_SIZE) {
-		skb_copy_to_linear_data(skb, p->va, len);
-		skb->tail += len;
-		put_page(p->frag.page);
-	} else {
-		skb_copy_to_linear_data(skb, p->va, SKB_DATA_SIZE);
-		skb_shinfo(skb)->frags[0].page = p->frag.page;
-		skb_shinfo(skb)->frags[0].page_offset =
-		    p->frag.page_offset + SKB_DATA_SIZE;
-		skb_shinfo(skb)->frags[0].size = len - SKB_DATA_SIZE;
-		skb_shinfo(skb)->nr_frags = 1;
-		skb->data_len = len - SKB_DATA_SIZE;
-		skb->tail += SKB_DATA_SIZE;
-		skb->truesize += skb->data_len;
-	}
-}
-
-/**
-*      get_packet - return the next ingress packet buffer from a free list
-*      @adap: the adapter that received the packet
-*      @fl: the SGE free list holding the packet
-*      @len: the packet length including any SGE padding
-*      @drop_thres: # of remaining buffers before we start dropping packets
-*
-*      Get the next packet from a free list and complete setup of the
-*      sk_buff.  If the packet is small we make a copy and recycle the
-*      original buffer, otherwise we use the original buffer itself.  If a
-*      positive drop threshold is supplied packets are dropped and their
-*      buffers recycled if (a) the number of remaining buffers is under the
-*      threshold and the packet is too big to copy, or (b) the packet should
-*      be copied but there is no memory for the copy.
-*/
-static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
-				  unsigned int len, unsigned int drop_thres)
-{
-	struct sk_buff *skb = NULL;
-	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
-
-	prefetch(sd->t.skb->data);
-
-	if (len <= SGE_RX_COPY_THRES) {
-		skb = alloc_skb(len, GFP_ATOMIC);
-		if (likely(skb != NULL)) {
-			struct rx_desc *d = &fl->desc[fl->cidx];
-			dma_addr_t mapping =
-			    (dma_addr_t)((u64) be32_to_cpu(d->addr_hi) << 32 |
-					 be32_to_cpu(d->addr_lo));
-
-			__skb_put(skb, len);
-			pci_dma_sync_single_for_cpu(adap->pdev, mapping, len,
-						    PCI_DMA_FROMDEVICE);
-			skb_copy_from_linear_data(sd->t.skb, skb->data, len);
-			pci_dma_sync_single_for_device(adap->pdev, mapping, len,
-						       PCI_DMA_FROMDEVICE);
-		} else if (!drop_thres)
-			goto use_orig_buf;
-recycle:
-		recycle_rx_buf(adap, fl, fl->cidx);
-		return skb;
-	}
-
-	if (unlikely(fl->credits < drop_thres))
-		goto recycle;
-
-use_orig_buf:
-	pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
-			 fl->buf_size, PCI_DMA_FROMDEVICE);
-	skb = sd->t.skb;
-	skb_put(skb, len);
-	__refill_fl(adap, fl);
-	return skb;
-}
-
 /**
  *	handle_rsp_cntrl_info - handles control information in a response
  *	@qs: the queue set corresponding to the response
@@ -1935,7 +1972,7 @@ static int process_responses(struct adapter *adap, struct sge_qset *qs,
 		} else if (flags & F_RSPD_IMM_DATA_VALID) {
 			skb = get_imm_packet(r);
 			if (unlikely(!skb)) {
-			      no_mem:
+no_mem:
 				q->next_holdoff = NOMEM_INTR_DELAY;
 				q->nomem++;
 				/* consume one credit since we tried */
@@ -1945,53 +1982,29 @@ static int process_responses(struct adapter *adap, struct sge_qset *qs,
 			q->imm_data++;
 			ethpad = 0;
 		} else if ((len = ntohl(r->len_cq)) != 0) {
-			struct sge_fl *fl =
-			    (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
-
-			if (fl->buf_size == RX_PAGE_SIZE) {
-				struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
-				struct sge_fl_page *p = &sd->t.page;
+			struct sge_fl *fl;
 
-				prefetch(p->va);
-				prefetch(p->va + L1_CACHE_BYTES);
+			fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
+			if (fl->use_pages) {
+				void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
 
+				prefetch(addr);
+#if L1_CACHE_BYTES < 128
+				prefetch(addr + L1_CACHE_BYTES);
+#endif
 				__refill_fl(adap, fl);
 
-				pci_unmap_single(adap->pdev,
-						 pci_unmap_addr(sd, dma_addr),
-						 fl->buf_size,
-						 PCI_DMA_FROMDEVICE);
-
-				if (eth) {
-					if (unlikely(fl->credits <
-						     SGE_RX_DROP_THRES))
-						goto eth_recycle;
-
-					skb = alloc_skb(SKB_DATA_SIZE,
-							GFP_ATOMIC);
-					if (unlikely(!skb)) {
-eth_recycle:
-						q->rx_drops++;
-						recycle_rx_buf(adap, fl,
-							       fl->cidx);
-						goto eth_done;
-					}
-				} else {
-					skb = alloc_skb(SKB_DATA_SIZE,
-							GFP_ATOMIC);
-					if (unlikely(!skb))
-						goto no_mem;
-				}
-
-				skb_data_init(skb, p, G_RSPD_LEN(len));
-eth_done:
-				fl->credits--;
-				q->eth_pkts++;
-			} else {
-				fl->credits--;
+				skb = get_packet_pg(adap, fl, G_RSPD_LEN(len),
+						 eth ? SGE_RX_DROP_THRES : 0);
+			} else
 				skb = get_packet(adap, fl, G_RSPD_LEN(len),
 						 eth ? SGE_RX_DROP_THRES : 0);
-			}
+			if (unlikely(!skb)) {
+				if (!eth)
+					goto no_mem;
+				q->rx_drops++;
+			} else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
+				__skb_pull(skb, 2);
 
 			if (++fl->cidx == fl->size)
 				fl->cidx = 0;
@@ -2016,20 +2029,15 @@ eth_done:
 			q->credits = 0;
 		}
 
-		if (skb) {
-			/* Preserve the RSS info in csum & priority */
-			skb->csum = rss_hi;
-			skb->priority = rss_lo;
-
+		if (likely(skb != NULL)) {
 			if (eth)
 				rx_eth(adap, q, skb, ethpad);
 			else {
-				if (unlikely(r->rss_hdr.opcode ==
-					     CPL_TRACE_PKT))
-					__skb_pull(skb, ethpad);
-
-				ngathered = rx_offload(&adap->tdev, q,
-						       skb, offload_skbs,
+				/* Preserve the RSS info in csum & priority */
+				skb->csum = rss_hi;
+				skb->priority = rss_lo;
+				ngathered = rx_offload(&adap->tdev, q, skb,
+						       offload_skbs,
 						       ngathered);
 			}
 		}
@@ -2057,49 +2065,47 @@ static inline int is_pure_response(const struct rsp_desc *r)
 
 /**
  *	napi_rx_handler - the NAPI handler for Rx processing
- *	@dev: the net device
+ *	@napi: the napi instance
  *	@budget: how many packets we can process in this round
  *
  *	Handler for new data events when using NAPI.
  */
-static int napi_rx_handler(struct net_device *dev, int *budget)
+static int napi_rx_handler(struct napi_struct *napi, int budget)
 {
-	struct adapter *adap = dev->priv;
-	struct sge_qset *qs = dev2qset(dev);
-	int effective_budget = min(*budget, dev->quota);
-
-	int work_done = process_responses(adap, qs, effective_budget);
-	*budget -= work_done;
-	dev->quota -= work_done;
-
-	if (work_done >= effective_budget)
-		return 1;
+	struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
+	struct adapter *adap = qs->adap;
+	int work_done = process_responses(adap, qs, budget);
 
-	netif_rx_complete(dev);
+	if (likely(work_done < budget)) {
+		napi_complete(napi);
 
-	/*
-	 * Because we don't atomically flush the following write it is
-	 * possible that in very rare cases it can reach the device in a way
-	 * that races with a new response being written plus an error interrupt
-	 * causing the NAPI interrupt handler below to return unhandled status
-	 * to the OS.  To protect against this would require flushing the write
-	 * and doing both the write and the flush with interrupts off.  Way too
-	 * expensive and unjustifiable given the rarity of the race.
-	 *
-	 * The race cannot happen at all with MSI-X.
-	 */
-	t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
-		     V_NEWTIMER(qs->rspq.next_holdoff) |
-		     V_NEWINDEX(qs->rspq.cidx));
-	return 0;
+		/*
+		 * Because we don't atomically flush the following
+		 * write it is possible that in very rare cases it can
+		 * reach the device in a way that races with a new
+		 * response being written plus an error interrupt
+		 * causing the NAPI interrupt handler below to return
+		 * unhandled status to the OS.  To protect against
+		 * this would require flushing the write and doing
+		 * both the write and the flush with interrupts off.
+		 * Way too expensive and unjustifiable given the
+		 * rarity of the race.
+		 *
+		 * The race cannot happen at all with MSI-X.
+		 */
+		t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
+			     V_NEWTIMER(qs->rspq.next_holdoff) |
+			     V_NEWINDEX(qs->rspq.cidx));
+	}
+	return work_done;
 }
 
 /*
  * Returns true if the device is already scheduled for polling.
  */
-static inline int napi_is_scheduled(struct net_device *dev)
+static inline int napi_is_scheduled(struct napi_struct *napi)
 {
-	return test_bit(__LINK_STATE_RX_SCHED, &dev->state);
+	return test_bit(NAPI_STATE_SCHED, &napi->state);
 }
 
 /**
@@ -2182,8 +2188,7 @@ static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
 			     V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
 		return 0;
 	}
-	if (likely(__netif_rx_schedule_prep(qs->netdev)))
-		__netif_rx_schedule(qs->netdev);
+	napi_schedule(&qs->napi);
 	return 1;
 }
 
@@ -2194,7 +2199,7 @@ static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
 {
 	struct sge_qset *qs = cookie;
-	struct adapter *adap = qs->netdev->priv;
+	struct adapter *adap = qs->adap;
 	struct sge_rspq *q = &qs->rspq;
 
 	spin_lock(&q->lock);
@@ -2210,15 +2215,14 @@ irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
  * The MSI-X interrupt handler for an SGE response queue for the NAPI case
  * (i.e., response queue serviced by NAPI polling).
  */
-irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
+static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
 {
 	struct sge_qset *qs = cookie;
-	struct adapter *adap = qs->netdev->priv;
 	struct sge_rspq *q = &qs->rspq;
 
 	spin_lock(&q->lock);
 
-	if (handle_responses(adap, q) < 0)
+	if (handle_responses(qs->adap, q) < 0)
 		q->unhandled_irqs++;
 	spin_unlock(&q->lock);
 	return IRQ_HANDLED;
@@ -2261,11 +2265,13 @@ static irqreturn_t t3_intr_msi(int irq, void *cookie)
 	return IRQ_HANDLED;
 }
 
-static int rspq_check_napi(struct net_device *dev, struct sge_rspq *q)
+static int rspq_check_napi(struct sge_qset *qs)
 {
-	if (!napi_is_scheduled(dev) && is_new_response(&q->desc[q->cidx], q)) {
-		if (likely(__netif_rx_schedule_prep(dev)))
-			__netif_rx_schedule(dev);
+	struct sge_rspq *q = &qs->rspq;
+
+	if (!napi_is_scheduled(&qs->napi) &&
+	    is_new_response(&q->desc[q->cidx], q)) {
+		napi_schedule(&qs->napi);
 		return 1;
 	}
 	return 0;
@@ -2278,7 +2284,7 @@ static int rspq_check_napi(struct net_device *dev, struct sge_rspq *q)
  * one SGE response queue per port in this mode and protect all response
  * queues with queue 0's lock.
  */
-irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
+static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
 {
 	int new_packets;
 	struct adapter *adap = cookie;
@@ -2286,10 +2292,9 @@ irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
 
 	spin_lock(&q->lock);
 
-	new_packets = rspq_check_napi(adap->sge.qs[0].netdev, q);
+	new_packets = rspq_check_napi(&adap->sge.qs[0]);
 	if (adap->params.nports == 2)
-		new_packets += rspq_check_napi(adap->sge.qs[1].netdev,
-					       &adap->sge.qs[1].rspq);
+		new_packets += rspq_check_napi(&adap->sge.qs[1]);
 	if (!new_packets && t3_slow_intr_handler(adap) == 0)
 		q->unhandled_irqs++;
 
@@ -2392,9 +2397,9 @@ static irqreturn_t t3b_intr(int irq, void *cookie)
 static irqreturn_t t3b_intr_napi(int irq, void *cookie)
 {
 	u32 map;
-	struct net_device *dev;
 	struct adapter *adap = cookie;
-	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
+	struct sge_qset *qs0 = &adap->sge.qs[0];
+	struct sge_rspq *q0 = &qs0->rspq;
 
 	t3_write_reg(adap, A_PL_CLI, 0);
 	map = t3_read_reg(adap, A_SG_DATA_INTR);
@@ -2407,18 +2412,11 @@ static irqreturn_t t3b_intr_napi(int irq, void *cookie)
 	if (unlikely(map & F_ERRINTR))
 		t3_slow_intr_handler(adap);
 
-	if (likely(map & 1)) {
-		dev = adap->sge.qs[0].netdev;
-
-		if (likely(__netif_rx_schedule_prep(dev)))
-			__netif_rx_schedule(dev);
-	}
-	if (map & 2) {
-		dev = adap->sge.qs[1].netdev;
+	if (likely(map & 1))
+		napi_schedule(&qs0->napi);
 
-		if (likely(__netif_rx_schedule_prep(dev)))
-			__netif_rx_schedule(dev);
-	}
+	if (map & 2)
+		napi_schedule(&adap->sge.qs[1].napi);
 
 	spin_unlock(&q0->lock);
 	return IRQ_HANDLED;
@@ -2433,7 +2431,7 @@ static irqreturn_t t3b_intr_napi(int irq, void *cookie)
  *	(MSI-X, MSI, or legacy) and whether NAPI will be used to service the
  *	response queues.
  */
-intr_handler_t t3_intr_handler(struct adapter *adap, int polling)
+irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
 {
 	if (adap->flags & USING_MSIX)
 		return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
@@ -2465,6 +2463,10 @@ void t3_sge_err_intr_handler(struct adapter *adapter)
 			 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
 	}
 
+	if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
+		CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
+			 status & F_HIPIODRBDROPERR ? "high" : "lo");
+
 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
 	if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
 		t3_fatal_err(adapter);
@@ -2497,7 +2499,7 @@ static void sge_timer_cb(unsigned long data)
 {
 	spinlock_t *lock;
 	struct sge_qset *qs = (struct sge_qset *)data;
-	struct adapter *adap = qs->netdev->priv;
+	struct adapter *adap = qs->adap;
 
 	if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
 		reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
@@ -2508,9 +2510,9 @@ static void sge_timer_cb(unsigned long data)
 		spin_unlock(&qs->txq[TXQ_OFLD].lock);
 	}
 	lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
-	    &adap->sge.qs[0].rspq.lock;
+					    &adap->sge.qs[0].rspq.lock;
 	if (spin_trylock_irq(lock)) {
-		if (!napi_is_scheduled(qs->netdev)) {
+		if (!napi_is_scheduled(&qs->napi)) {
 			u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
 
 			if (qs->fl[0].credits < qs->fl[0].size)
@@ -2544,12 +2546,9 @@ static void sge_timer_cb(unsigned long data)
  */
 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
 {
-	if (!qs->netdev)
-		return;
-
 	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
 	qs->rspq.polling = p->polling;
-	qs->netdev->poll = p->polling ? napi_rx_handler : ofld_poll;
+	qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
 }
 
 /**
@@ -2569,7 +2568,7 @@ void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  */
 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 		      int irq_vec_idx, const struct qset_params *p,
-		      int ntxq, struct net_device *netdev)
+		      int ntxq, struct net_device *dev)
 {
 	int i, ret = -ENOMEM;
 	struct sge_qset *q = &adapter->sge.qs[id];
@@ -2635,25 +2634,15 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 	q->txq[TXQ_ETH].stop_thres = nports *
 	    flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
 
-	if (!is_offload(adapter)) {
-#ifdef USE_RX_PAGE
-		q->fl[0].buf_size = RX_PAGE_SIZE;
-#else
-		q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + 2 +
-		    sizeof(struct cpl_rx_pkt);
-#endif
-		q->fl[1].buf_size = MAX_FRAME_SIZE + 2 +
-		    sizeof(struct cpl_rx_pkt);
-	} else {
-#ifdef USE_RX_PAGE
-		q->fl[0].buf_size = RX_PAGE_SIZE;
+#if FL0_PG_CHUNK_SIZE > 0
+	q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
 #else
-		q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE +
-		    sizeof(struct cpl_rx_data);
+	q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
 #endif
-		q->fl[1].buf_size = (16 * 1024) -
-		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	}
+	q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
+	q->fl[1].buf_size = is_offload(adapter) ?
+		(16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+		MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
 
 	spin_lock(&adapter->sge.reg_lock);
 
@@ -2700,16 +2689,10 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 	}
 
 	spin_unlock(&adapter->sge.reg_lock);
-	q->netdev = netdev;
-	t3_update_qset_coalesce(q, p);
 
-	/*
-	 * We use atalk_ptr as a backpointer to a qset.  In case a device is
-	 * associated with multiple queue sets only the first one sets
-	 * atalk_ptr.
-	 */
-	if (netdev->atalk_ptr == NULL)
-		netdev->atalk_ptr = q;
+	q->adap = adapter;
+	q->netdev = dev;
+	t3_update_qset_coalesce(q, p);
 
 	refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
 	refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);