diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index 90b54e4c5c3bb052a50841c0afc220cf97384edf..af251a5df844929097ef67adb394936871bbec3b 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -60,8 +60,8 @@
 #include "bnx2x.h"
 #include "bnx2x_init.h"
 
-#define DRV_MODULE_VERSION      "1.42.4"
-#define DRV_MODULE_RELDATE      "2008/4/9"
+#define DRV_MODULE_VERSION      "1.45.6"
+#define DRV_MODULE_RELDATE      "2008/06/23"
 #define BNX2X_BC_VER           0x040200
 
 /* Time in jiffies before concluding the transmitter is hung */
@@ -79,6 +79,7 @@ MODULE_VERSION(DRV_MODULE_VERSION);
 static int use_inta;
 static int poll;
 static int debug;
+static int disable_tpa;
 static int nomcp;
 static int load_count[3]; /* 0-common, 1-port0, 2-port1 */
 static int use_multi;
@@ -86,6 +87,7 @@ static int use_multi;
 module_param(use_inta, int, 0);
 module_param(poll, int, 0);
 module_param(debug, int, 0);
+module_param(disable_tpa, int, 0);
 module_param(nomcp, int, 0);
 MODULE_PARM_DESC(use_inta, "use INT#A instead of MSI-X");
 MODULE_PARM_DESC(poll, "use polling (for debug)");
@@ -512,13 +514,16 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
                          i, fp->tx_pkt_prod, fp->tx_pkt_cons, fp->tx_bd_prod,
                          fp->tx_bd_cons, le16_to_cpu(*fp->tx_cons_sb));
                BNX2X_ERR("          rx_comp_prod(%x)  rx_comp_cons(%x)"
-                         "  *rx_cons_sb(%x)\n",
+                         "  *rx_cons_sb(%x)  *rx_bd_cons_sb(%x)"
+                         "  rx_sge_prod(%x)  last_max_sge(%x)\n",
                          fp->rx_comp_prod, fp->rx_comp_cons,
-                         le16_to_cpu(*fp->rx_cons_sb));
+                         le16_to_cpu(*fp->rx_cons_sb),
+                         le16_to_cpu(*fp->rx_bd_cons_sb),
+                         fp->rx_sge_prod, fp->last_max_sge);
                BNX2X_ERR("          fp_c_idx(%x)  fp_u_idx(%x)"
-                         "  bd data(%x,%x)\n",
+                         "  bd data(%x,%x)  rx_alloc_failed(%lx)\n",
                          fp->fp_c_idx, fp->fp_u_idx, hw_prods->packets_prod,
-                         hw_prods->bds_prod);
+                         hw_prods->bds_prod, fp->rx_alloc_failed);
 
                start = TX_BD(le16_to_cpu(*fp->tx_cons_sb) - 10);
                end = TX_BD(le16_to_cpu(*fp->tx_cons_sb) + 245);
@@ -548,6 +553,16 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
                                  j, rx_bd[1], rx_bd[0], sw_bd->skb);
                }
 
+               start = 0;
+               end = RX_SGE_CNT*NUM_RX_SGE_PAGES;
+               for (j = start; j < end; j++) {
+                       u32 *rx_sge = (u32 *)&fp->rx_sge_ring[j];
+                       struct sw_rx_page *sw_page = &fp->rx_page_ring[j];
+
+                       BNX2X_ERR("rx_sge[%x]=[%x:%x]  sw_page=[%p]\n",
+                                 j, rx_sge[1], rx_sge[0], sw_page->page);
+               }
+
                start = RCQ_BD(fp->rx_comp_cons - 10);
                end = RCQ_BD(fp->rx_comp_cons + 503);
                for (j = start; j < end; j++) {
@@ -568,8 +583,8 @@ static void bnx2x_panic_dump(struct bnx2x *bp)
        bnx2x_mc_assert(bp);
        BNX2X_ERR("end crash dump -----------------\n");
 
-       bp->stats_state = STATS_STATE_DISABLE;
-       DP(BNX2X_MSG_STATS, "stats_state - DISABLE\n");
+       bp->stats_state = STATS_STATE_DISABLED;
+       DP(BNX2X_MSG_STATS, "stats_state - DISABLED\n");
 }
 
 static void bnx2x_int_enable(struct bnx2x *bp)
@@ -799,7 +814,7 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fastpath *fp,
        }
 
        /* release skb */
-       BUG_TRAP(skb);
+       WARN_ON(!skb);
        dev_kfree_skb(skb);
        tx_buf->first_bd = 0;
        tx_buf->skb = NULL;
@@ -822,9 +837,9 @@ static inline u16 bnx2x_tx_avail(struct bnx2x_fastpath *fp)
        used = SUB_S16(prod, cons) + (s16)NUM_TX_RINGS;
 
 #ifdef BNX2X_STOP_ON_ERROR
-       BUG_TRAP(used >= 0);
-       BUG_TRAP(used <= fp->bp->tx_ring_size);
-       BUG_TRAP((fp->bp->tx_ring_size - used) <= MAX_TX_AVAIL);
+       WARN_ON(used < 0);
+       WARN_ON(used > fp->bp->tx_ring_size);
+       WARN_ON((fp->bp->tx_ring_size - used) > MAX_TX_AVAIL);
 #endif
 
        return (s16)(fp->bp->tx_ring_size) - used;
@@ -948,6 +963,7 @@ static void bnx2x_sp_event(struct bnx2x_fastpath *fp,
        case (RAMROD_CMD_ID_ETH_SET_MAC | BNX2X_STATE_OPEN):
        case (RAMROD_CMD_ID_ETH_SET_MAC | BNX2X_STATE_DIAG):
                DP(NETIF_MSG_IFUP, "got set mac ramrod\n");
+               bp->set_mac_pending = 0;
                break;
 
        case (RAMROD_CMD_ID_ETH_SET_MAC | BNX2X_STATE_CLOSING_WAIT4_HALT):
@@ -962,6 +978,62 @@ static void bnx2x_sp_event(struct bnx2x_fastpath *fp,
        mb(); /* force bnx2x_wait_ramrod() to see the change */
 }
 
+static inline void bnx2x_free_rx_sge(struct bnx2x *bp,
+                                    struct bnx2x_fastpath *fp, u16 index)
+{
+       struct sw_rx_page *sw_buf = &fp->rx_page_ring[index];
+       struct page *page = sw_buf->page;
+       struct eth_rx_sge *sge = &fp->rx_sge_ring[index];
+
+       /* Skip "next page" elements */
+       if (!page)
+               return;
+
+       pci_unmap_page(bp->pdev, pci_unmap_addr(sw_buf, mapping),
+                      BCM_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE);
+       __free_pages(page, PAGES_PER_SGE_SHIFT);
+
+       sw_buf->page = NULL;
+       sge->addr_hi = 0;
+       sge->addr_lo = 0;
+}
+
+static inline void bnx2x_free_rx_sge_range(struct bnx2x *bp,
+                                          struct bnx2x_fastpath *fp, int last)
+{
+       int i;
+
+       for (i = 0; i < last; i++)
+               bnx2x_free_rx_sge(bp, fp, i);
+}
+
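+/* Allocate a compound page of PAGES_PER_SGE pages for one SGE ring
+ * slot and map it for DMA; the mapping is kept in the sw ring entry
+ * (for the later unmap) and written into the HW SGE descriptor as
+ * hi/lo 32-bit halves.
+ */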
+static inline int bnx2x_alloc_rx_sge(struct bnx2x *bp,
+                                    struct bnx2x_fastpath *fp, u16 index)
+{
+       struct page *page = alloc_pages(GFP_ATOMIC, PAGES_PER_SGE_SHIFT);
+       struct sw_rx_page *sw_buf = &fp->rx_page_ring[index];
+       struct eth_rx_sge *sge = &fp->rx_sge_ring[index];
+       dma_addr_t mapping;
+
+       if (unlikely(page == NULL))
+               return -ENOMEM;
+
+       mapping = pci_map_page(bp->pdev, page, 0, BCM_PAGE_SIZE*PAGES_PER_SGE,
+                              PCI_DMA_FROMDEVICE);
+       if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
+               __free_pages(page, PAGES_PER_SGE_SHIFT);
+               return -ENOMEM;
+       }
+
+       sw_buf->page = page;
+       pci_unmap_addr_set(sw_buf, mapping, mapping);
+
+       sge->addr_hi = cpu_to_le32(U64_HI(mapping));
+       sge->addr_lo = cpu_to_le32(U64_LO(mapping));
+
+       return 0;
+}
+
 static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp,
                                     struct bnx2x_fastpath *fp, u16 index)
 {
@@ -976,7 +1048,7 @@ static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp,
 
        mapping = pci_map_single(bp->pdev, skb->data, bp->rx_buf_use_size,
                                 PCI_DMA_FROMDEVICE);
-       if (unlikely(dma_mapping_error(mapping))) {
+       if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
                dev_kfree_skb(skb);
                return -ENOMEM;
        }
@@ -1015,12 +1087,310 @@ static void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,
        *prod_bd = *cons_bd;
 }
 
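+/* Track the highest SGE index the FW has reported so far; SUB_S16()
+ * keeps the comparison correct when the 16-bit index wraps around.
+ */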
+static inline void bnx2x_update_last_max_sge(struct bnx2x_fastpath *fp,
+                                            u16 idx)
+{
+       u16 last_max = fp->last_max_sge;
+
+       if (SUB_S16(idx, last_max) > 0)
+               fp->last_max_sge = idx;
+}
+
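+/* The last two entries of each SGE ring page hold the "next page"
+ * pointer rather than real SGEs, so their mask bits must always stay
+ * cleared: the FW will never report them as consumed.
+ */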
+static void bnx2x_clear_sge_mask_next_elems(struct bnx2x_fastpath *fp)
+{
+       int i, j;
+
+       for (i = 1; i <= NUM_RX_SGE_PAGES; i++) {
+               int idx = RX_SGE_CNT * i - 1;
+
+               for (j = 0; j < 2; j++) {
+                       SGE_MASK_CLEAR_BIT(fp, idx);
+                       idx--;
+               }
+       }
+}
+
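+/* Reclaim the SGEs consumed by an aggregated packet: clear the mask
+ * bit of every SGE listed in the CQE's SGL, then advance rx_sge_prod
+ * over mask elements that have become all-zero, re-arming them.
+ */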
+static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
+                                 struct eth_fast_path_rx_cqe *fp_cqe)
+{
+       struct bnx2x *bp = fp->bp;
+       u16 sge_len = BCM_PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) -
+                                    le16_to_cpu(fp_cqe->len_on_bd)) >>
+                     BCM_PAGE_SHIFT;
+       u16 last_max, last_elem, first_elem;
+       u16 delta = 0;
+       u16 i;
+
+       if (!sge_len)
+               return;
+
+       /* First mark all used pages */
+       for (i = 0; i < sge_len; i++)
+               SGE_MASK_CLEAR_BIT(fp, RX_SGE(le16_to_cpu(fp_cqe->sgl[i])));
+
+       DP(NETIF_MSG_RX_STATUS, "fp_cqe->sgl[%d] = %d\n",
+          sge_len - 1, le16_to_cpu(fp_cqe->sgl[sge_len - 1]));
+
+       /* Here we assume that the last SGE index is the biggest */
+       prefetch((void *)(fp->sge_mask));
+       bnx2x_update_last_max_sge(fp, le16_to_cpu(fp_cqe->sgl[sge_len - 1]));
+
+       last_max = RX_SGE(fp->last_max_sge);
+       last_elem = last_max >> RX_SGE_MASK_ELEM_SHIFT;
+       first_elem = RX_SGE(fp->rx_sge_prod) >> RX_SGE_MASK_ELEM_SHIFT;
+
+       /* If ring is not full */
+       if (last_elem + 1 != first_elem)
+               last_elem++;
+
+       /* Now update the prod */
+       for (i = first_elem; i != last_elem; i = NEXT_SGE_MASK_ELEM(i)) {
+               if (likely(fp->sge_mask[i]))
+                       break;
+
+               fp->sge_mask[i] = RX_SGE_MASK_ELEM_ONE_MASK;
+               delta += RX_SGE_MASK_ELEM_SZ;
+       }
+
+       if (delta > 0) {
+               fp->rx_sge_prod += delta;
+               /* clear page-end entries */
+               bnx2x_clear_sge_mask_next_elems(fp);
+       }
+
+       DP(NETIF_MSG_RX_STATUS,
+          "fp->last_max_sge = %d  fp->rx_sge_prod = %d\n",
+          fp->last_max_sge, fp->rx_sge_prod);
+}
+
+static inline void bnx2x_init_sge_ring_bit_mask(struct bnx2x_fastpath *fp)
+{
+       /* Set the mask to all 1-s: it's faster to compare to 0 than to 0xf-s */
+       memset(fp->sge_mask, 0xff,
+              (NUM_RX_SGE >> RX_SGE_MASK_ELEM_SHIFT)*sizeof(u64));
+
+       /* Clear the last two indices in each page: these are the
+          indices that correspond to the "next" element and hence
+          will never be indicated by the FW, so they should be
+          removed from the calculations. */
+       bnx2x_clear_sge_mask_next_elems(fp);
+}
+
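+/* TPA start: the FW aggregates into the skb currently at 'cons', so
+ * park that skb in the per-queue tpa_pool bin and put the bin's spare
+ * skb on the ring at 'prod' in its place.
+ */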
+static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
+                           struct sk_buff *skb, u16 cons, u16 prod)
+{
+       struct bnx2x *bp = fp->bp;
+       struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
+       struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
+       struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
+       dma_addr_t mapping;
+
+       /* move empty skb from pool to prod and map it */
+       prod_rx_buf->skb = fp->tpa_pool[queue].skb;
+       mapping = pci_map_single(bp->pdev, fp->tpa_pool[queue].skb->data,
+                                bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
+       pci_unmap_addr_set(prod_rx_buf, mapping, mapping);
+
+       /* move partial skb from cons to pool (don't unmap yet) */
+       fp->tpa_pool[queue] = *cons_rx_buf;
+
+       /* mark bin state as start - print error if current state != stop */
+       if (fp->tpa_state[queue] != BNX2X_TPA_STOP)
+               BNX2X_ERR("start of bin not in stop [%d]\n", queue);
+
+       fp->tpa_state[queue] = BNX2X_TPA_START;
+
+       /* point prod_bd to new skb */
+       prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
+       prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
+
+#ifdef BNX2X_STOP_ON_ERROR
+       fp->tpa_queue_used |= (1 << queue);
+#ifdef __powerpc64__
+       DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%lx\n",
+#else
+       DP(NETIF_MSG_RX_STATUS, "fp->tpa_queue_used = 0x%llx\n",
+#endif
+          fp->tpa_queue_used);
+#endif
+}
+
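+/* Build the non-linear part of an aggregated skb: walk the SGL in the
+ * CQE, attach each SGE page to the skb as a page fragment and refill
+ * the ring slot with a freshly allocated page.
+ */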
+static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
+                              struct sk_buff *skb,
+                              struct eth_fast_path_rx_cqe *fp_cqe,
+                              u16 cqe_idx)
+{
+       struct sw_rx_page *rx_pg, old_rx_pg;
+       struct page *sge;
+       u16 len_on_bd = le16_to_cpu(fp_cqe->len_on_bd);
+       u32 i, frag_len, frag_size, pages;
+       int err;
+       int j;
+
+       frag_size = le16_to_cpu(fp_cqe->pkt_len) - len_on_bd;
+       pages = BCM_PAGE_ALIGN(frag_size) >> BCM_PAGE_SHIFT;
+
+       /* This is needed in order to enable forwarding support */
+       if (frag_size)
+               skb_shinfo(skb)->gso_size = min((u32)BCM_PAGE_SIZE,
+                                              max(frag_size, (u32)len_on_bd));
+
+#ifdef BNX2X_STOP_ON_ERROR
+       if (pages > 8*PAGES_PER_SGE) {
+               BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
+                         pages, cqe_idx);
+               BNX2X_ERR("fp_cqe->pkt_len = %d  fp_cqe->len_on_bd = %d\n",
+                         fp_cqe->pkt_len, len_on_bd);
+               bnx2x_panic();
+               return -EINVAL;
+       }
+#endif
+
+       /* Run through the SGL and compose the fragmented skb */
+       for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
+               u16 sge_idx = RX_SGE(le16_to_cpu(fp_cqe->sgl[j]));
+
+               /* FW gives the indices of the SGE as if the ring is an array
+                  (meaning that the "next" element will consume 2 indices) */
+               frag_len = min(frag_size, (u32)(BCM_PAGE_SIZE*PAGES_PER_SGE));
+               rx_pg = &fp->rx_page_ring[sge_idx];
+               sge = rx_pg->page;
+               old_rx_pg = *rx_pg;
+
+               /* If we fail to allocate a substitute page, we simply stop
+                  where we are and drop the whole packet */
+               err = bnx2x_alloc_rx_sge(bp, fp, sge_idx);
+               if (unlikely(err)) {
+                       fp->rx_alloc_failed++;
+                       return err;
+               }
+
+               /* Unmap the page as we are going to pass it to the stack */
+               pci_unmap_page(bp->pdev, pci_unmap_addr(&old_rx_pg, mapping),
+                             BCM_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE);
+
+               /* Add one frag and update the appropriate fields in the skb */
+               skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len);
+
+               skb->data_len += frag_len;
+               skb->truesize += frag_len;
+               skb->len += frag_len;
+
+               frag_size -= frag_len;
+       }
+
+       return 0;
+}
+
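+/* TPA end: the aggregation in 'queue' is complete.  Recompute the IP
+ * checksum (the aggregated packet's IP header differs from the one on
+ * the wire), attach the SGE pages as frags via bnx2x_fill_frag_skb()
+ * and pass the skb up; a newly allocated skb refills the bin.
+ */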
+static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
+                          u16 queue, int pad, int len, union eth_rx_cqe *cqe,
+                          u16 cqe_idx)
+{
+       struct sw_rx_bd *rx_buf = &fp->tpa_pool[queue];
+       struct sk_buff *skb = rx_buf->skb;
+       /* alloc new skb */
+       struct sk_buff *new_skb = netdev_alloc_skb(bp->dev, bp->rx_buf_size);
+
+       /* Unmap skb in the pool anyway, as we are going to change
+          pool entry status to BNX2X_TPA_STOP even if new skb allocation
+          fails. */
+       pci_unmap_single(bp->pdev, pci_unmap_addr(rx_buf, mapping),
+                        bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
+
+       /* if alloc failed drop the packet and keep the buffer in the bin */
+       if (likely(new_skb)) {
+
+               prefetch(skb);
+               prefetch(((char *)(skb)) + 128);
+
+               /* fix ip xsum and give it to the stack
+                  (no need to map the new skb) */
+#ifdef BNX2X_STOP_ON_ERROR
+               if (pad + len > bp->rx_buf_size) {
+                       BNX2X_ERR("skb_put is about to fail...  "
+                                 "pad %d  len %d  rx_buf_size %d\n",
+                                 pad, len, bp->rx_buf_size);
+                       bnx2x_panic();
+                       return;
+               }
+#endif
+
+               skb_reserve(skb, pad);
+               skb_put(skb, len);
+
+               skb->protocol = eth_type_trans(skb, bp->dev);
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+               {
+                       struct iphdr *iph;
+
+                       iph = (struct iphdr *)skb->data;
+                       iph->check = 0;
+                       iph->check = ip_fast_csum((u8 *)iph, iph->ihl);
+               }
+
+               if (!bnx2x_fill_frag_skb(bp, fp, skb,
+                                        &cqe->fast_path_cqe, cqe_idx)) {
+#ifdef BCM_VLAN
+                       if ((bp->vlgrp != NULL) &&
+                           (le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags) &
+                            PARSING_FLAGS_VLAN))
+                               vlan_hwaccel_receive_skb(skb, bp->vlgrp,
+                                               le16_to_cpu(cqe->fast_path_cqe.
+                                                           vlan_tag));
+                       else
+#endif
+                               netif_receive_skb(skb);
+               } else {
+                       DP(NETIF_MSG_RX_STATUS, "Failed to allocate new pages"
+                          " - dropping packet!\n");
+                       dev_kfree_skb(skb);
+               }
+
+               bp->dev->last_rx = jiffies;
+
+               /* put new skb in bin */
+               fp->tpa_pool[queue].skb = new_skb;
+
+       } else {
+               DP(NETIF_MSG_RX_STATUS,
+                  "Failed to allocate new skb - dropping packet!\n");
+               fp->rx_alloc_failed++;
+       }
+
+       fp->tpa_state[queue] = BNX2X_TPA_STOP;
+}
+
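+/* Publish the new BD, CQE and SGE producers to the TStorm by writing
+ * the tstorm_eth_rx_producers structure word by word into internal
+ * memory.
+ */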
+static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
+                                       struct bnx2x_fastpath *fp,
+                                       u16 bd_prod, u16 rx_comp_prod,
+                                       u16 rx_sge_prod)
+{
+       struct tstorm_eth_rx_producers rx_prods = {0};
+       int i;
+
+       /* Update producers */
+       rx_prods.bd_prod = bd_prod;
+       rx_prods.cqe_prod = rx_comp_prod;
+       rx_prods.sge_prod = rx_sge_prod;
+
+       for (i = 0; i < sizeof(struct tstorm_eth_rx_producers)/4; i++)
+               REG_WR(bp, BAR_TSTRORM_INTMEM +
+                      TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)) + i*4,
+                      ((u32 *)&rx_prods)[i]);
+
+       DP(NETIF_MSG_RX_STATUS,
+          "Wrote: bd_prod %u  cqe_prod %u  sge_prod %u\n",
+          bd_prod, rx_comp_prod, rx_sge_prod);
+}
+
 static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 {
        struct bnx2x *bp = fp->bp;
        u16 bd_cons, bd_prod, bd_prod_fw, comp_ring_cons;
        u16 hw_comp_cons, sw_comp_cons, sw_comp_prod;
        int rx_pkt = 0;
+       u16 queue;
 
 #ifdef BNX2X_STOP_ON_ERROR
        if (unlikely(bp->panic))
@@ -1081,6 +1451,49 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
                        len = le16_to_cpu(cqe->fast_path_cqe.pkt_len);
                        pad = cqe->fast_path_cqe.placement_offset;
 
+                       /* If CQE is marked both TPA_START and TPA_END
+                          it is a non-TPA CQE */
+                       if ((!fp->disable_tpa) &&
+                           (TPA_TYPE(cqe_fp_flags) !=
+                                       (TPA_TYPE_START | TPA_TYPE_END))) {
+                               queue = cqe->fast_path_cqe.queue_index;
+
+                               if (TPA_TYPE(cqe_fp_flags) == TPA_TYPE_START) {
+                                       DP(NETIF_MSG_RX_STATUS,
+                                          "calling tpa_start on queue %d\n",
+                                          queue);
+
+                                       bnx2x_tpa_start(fp, queue, skb,
+                                                       bd_cons, bd_prod);
+                                       goto next_rx;
+                               }
+
+                               if (TPA_TYPE(cqe_fp_flags) == TPA_TYPE_END) {
+                                       DP(NETIF_MSG_RX_STATUS,
+                                          "calling tpa_stop on queue %d\n",
+                                          queue);
+
+                                       if (!BNX2X_RX_SUM_FIX(cqe))
+                                               BNX2X_ERR("STOP on non-TCP "
+                                                         "data\n");
+
+                                       /* This is the size of the linear
+                                          data on this skb */
+                                       len = le16_to_cpu(cqe->fast_path_cqe.
+                                                               len_on_bd);
+                                       bnx2x_tpa_stop(bp, fp, queue, pad,
+                                                   len, cqe, comp_ring_cons);
+#ifdef BNX2X_STOP_ON_ERROR
+                                       if (bp->panic)
+                                               return -EINVAL;
+#endif
+
+                                       bnx2x_update_sge_prod(fp,
+                                                       &cqe->fast_path_cqe);
+                                       goto next_cqe;
+                               }
+                       }
+
                        pci_dma_sync_single_for_device(bp->pdev,
                                        pci_unmap_addr(rx_buf, mapping),
                                                       pad + RX_COPY_THRESH,
@@ -1111,7 +1524,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
                                        DP(NETIF_MSG_RX_ERR,
                                           "ERROR  packet dropped "
                                           "because of alloc failure\n");
-                                       /* TBD count this as a drop? */
+                                       fp->rx_alloc_failed++;
                                        goto reuse_rx;
                                }
 
@@ -1137,6 +1550,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
                                DP(NETIF_MSG_RX_ERR,
                                   "ERROR  packet dropped because "
                                   "of alloc failure\n");
+                               fp->rx_alloc_failed++;
 reuse_rx:
                                bnx2x_reuse_rx_skb(fp, skb, bd_cons, bd_prod);
                                goto next_rx;
@@ -1183,11 +1597,9 @@ next_cqe:
        fp->rx_comp_cons = sw_comp_cons;
        fp->rx_comp_prod = sw_comp_prod;
 
-       REG_WR(bp, BAR_TSTRORM_INTMEM +
-               TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)),
-               sw_comp_prod);
-
-
+       /* Update producers */
+       bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod,
+                            fp->rx_sge_prod);
        mmiowb(); /* keep prod updates ordered */
 
        fp->rx_pkt += rx_pkt;
@@ -1279,6 +1691,7 @@ static irqreturn_t bnx2x_interrupt(int irq, void *dev_instance)
 
 /* end of fast path */
 
+static void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event);
 
 /* Link */
 
@@ -1787,10 +2200,28 @@ static void bnx2x_link_attn(struct bnx2x *bp)
 {
        int vn;
 
+       /* Make sure that we are synced with the current statistics */
+       bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+
        bnx2x_phy_hw_lock(bp);
        bnx2x_link_update(&bp->link_params, &bp->link_vars);
        bnx2x_phy_hw_unlock(bp);
 
+       if (bp->link_vars.link_up) {
+
+               if (bp->link_vars.mac_type == MAC_TYPE_BMAC) {
+                       struct host_port_stats *pstats;
+
+                       pstats = bnx2x_sp(bp, port_stats);
+                       /* reset old bmac stats */
+                       memset(&(pstats->mac_stx[0]), 0,
+                              sizeof(struct mac_stx));
+               }
+               if ((bp->state == BNX2X_STATE_OPEN) ||
+                   (bp->state == BNX2X_STATE_DISABLED))
+                       bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
+       }
+
        /* indicate link status */
        bnx2x_link_report(bp);
 
@@ -1835,6 +2266,11 @@ static void bnx2x__link_status_update(struct bnx2x *bp)
 
        bnx2x_link_status_update(&bp->link_params, &bp->link_vars);
 
+       if (bp->link_vars.link_up)
+               bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
+       else
+               bnx2x_stats_handle(bp, STATS_EVENT_STOP);
+
        /* indicate link status */
        bnx2x_link_report(bp);
 }
@@ -1851,6 +2287,8 @@ static void bnx2x_pmf_update(struct bnx2x *bp)
        val = (0xff0f | (1 << (BP_E1HVN(bp) + 4)));
        REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val);
        REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val);
+
+       bnx2x_stats_handle(bp, STATS_EVENT_PMF);
 }
 
 /* end of Link */
@@ -2376,6 +2814,10 @@ static void bnx2x_sp_task(struct work_struct *work)
        if (status & 0x1)
                bnx2x_attn_int(bp);
 
+       /* CStorm events: query_stats, port delete ramrod */
+       if (status & 0x2)
+               bp->stats_pending = 0;
+
        bnx2x_ack_sb(bp, DEF_SB_ID, ATTENTION_ID, bp->def_att_idx,
                     IGU_INT_NOP, 1);
        bnx2x_ack_sb(bp, DEF_SB_ID, USTORM_ID, le16_to_cpu(bp->def_u_idx),
@@ -2420,12 +2862,6 @@ static irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance)
 * Macros
 ****************************************************************************/
 
-#define UPDATE_STAT(s, t) \
-       do { \
-               estats->t += new->s - old->s; \
-               old->s = new->s; \
-       } while (0)
-
 /* sum[hi:lo] += add[hi:lo] */
 #define ADD_64(s_hi, a_hi, s_lo, a_lo) \
        do { \
@@ -2436,40 +2872,47 @@ static irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance)
 /* difference = minuend - subtrahend */
 #define DIFF_64(d_hi, m_hi, s_hi, d_lo, m_lo, s_lo) \
        do { \
-               if (m_lo < s_lo) {      /* underflow */ \
+               if (m_lo < s_lo) { \
+                       /* underflow */ \
                        d_hi = m_hi - s_hi; \
-                       if (d_hi > 0) { /* we can 'loan' 1 */ \
+                       if (d_hi > 0) { \
+                       /* we can 'loan' 1 */ \
                                d_hi--; \
                                d_lo = m_lo + (UINT_MAX - s_lo) + 1; \
-                       } else {        /* m_hi <= s_hi */ \
+                       } else { \
+                       /* m_hi <= s_hi */ \
                                d_hi = 0; \
                                d_lo = 0; \
                        } \
-               } else {                /* m_lo >= s_lo */ \
+               } else { \
+                       /* m_lo >= s_lo */ \
                        if (m_hi < s_hi) { \
-                           d_hi = 0; \
-                           d_lo = 0; \
-                       } else {        /* m_hi >= s_hi */ \
-                           d_hi = m_hi - s_hi; \
-                           d_lo = m_lo - s_lo; \
+                               d_hi = 0; \
+                               d_lo = 0; \
+                       } else { \
+                       /* m_hi >= s_hi */ \
+                               d_hi = m_hi - s_hi; \
+                               d_lo = m_lo - s_lo; \
                        } \
                } \
        } while (0)
 
-/* minuend -= subtrahend */
-#define SUB_64(m_hi, s_hi, m_lo, s_lo) \
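+/* mac_stx[0] keeps the latest HW snapshot of counter 's';
+   mac_stx[1] accumulates the running total for stat 't' */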
+#define UPDATE_STAT64(s, t) \
        do { \
-               DIFF_64(m_hi, m_hi, s_hi, m_lo, m_lo, s_lo); \
+               DIFF_64(diff.hi, new->s##_hi, pstats->mac_stx[0].t##_hi, \
+                       diff.lo, new->s##_lo, pstats->mac_stx[0].t##_lo); \
+               pstats->mac_stx[0].t##_hi = new->s##_hi; \
+               pstats->mac_stx[0].t##_lo = new->s##_lo; \
+               ADD_64(pstats->mac_stx[1].t##_hi, diff.hi, \
+                      pstats->mac_stx[1].t##_lo, diff.lo); \
        } while (0)
 
-#define UPDATE_STAT64(s_hi, t_hi, s_lo, t_lo) \
+#define UPDATE_STAT64_NIG(s, t) \
        do { \
-               DIFF_64(diff.hi, new->s_hi, old->s_hi, \
-                       diff.lo, new->s_lo, old->s_lo); \
-               old->s_hi = new->s_hi; \
-               old->s_lo = new->s_lo; \
-               ADD_64(estats->t_hi, diff.hi, \
-                      estats->t_lo, diff.lo); \
+               DIFF_64(diff.hi, new->s##_hi, old->s##_hi, \
+                       diff.lo, new->s##_lo, old->s##_lo); \
+               ADD_64(estats->t##_hi, diff.hi, \
+                      estats->t##_lo, diff.lo); \
        } while (0)
 
 /* sum[hi:lo] += add */
@@ -2479,16 +2922,25 @@ static irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance)
                s_hi += (s_lo < a) ? 1 : 0; \
        } while (0)
 
-#define UPDATE_EXTEND_STAT(s, t_hi, t_lo) \
+#define UPDATE_EXTEND_STAT(s) \
        do { \
-               ADD_EXTEND_64(estats->t_hi, estats->t_lo, new->s); \
+               ADD_EXTEND_64(pstats->mac_stx[1].s##_hi, \
+                             pstats->mac_stx[1].s##_lo, \
+                             new->s); \
        } while (0)
 
-#define UPDATE_EXTEND_TSTAT(s, t_hi, t_lo) \
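+/* the storm FW exports 32-bit counters; extend them to 64 bits by
+   accumulating the delta from the previous snapshot */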
+#define UPDATE_EXTEND_TSTAT(s, t) \
        do { \
                diff = le32_to_cpu(tclient->s) - old_tclient->s; \
                old_tclient->s = le32_to_cpu(tclient->s); \
-               ADD_EXTEND_64(estats->t_hi, estats->t_lo, diff); \
+               ADD_EXTEND_64(fstats->t##_hi, fstats->t##_lo, diff); \
+       } while (0)
+
+#define UPDATE_EXTEND_XSTAT(s, t) \
+       do { \
+               diff = le32_to_cpu(xclient->s) - old_xclient->s; \
+               old_xclient->s = le32_to_cpu(xclient->s); \
+               ADD_EXTEND_64(fstats->t##_hi, fstats->t##_lo, diff); \
        } while (0)
 
 /*
@@ -2511,96 +2963,314 @@ static inline long bnx2x_hilo(u32 *hiref)
  * Init service functions
  */
 
-static void bnx2x_init_mac_stats(struct bnx2x *bp)
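+/* Tell each storm where to DMA its statistics (the fw_stats buffer)
+ * and which storms collect them: XStorm and TStorm on, CStorm off.
+ */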
+static void bnx2x_storm_stats_init(struct bnx2x *bp)
 {
-       struct dmae_command *dmae;
-       int port = BP_PORT(bp);
-       int loader_idx = port * 8;
-       u32 opcode;
-       u32 mac_addr;
+       int func = BP_FUNC(bp);
 
-       bp->executer_idx = 0;
-       if (bp->func_stx) {
-               /* MCP */
-               opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC |
-                         DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
-#ifdef __BIG_ENDIAN
-                         DMAE_CMD_ENDIANITY_B_DW_SWAP |
-#else
-                         DMAE_CMD_ENDIANITY_DW_SWAP |
-#endif
-                         (port ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0));
+       REG_WR(bp, BAR_XSTRORM_INTMEM + XSTORM_STATS_FLAGS_OFFSET(func), 1);
+       REG_WR(bp, BAR_XSTRORM_INTMEM +
+              XSTORM_STATS_FLAGS_OFFSET(func) + 4, 0);
 
-               if (bp->link_vars.link_up)
-                       opcode |= (DMAE_CMD_C_DST_GRC | DMAE_CMD_C_ENABLE);
+       REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_STATS_FLAGS_OFFSET(func), 1);
+       REG_WR(bp, BAR_TSTRORM_INTMEM +
+              TSTORM_STATS_FLAGS_OFFSET(func) + 4, 0);
 
-               dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
-               dmae->opcode = opcode;
-               dmae->src_addr_lo = U64_LO(bnx2x_sp_mapping(bp, eth_stats) +
-                                          sizeof(u32));
-               dmae->src_addr_hi = U64_HI(bnx2x_sp_mapping(bp, eth_stats) +
-                                          sizeof(u32));
-               dmae->dst_addr_lo = bp->func_stx >> 2;
-               dmae->dst_addr_hi = 0;
-               dmae->len = (offsetof(struct bnx2x_eth_stats, mac_stx_end) -
-                            sizeof(u32)) >> 2;
-               if (bp->link_vars.link_up) {
-                       dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
-                       dmae->comp_addr_hi = 0;
-                       dmae->comp_val = 1;
-               } else {
-                       dmae->comp_addr_lo = 0;
-                       dmae->comp_addr_hi = 0;
-                       dmae->comp_val = 0;
+       REG_WR(bp, BAR_CSTRORM_INTMEM + CSTORM_STATS_FLAGS_OFFSET(func), 0);
+       REG_WR(bp, BAR_CSTRORM_INTMEM +
+              CSTORM_STATS_FLAGS_OFFSET(func) + 4, 0);
+
+       REG_WR(bp, BAR_XSTRORM_INTMEM +
+              XSTORM_ETH_STATS_QUERY_ADDR_OFFSET(func),
+              U64_LO(bnx2x_sp_mapping(bp, fw_stats)));
+       REG_WR(bp, BAR_XSTRORM_INTMEM +
+              XSTORM_ETH_STATS_QUERY_ADDR_OFFSET(func) + 4,
+              U64_HI(bnx2x_sp_mapping(bp, fw_stats)));
+
+       REG_WR(bp, BAR_TSTRORM_INTMEM +
+              TSTORM_ETH_STATS_QUERY_ADDR_OFFSET(func),
+              U64_LO(bnx2x_sp_mapping(bp, fw_stats)));
+       REG_WR(bp, BAR_TSTRORM_INTMEM +
+              TSTORM_ETH_STATS_QUERY_ADDR_OFFSET(func) + 4,
+              U64_HI(bnx2x_sp_mapping(bp, fw_stats)));
+}
+
+static void bnx2x_storm_stats_post(struct bnx2x *bp)
+{
+       if (!bp->stats_pending) {
+               struct eth_query_ramrod_data ramrod_data = {0};
+               int rc;
+
+               ramrod_data.drv_counter = bp->stats_counter++;
+               ramrod_data.collect_port_1b = bp->port.pmf ? 1 : 0;
+               ramrod_data.ctr_id_vector = (1 << BP_CL_ID(bp));
+
+               rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_STAT_QUERY, 0,
+                                  ((u32 *)&ramrod_data)[1],
+                                  ((u32 *)&ramrod_data)[0], 0);
+               if (rc == 0) {
+                       /* stats ramrod has its own slot on the spq */
+                       bp->spq_left++;
+                       bp->stats_pending = 1;
                }
        }
+}
 
-       if (!bp->link_vars.link_up) {
-               /* no need to collect statistics in link down */
-               return;
-       }
+static void bnx2x_stats_init(struct bnx2x *bp)
+{
+       int port = BP_PORT(bp);
 
-       opcode = (DMAE_CMD_SRC_GRC | DMAE_CMD_DST_PCI |
-                 DMAE_CMD_C_DST_GRC | DMAE_CMD_C_ENABLE |
-                 DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
-#ifdef __BIG_ENDIAN
-                 DMAE_CMD_ENDIANITY_B_DW_SWAP |
-#else
-                 DMAE_CMD_ENDIANITY_DW_SWAP |
-#endif
-                 (port ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0));
+       bp->executer_idx = 0;
+       bp->stats_counter = 0;
 
-       if (bp->link_vars.mac_type == MAC_TYPE_BMAC) {
+       /* port stats */
+       if (!BP_NOMCP(bp))
+               bp->port.port_stx = SHMEM_RD(bp, port_mb[port].port_stx);
+       else
+               bp->port.port_stx = 0;
+       DP(BNX2X_MSG_STATS, "port_stx 0x%x\n", bp->port.port_stx);
+
+       memset(&(bp->port.old_nig_stats), 0, sizeof(struct nig_stats));
+       bp->port.old_nig_stats.brb_discard =
+                       REG_RD(bp, NIG_REG_STAT0_BRB_DISCARD + port*0x38);
+       REG_RD_DMAE(bp, NIG_REG_STAT0_EGRESS_MAC_PKT0 + port*0x50,
+                   &(bp->port.old_nig_stats.egress_mac_pkt0_lo), 2);
+       REG_RD_DMAE(bp, NIG_REG_STAT0_EGRESS_MAC_PKT1 + port*0x50,
+                   &(bp->port.old_nig_stats.egress_mac_pkt1_lo), 2);
+
+       /* function stats */
+       memset(&bp->dev->stats, 0, sizeof(struct net_device_stats));
+       memset(&bp->old_tclient, 0, sizeof(struct tstorm_per_client_stats));
+       memset(&bp->old_xclient, 0, sizeof(struct xstorm_per_client_stats));
+       memset(&bp->eth_stats, 0, sizeof(struct bnx2x_eth_stats));
 
-               mac_addr = (port ? NIG_REG_INGRESS_BMAC1_MEM :
-                                  NIG_REG_INGRESS_BMAC0_MEM);
+       bp->stats_state = STATS_STATE_DISABLED;
+       if (IS_E1HMF(bp) && bp->port.pmf && bp->port.port_stx)
+               bnx2x_stats_handle(bp, STATS_EVENT_PMF);
+}
 
-               /* BIGMAC_REGISTER_TX_STAT_GTPKT ..
-                  BIGMAC_REGISTER_TX_STAT_GTBYT */
-               dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
-               dmae->opcode = opcode;
-               dmae->src_addr_lo = (mac_addr +
-                                    BIGMAC_REGISTER_TX_STAT_GTPKT) >> 2;
-               dmae->src_addr_hi = 0;
-               dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, mac_stats));
-               dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, mac_stats));
-               dmae->len = (8 + BIGMAC_REGISTER_TX_STAT_GTBYT -
-                            BIGMAC_REGISTER_TX_STAT_GTPKT) >> 2;
-               dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
-               dmae->comp_addr_hi = 0;
-               dmae->comp_val = 1;
+static void bnx2x_hw_stats_post(struct bnx2x *bp)
+{
+       struct dmae_command *dmae = &bp->stats_dmae;
+       u32 *stats_comp = bnx2x_sp(bp, stats_comp);
 
-               /* BIGMAC_REGISTER_RX_STAT_GR64 ..
-                  BIGMAC_REGISTER_RX_STAT_GRIPJ */
-               dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
-               dmae->opcode = opcode;
-               dmae->src_addr_lo = (mac_addr +
+       *stats_comp = DMAE_COMP_VAL;
+
+       /* loader */
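+       /* The loader command copies one entry of the chain built in the
+        * SP dmae area into DMAE command memory and kicks it through its
+        * completion write to the GO register; the opcode omits
+        * DMAE_CMD_SRC_RESET, so the source address advances through the
+        * chain each time a completed command re-triggers the loader.
+        */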
+       if (bp->executer_idx) {
+               int loader_idx = PMF_DMAE_C(bp);
+
+               memset(dmae, 0, sizeof(struct dmae_command));
+
+               dmae->opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC |
+                               DMAE_CMD_C_DST_GRC | DMAE_CMD_C_ENABLE |
+                               DMAE_CMD_DST_RESET |
+#ifdef __BIG_ENDIAN
+                               DMAE_CMD_ENDIANITY_B_DW_SWAP |
+#else
+                               DMAE_CMD_ENDIANITY_DW_SWAP |
+#endif
+                               (BP_PORT(bp) ? DMAE_CMD_PORT_1 :
+                                              DMAE_CMD_PORT_0) |
+                               (BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT));
+               dmae->src_addr_lo = U64_LO(bnx2x_sp_mapping(bp, dmae[0]));
+               dmae->src_addr_hi = U64_HI(bnx2x_sp_mapping(bp, dmae[0]));
+               dmae->dst_addr_lo = (DMAE_REG_CMD_MEM +
+                                    sizeof(struct dmae_command) *
+                                    (loader_idx + 1)) >> 2;
+               dmae->dst_addr_hi = 0;
+               dmae->len = sizeof(struct dmae_command) >> 2;
+               if (CHIP_IS_E1(bp))
+                       dmae->len--;
+               dmae->comp_addr_lo = dmae_reg_go_c[loader_idx + 1] >> 2;
+               dmae->comp_addr_hi = 0;
+               dmae->comp_val = 1;
+
+               *stats_comp = 0;
+               bnx2x_post_dmae(bp, dmae, loader_idx);
+
+       } else if (bp->func_stx) {
+               *stats_comp = 0;
+               bnx2x_post_dmae(bp, dmae, INIT_DMAE_C(bp));
+       }
+}
+
+static int bnx2x_stats_comp(struct bnx2x *bp)
+{
+       u32 *stats_comp = bnx2x_sp(bp, stats_comp);
+       int cnt = 10;
+
+       might_sleep();
+       while (*stats_comp != DMAE_COMP_VAL) {
+               msleep(1);
+               if (!cnt) {
+                       BNX2X_ERR("timeout waiting for stats to finish\n");
+                       break;
+               }
+               cnt--;
+       }
+       return 1;
+}
+
+/*
+ * Statistics service functions
+ */
+
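+/* A newly elected PMF inherits the port statistics: read the last
+ * host_port_stats image back from shared memory (port_stx) in two
+ * DMAE transfers, as a single DMAE read is capped at
+ * DMAE_LEN32_RD_MAX dwords.
+ */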
+static void bnx2x_stats_pmf_update(struct bnx2x *bp)
+{
+       struct dmae_command *dmae;
+       u32 opcode;
+       int loader_idx = PMF_DMAE_C(bp);
+       u32 *stats_comp = bnx2x_sp(bp, stats_comp);
+
+       /* sanity */
+       if (!IS_E1HMF(bp) || !bp->port.pmf || !bp->port.port_stx) {
+               BNX2X_ERR("BUG!\n");
+               return;
+       }
+
+       bp->executer_idx = 0;
+
+       opcode = (DMAE_CMD_SRC_GRC | DMAE_CMD_DST_PCI |
+                 DMAE_CMD_C_ENABLE |
+                 DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
+#ifdef __BIG_ENDIAN
+                 DMAE_CMD_ENDIANITY_B_DW_SWAP |
+#else
+                 DMAE_CMD_ENDIANITY_DW_SWAP |
+#endif
+                 (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
+                 (BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT));
+
+       dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
+       dmae->opcode = (opcode | DMAE_CMD_C_DST_GRC);
+       dmae->src_addr_lo = bp->port.port_stx >> 2;
+       dmae->src_addr_hi = 0;
+       dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, port_stats));
+       dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, port_stats));
+       dmae->len = DMAE_LEN32_RD_MAX;
+       dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
+       dmae->comp_addr_hi = 0;
+       dmae->comp_val = 1;
+
+       dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
+       dmae->opcode = (opcode | DMAE_CMD_C_DST_PCI);
+       dmae->src_addr_lo = (bp->port.port_stx >> 2) + DMAE_LEN32_RD_MAX;
+       dmae->src_addr_hi = 0;
+       dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, port_stats) +
+                                  DMAE_LEN32_RD_MAX * 4);
+       dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, port_stats) +
+                                  DMAE_LEN32_RD_MAX * 4);
+       dmae->len = (sizeof(struct host_port_stats) >> 2) - DMAE_LEN32_RD_MAX;
+       dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, stats_comp));
+       dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, stats_comp));
+       dmae->comp_val = DMAE_COMP_VAL;
+
+       *stats_comp = 0;
+       bnx2x_hw_stats_post(bp);
+       bnx2x_stats_comp(bp);
+}
+
+static void bnx2x_port_stats_init(struct bnx2x *bp)
+{
+       struct dmae_command *dmae;
+       int port = BP_PORT(bp);
+       int vn = BP_E1HVN(bp);
+       u32 opcode;
+       int loader_idx = PMF_DMAE_C(bp);
+       u32 mac_addr;
+       u32 *stats_comp = bnx2x_sp(bp, stats_comp);
+
+       /* sanity */
+       if (!bp->link_vars.link_up || !bp->port.pmf) {
+               BNX2X_ERR("BUG!\n");
+               return;
+       }
+
+       bp->executer_idx = 0;
+
+       /* MCP */
+       opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC |
+                 DMAE_CMD_C_DST_GRC | DMAE_CMD_C_ENABLE |
+                 DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
+#ifdef __BIG_ENDIAN
+                 DMAE_CMD_ENDIANITY_B_DW_SWAP |
+#else
+                 DMAE_CMD_ENDIANITY_DW_SWAP |
+#endif
+                 (port ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
+                 (vn << DMAE_CMD_E1HVN_SHIFT));
+
+       if (bp->port.port_stx) {
+
+               dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
+               dmae->opcode = opcode;
+               dmae->src_addr_lo = U64_LO(bnx2x_sp_mapping(bp, port_stats));
+               dmae->src_addr_hi = U64_HI(bnx2x_sp_mapping(bp, port_stats));
+               dmae->dst_addr_lo = bp->port.port_stx >> 2;
+               dmae->dst_addr_hi = 0;
+               dmae->len = sizeof(struct host_port_stats) >> 2;
+               dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
+               dmae->comp_addr_hi = 0;
+               dmae->comp_val = 1;
+       }
+
+       if (bp->func_stx) {
+
+               dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
+               dmae->opcode = opcode;
+               dmae->src_addr_lo = U64_LO(bnx2x_sp_mapping(bp, func_stats));
+               dmae->src_addr_hi = U64_HI(bnx2x_sp_mapping(bp, func_stats));
+               dmae->dst_addr_lo = bp->func_stx >> 2;
+               dmae->dst_addr_hi = 0;
+               dmae->len = sizeof(struct host_func_stats) >> 2;
+               dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
+               dmae->comp_addr_hi = 0;
+               dmae->comp_val = 1;
+       }
+
+       /* MAC */
+       opcode = (DMAE_CMD_SRC_GRC | DMAE_CMD_DST_PCI |
+                 DMAE_CMD_C_DST_GRC | DMAE_CMD_C_ENABLE |
+                 DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
+#ifdef __BIG_ENDIAN
+                 DMAE_CMD_ENDIANITY_B_DW_SWAP |
+#else
+                 DMAE_CMD_ENDIANITY_DW_SWAP |
+#endif
+                 (port ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
+                 (vn << DMAE_CMD_E1HVN_SHIFT));
+
+       if (bp->link_vars.mac_type == MAC_TYPE_BMAC) {
+
+               mac_addr = (port ? NIG_REG_INGRESS_BMAC1_MEM :
+                                  NIG_REG_INGRESS_BMAC0_MEM);
+
+               /* BIGMAC_REGISTER_TX_STAT_GTPKT ..
+                  BIGMAC_REGISTER_TX_STAT_GTBYT */
+               dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
+               dmae->opcode = opcode;
+               dmae->src_addr_lo = (mac_addr +
+                                    BIGMAC_REGISTER_TX_STAT_GTPKT) >> 2;
+               dmae->src_addr_hi = 0;
+               dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, mac_stats));
+               dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, mac_stats));
+               dmae->len = (8 + BIGMAC_REGISTER_TX_STAT_GTBYT -
+                            BIGMAC_REGISTER_TX_STAT_GTPKT) >> 2;
+               dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
+               dmae->comp_addr_hi = 0;
+               dmae->comp_val = 1;
+
+               /* BIGMAC_REGISTER_RX_STAT_GR64 ..
+                  BIGMAC_REGISTER_RX_STAT_GRIPJ */
+               dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
+               dmae->opcode = opcode;
+               dmae->src_addr_lo = (mac_addr +
                                     BIGMAC_REGISTER_RX_STAT_GR64) >> 2;
                dmae->src_addr_hi = 0;
                dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, mac_stats) +
-                                       offsetof(struct bmac_stats, rx_gr64));
+                               offsetof(struct bmac_stats, rx_stat_gr64_lo));
                dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, mac_stats) +
-                                       offsetof(struct bmac_stats, rx_gr64));
+                               offsetof(struct bmac_stats, rx_stat_gr64_lo));
                dmae->len = (8 + BIGMAC_REGISTER_RX_STAT_GRIPJ -
                             BIGMAC_REGISTER_RX_STAT_GR64) >> 2;
                dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
@@ -2631,11 +3301,9 @@ static void bnx2x_init_mac_stats(struct bnx2x *bp)
                                     EMAC_REG_EMAC_RX_STAT_AC_28) >> 2;
                dmae->src_addr_hi = 0;
                dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, mac_stats) +
-                                          offsetof(struct emac_stats,
-                                                   rx_falsecarriererrors));
+                    offsetof(struct emac_stats, rx_stat_falsecarriererrors));
                dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, mac_stats) +
-                                          offsetof(struct emac_stats,
-                                                   rx_falsecarriererrors));
+                    offsetof(struct emac_stats, rx_stat_falsecarriererrors));
                dmae->len = 1;
                dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
                dmae->comp_addr_hi = 0;
@@ -2648,11 +3316,9 @@ static void bnx2x_init_mac_stats(struct bnx2x *bp)
                                     EMAC_REG_EMAC_TX_STAT_AC) >> 2;
                dmae->src_addr_hi = 0;
                dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, mac_stats) +
-                                          offsetof(struct emac_stats,
-                                                   tx_ifhcoutoctets));
+                       offsetof(struct emac_stats, tx_stat_ifhcoutoctets));
                dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, mac_stats) +
-                                          offsetof(struct emac_stats,
-                                                   tx_ifhcoutoctets));
+                       offsetof(struct emac_stats, tx_stat_ifhcoutoctets));
                dmae->len = EMAC_REG_EMAC_TX_STAT_AC_COUNT;
                dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
                dmae->comp_addr_hi = 0;
@@ -2660,6 +3326,32 @@ static void bnx2x_init_mac_stats(struct bnx2x *bp)
        }
 
        /* NIG */
+       dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
+       dmae->opcode = opcode;
+       dmae->src_addr_lo = (port ? NIG_REG_STAT1_BRB_DISCARD :
+                                   NIG_REG_STAT0_BRB_DISCARD) >> 2;
+       dmae->src_addr_hi = 0;
+       dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, nig_stats));
+       dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, nig_stats));
+       dmae->len = (sizeof(struct nig_stats) - 4*sizeof(u32)) >> 2;
+       dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
+       dmae->comp_addr_hi = 0;
+       dmae->comp_val = 1;
+
+       dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
+       dmae->opcode = opcode;
+       dmae->src_addr_lo = (port ? NIG_REG_STAT1_EGRESS_MAC_PKT0 :
+                                   NIG_REG_STAT0_EGRESS_MAC_PKT0) >> 2;
+       dmae->src_addr_hi = 0;
+       dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, nig_stats) +
+                       offsetof(struct nig_stats, egress_mac_pkt0_lo));
+       dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, nig_stats) +
+                       offsetof(struct nig_stats, egress_mac_pkt0_lo));
+       dmae->len = (2*sizeof(u32)) >> 2;
+       dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
+       dmae->comp_addr_hi = 0;
+       dmae->comp_val = 1;
+
        dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
        dmae->opcode = (DMAE_CMD_SRC_GRC | DMAE_CMD_DST_PCI |
                        DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE |
@@ -2669,325 +3361,322 @@ static void bnx2x_init_mac_stats(struct bnx2x *bp)
 #else
                        DMAE_CMD_ENDIANITY_DW_SWAP |
 #endif
-                       (port ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0));
-       dmae->src_addr_lo = (port ? NIG_REG_STAT1_BRB_DISCARD :
-                                   NIG_REG_STAT0_BRB_DISCARD) >> 2;
+                       (port ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
+                       (vn << DMAE_CMD_E1HVN_SHIFT));
+       dmae->src_addr_lo = (port ? NIG_REG_STAT1_EGRESS_MAC_PKT1 :
+                                   NIG_REG_STAT0_EGRESS_MAC_PKT1) >> 2;
        dmae->src_addr_hi = 0;
-       dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, nig));
-       dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, nig));
-       dmae->len = (sizeof(struct nig_stats) - 2*sizeof(u32)) >> 2;
-       dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, nig) +
-                                   offsetof(struct nig_stats, done));
-       dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, nig) +
-                                   offsetof(struct nig_stats, done));
-       dmae->comp_val = 0xffffffff;
+       dmae->dst_addr_lo = U64_LO(bnx2x_sp_mapping(bp, nig_stats) +
+                       offsetof(struct nig_stats, egress_mac_pkt1_lo));
+       dmae->dst_addr_hi = U64_HI(bnx2x_sp_mapping(bp, nig_stats) +
+                       offsetof(struct nig_stats, egress_mac_pkt1_lo));
+       dmae->len = (2*sizeof(u32)) >> 2;
+       dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, stats_comp));
+       dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, stats_comp));
+       dmae->comp_val = DMAE_COMP_VAL;
+
+       *stats_comp = 0;
 }
 
-static void bnx2x_init_stats(struct bnx2x *bp)
+static void bnx2x_func_stats_init(struct bnx2x *bp)
 {
-       int port = BP_PORT(bp);
+       struct dmae_command *dmae = &bp->stats_dmae;
+       u32 *stats_comp = bnx2x_sp(bp, stats_comp);
+
+       /* sanity */
+       if (!bp->func_stx) {
+               BNX2X_ERR("BUG!\n");
+               return;
+       }
 
-       bp->stats_state = STATS_STATE_DISABLE;
        bp->executer_idx = 0;
+       memset(dmae, 0, sizeof(struct dmae_command));
 
-       bp->old_brb_discard = REG_RD(bp,
-                                    NIG_REG_STAT0_BRB_DISCARD + port*0x38);
+       dmae->opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC |
+                       DMAE_CMD_C_DST_PCI | DMAE_CMD_C_ENABLE |
+                       DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
+#ifdef __BIG_ENDIAN
+                       DMAE_CMD_ENDIANITY_B_DW_SWAP |
+#else
+                       DMAE_CMD_ENDIANITY_DW_SWAP |
+#endif
+                       (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
+                       (BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT));
+       dmae->src_addr_lo = U64_LO(bnx2x_sp_mapping(bp, func_stats));
+       dmae->src_addr_hi = U64_HI(bnx2x_sp_mapping(bp, func_stats));
+       dmae->dst_addr_lo = bp->func_stx >> 2;
+       dmae->dst_addr_hi = 0;
+       dmae->len = sizeof(struct host_func_stats) >> 2;
+       dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, stats_comp));
+       dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, stats_comp));
+       dmae->comp_val = DMAE_COMP_VAL;
 
-       memset(&bp->old_bmac, 0, sizeof(struct bmac_stats));
-       memset(&bp->old_tclient, 0, sizeof(struct tstorm_per_client_stats));
-       memset(&bp->dev->stats, 0, sizeof(struct net_device_stats));
+       *stats_comp = 0;
+}
 
-       REG_WR(bp, BAR_XSTRORM_INTMEM + XSTORM_STATS_FLAGS_OFFSET(port), 1);
-       REG_WR(bp, BAR_XSTRORM_INTMEM +
-              XSTORM_STATS_FLAGS_OFFSET(port) + 4, 0);
+static void bnx2x_stats_start(struct bnx2x *bp)
+{
+       if (bp->port.pmf)
+               bnx2x_port_stats_init(bp);
+
+       else if (bp->func_stx)
+               bnx2x_func_stats_init(bp);
+
+       bnx2x_hw_stats_post(bp);
+       bnx2x_storm_stats_post(bp);
+}
+
+static void bnx2x_stats_pmf_start(struct bnx2x *bp)
+{
+       bnx2x_stats_comp(bp);
+       bnx2x_stats_pmf_update(bp);
+       bnx2x_stats_start(bp);
+}
+
+static void bnx2x_stats_restart(struct bnx2x *bp)
+{
+       bnx2x_stats_comp(bp);
+       bnx2x_stats_start(bp);
+}
+
+static void bnx2x_bmac_stats_update(struct bnx2x *bp)
+{
+       struct bmac_stats *new = bnx2x_sp(bp, mac_stats.bmac_stats);
+       struct host_port_stats *pstats = bnx2x_sp(bp, port_stats);
+       struct regpair diff;
+
+       UPDATE_STAT64(rx_stat_grerb, rx_stat_ifhcinbadoctets);
+       UPDATE_STAT64(rx_stat_grfcs, rx_stat_dot3statsfcserrors);
+       UPDATE_STAT64(rx_stat_grund, rx_stat_etherstatsundersizepkts);
+       UPDATE_STAT64(rx_stat_grovr, rx_stat_dot3statsframestoolong);
+       UPDATE_STAT64(rx_stat_grfrg, rx_stat_etherstatsfragments);
+       UPDATE_STAT64(rx_stat_grjbr, rx_stat_etherstatsjabbers);
+       UPDATE_STAT64(rx_stat_grxpf, rx_stat_bmac_xpf);
+       UPDATE_STAT64(rx_stat_grxcf, rx_stat_bmac_xcf);
+       UPDATE_STAT64(rx_stat_grxpf, rx_stat_xoffstateentered);
+       UPDATE_STAT64(rx_stat_grxpf, rx_stat_xoffpauseframesreceived);
+       UPDATE_STAT64(tx_stat_gtxpf, tx_stat_outxoffsent);
+       UPDATE_STAT64(tx_stat_gtxpf, tx_stat_flowcontroldone);
+       UPDATE_STAT64(tx_stat_gt64, tx_stat_etherstatspkts64octets);
+       UPDATE_STAT64(tx_stat_gt127,
+                               tx_stat_etherstatspkts65octetsto127octets);
+       UPDATE_STAT64(tx_stat_gt255,
+                               tx_stat_etherstatspkts128octetsto255octets);
+       UPDATE_STAT64(tx_stat_gt511,
+                               tx_stat_etherstatspkts256octetsto511octets);
+       UPDATE_STAT64(tx_stat_gt1023,
+                               tx_stat_etherstatspkts512octetsto1023octets);
+       UPDATE_STAT64(tx_stat_gt1518,
+                               tx_stat_etherstatspkts1024octetsto1522octets);
+       UPDATE_STAT64(tx_stat_gt2047, tx_stat_bmac_2047);
+       UPDATE_STAT64(tx_stat_gt4095, tx_stat_bmac_4095);
+       UPDATE_STAT64(tx_stat_gt9216, tx_stat_bmac_9216);
+       UPDATE_STAT64(tx_stat_gt16383, tx_stat_bmac_16383);
+       UPDATE_STAT64(tx_stat_gterr,
+                               tx_stat_dot3statsinternalmactransmiterrors);
+       UPDATE_STAT64(tx_stat_gtufl, tx_stat_bmac_ufl);
+}
+
+static void bnx2x_emac_stats_update(struct bnx2x *bp)
+{
+       struct emac_stats *new = bnx2x_sp(bp, mac_stats.emac_stats);
+       struct host_port_stats *pstats = bnx2x_sp(bp, port_stats);
+
+       UPDATE_EXTEND_STAT(rx_stat_ifhcinbadoctets);
+       UPDATE_EXTEND_STAT(tx_stat_ifhcoutbadoctets);
+       UPDATE_EXTEND_STAT(rx_stat_dot3statsfcserrors);
+       UPDATE_EXTEND_STAT(rx_stat_dot3statsalignmenterrors);
+       UPDATE_EXTEND_STAT(rx_stat_dot3statscarriersenseerrors);
+       UPDATE_EXTEND_STAT(rx_stat_falsecarriererrors);
+       UPDATE_EXTEND_STAT(rx_stat_etherstatsundersizepkts);
+       UPDATE_EXTEND_STAT(rx_stat_dot3statsframestoolong);
+       UPDATE_EXTEND_STAT(rx_stat_etherstatsfragments);
+       UPDATE_EXTEND_STAT(rx_stat_etherstatsjabbers);
+       UPDATE_EXTEND_STAT(rx_stat_maccontrolframesreceived);
+       UPDATE_EXTEND_STAT(rx_stat_xoffstateentered);
+       UPDATE_EXTEND_STAT(rx_stat_xonpauseframesreceived);
+       UPDATE_EXTEND_STAT(rx_stat_xoffpauseframesreceived);
+       UPDATE_EXTEND_STAT(tx_stat_outxonsent);
+       UPDATE_EXTEND_STAT(tx_stat_outxoffsent);
+       UPDATE_EXTEND_STAT(tx_stat_flowcontroldone);
+       UPDATE_EXTEND_STAT(tx_stat_etherstatscollisions);
+       UPDATE_EXTEND_STAT(tx_stat_dot3statssinglecollisionframes);
+       UPDATE_EXTEND_STAT(tx_stat_dot3statsmultiplecollisionframes);
+       UPDATE_EXTEND_STAT(tx_stat_dot3statsdeferredtransmissions);
+       UPDATE_EXTEND_STAT(tx_stat_dot3statsexcessivecollisions);
+       UPDATE_EXTEND_STAT(tx_stat_dot3statslatecollisions);
+       UPDATE_EXTEND_STAT(tx_stat_etherstatspkts64octets);
+       UPDATE_EXTEND_STAT(tx_stat_etherstatspkts65octetsto127octets);
+       UPDATE_EXTEND_STAT(tx_stat_etherstatspkts128octetsto255octets);
+       UPDATE_EXTEND_STAT(tx_stat_etherstatspkts256octetsto511octets);
+       UPDATE_EXTEND_STAT(tx_stat_etherstatspkts512octetsto1023octets);
+       UPDATE_EXTEND_STAT(tx_stat_etherstatspkts1024octetsto1522octets);
+       UPDATE_EXTEND_STAT(tx_stat_etherstatspktsover1522octets);
+       UPDATE_EXTEND_STAT(tx_stat_dot3statsinternalmactransmiterrors);
+}
+
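+/* Runs once the stats DMAE pass completes: dispatch to the updater for
+ * the active MAC, fold in the NIG brb_discard delta, snapshot the NIG
+ * block and mirror the port stats into eth_stats for ethtool.
+ */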
+static int bnx2x_hw_stats_update(struct bnx2x *bp)
+{
+       struct nig_stats *new = bnx2x_sp(bp, nig_stats);
+       struct nig_stats *old = &(bp->port.old_nig_stats);
+       struct host_port_stats *pstats = bnx2x_sp(bp, port_stats);
+       struct bnx2x_eth_stats *estats = &bp->eth_stats;
+       struct regpair diff;
+
+       if (bp->link_vars.mac_type == MAC_TYPE_BMAC)
+               bnx2x_bmac_stats_update(bp);
+
+       else if (bp->link_vars.mac_type == MAC_TYPE_EMAC)
+               bnx2x_emac_stats_update(bp);
+
+       else { /* unreached */
+               BNX2X_ERR("stats updated by dmae but no MAC active\n");
+               return -1;
+       }
 
-       REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_STATS_FLAGS_OFFSET(port), 1);
-       REG_WR(bp, BAR_TSTRORM_INTMEM +
-              TSTORM_STATS_FLAGS_OFFSET(port) + 4, 0);
+       ADD_EXTEND_64(pstats->brb_drop_hi, pstats->brb_drop_lo,
+                     new->brb_discard - old->brb_discard);
 
-       REG_WR(bp, BAR_CSTRORM_INTMEM + CSTORM_STATS_FLAGS_OFFSET(port), 0);
-       REG_WR(bp, BAR_CSTRORM_INTMEM +
-              CSTORM_STATS_FLAGS_OFFSET(port) + 4, 0);
+       UPDATE_STAT64_NIG(egress_mac_pkt0,
+                                       etherstatspkts1024octetsto1522octets);
+       UPDATE_STAT64_NIG(egress_mac_pkt1, etherstatspktsover1522octets);
 
-       REG_WR(bp, BAR_XSTRORM_INTMEM +
-              XSTORM_ETH_STATS_QUERY_ADDR_OFFSET(port),
-              U64_LO(bnx2x_sp_mapping(bp, fw_stats)));
-       REG_WR(bp, BAR_XSTRORM_INTMEM +
-              XSTORM_ETH_STATS_QUERY_ADDR_OFFSET(port) + 4,
-              U64_HI(bnx2x_sp_mapping(bp, fw_stats)));
+       memcpy(old, new, sizeof(struct nig_stats));
 
-       REG_WR(bp, BAR_TSTRORM_INTMEM +
-              TSTORM_ETH_STATS_QUERY_ADDR_OFFSET(port),
-              U64_LO(bnx2x_sp_mapping(bp, fw_stats)));
-       REG_WR(bp, BAR_TSTRORM_INTMEM +
-              TSTORM_ETH_STATS_QUERY_ADDR_OFFSET(port) + 4,
-              U64_HI(bnx2x_sp_mapping(bp, fw_stats)));
-}
+       memcpy(&(estats->rx_stat_ifhcinbadoctets_hi), &(pstats->mac_stx[1]),
+              sizeof(struct mac_stx));
+       estats->brb_drop_hi = pstats->brb_drop_hi;
+       estats->brb_drop_lo = pstats->brb_drop_lo;
 
-static void bnx2x_stop_stats(struct bnx2x *bp)
-{
-       might_sleep();
-       if (bp->stats_state != STATS_STATE_DISABLE) {
-               int timeout = 10;
+       pstats->host_port_stats_start = ++pstats->host_port_stats_end;
 
-               bp->stats_state = STATS_STATE_STOP;
-               DP(BNX2X_MSG_STATS, "stats_state - STOP\n");
-
-               while (bp->stats_state != STATS_STATE_DISABLE) {
-                       if (!timeout) {
-                               BNX2X_ERR("timeout waiting for stats stop\n");
-                               break;
-                       }
-                       timeout--;
-                       msleep(100);
-               }
-       }
-       DP(BNX2X_MSG_STATS, "stats_state - DISABLE\n");
+       return 0;
 }
 
-/*
- * Statistics service functions
- */
-
-static void bnx2x_update_bmac_stats(struct bnx2x *bp)
-{
-       struct regp diff;
-       struct regp sum;
-       struct bmac_stats *new = bnx2x_sp(bp, mac_stats.bmac);
-       struct bmac_stats *old = &bp->old_bmac;
-       struct bnx2x_eth_stats *estats = bnx2x_sp(bp, eth_stats);
-
-       sum.hi = 0;
-       sum.lo = 0;
-
-       UPDATE_STAT64(tx_gtbyt.hi, total_bytes_transmitted_hi,
-                     tx_gtbyt.lo, total_bytes_transmitted_lo);
-
-       UPDATE_STAT64(tx_gtmca.hi, total_multicast_packets_transmitted_hi,
-                     tx_gtmca.lo, total_multicast_packets_transmitted_lo);
-       ADD_64(sum.hi, diff.hi, sum.lo, diff.lo);
-
-       UPDATE_STAT64(tx_gtgca.hi, total_broadcast_packets_transmitted_hi,
-                     tx_gtgca.lo, total_broadcast_packets_transmitted_lo);
-       ADD_64(sum.hi, diff.hi, sum.lo, diff.lo);
-
-       UPDATE_STAT64(tx_gtpkt.hi, total_unicast_packets_transmitted_hi,
-                     tx_gtpkt.lo, total_unicast_packets_transmitted_lo);
-       SUB_64(estats->total_unicast_packets_transmitted_hi, sum.hi,
-              estats->total_unicast_packets_transmitted_lo, sum.lo);
-
-       UPDATE_STAT(tx_gtxpf.lo, pause_xoff_frames_transmitted);
-       UPDATE_STAT(tx_gt64.lo, frames_transmitted_64_bytes);
-       UPDATE_STAT(tx_gt127.lo, frames_transmitted_65_127_bytes);
-       UPDATE_STAT(tx_gt255.lo, frames_transmitted_128_255_bytes);
-       UPDATE_STAT(tx_gt511.lo, frames_transmitted_256_511_bytes);
-       UPDATE_STAT(tx_gt1023.lo, frames_transmitted_512_1023_bytes);
-       UPDATE_STAT(tx_gt1518.lo, frames_transmitted_1024_1522_bytes);
-       UPDATE_STAT(tx_gt2047.lo, frames_transmitted_1523_9022_bytes);
-       UPDATE_STAT(tx_gt4095.lo, frames_transmitted_1523_9022_bytes);
-       UPDATE_STAT(tx_gt9216.lo, frames_transmitted_1523_9022_bytes);
-       UPDATE_STAT(tx_gt16383.lo, frames_transmitted_1523_9022_bytes);
-
-       UPDATE_STAT(rx_grfcs.lo, crc_receive_errors);
-       UPDATE_STAT(rx_grund.lo, runt_packets_received);
-       UPDATE_STAT(rx_grovr.lo, stat_Dot3statsFramesTooLong);
-       UPDATE_STAT(rx_grxpf.lo, pause_xoff_frames_received);
-       UPDATE_STAT(rx_grxcf.lo, control_frames_received);
-       /* UPDATE_STAT(rx_grxpf.lo, control_frames_received); */
-       UPDATE_STAT(rx_grfrg.lo, error_runt_packets_received);
-       UPDATE_STAT(rx_grjbr.lo, error_jabber_packets_received);
-
-       UPDATE_STAT64(rx_grerb.hi, stat_IfHCInBadOctets_hi,
-                     rx_grerb.lo, stat_IfHCInBadOctets_lo);
-       UPDATE_STAT64(tx_gtufl.hi, stat_IfHCOutBadOctets_hi,
-                     tx_gtufl.lo, stat_IfHCOutBadOctets_lo);
-       UPDATE_STAT(tx_gterr.lo, stat_Dot3statsInternalMacTransmitErrors);
-       /* UPDATE_STAT(rx_grxpf.lo, stat_XoffStateEntered); */
-       estats->stat_XoffStateEntered = estats->pause_xoff_frames_received;
-}
-
-static void bnx2x_update_emac_stats(struct bnx2x *bp)
-{
-       struct emac_stats *new = bnx2x_sp(bp, mac_stats.emac);
-       struct bnx2x_eth_stats *estats = bnx2x_sp(bp, eth_stats);
-
-       UPDATE_EXTEND_STAT(tx_ifhcoutoctets, total_bytes_transmitted_hi,
-                                            total_bytes_transmitted_lo);
-       UPDATE_EXTEND_STAT(tx_ifhcoutucastpkts,
-                                       total_unicast_packets_transmitted_hi,
-                                       total_unicast_packets_transmitted_lo);
-       UPDATE_EXTEND_STAT(tx_ifhcoutmulticastpkts,
-                                     total_multicast_packets_transmitted_hi,
-                                     total_multicast_packets_transmitted_lo);
-       UPDATE_EXTEND_STAT(tx_ifhcoutbroadcastpkts,
-                                     total_broadcast_packets_transmitted_hi,
-                                     total_broadcast_packets_transmitted_lo);
-
-       estats->pause_xon_frames_transmitted += new->tx_outxonsent;
-       estats->pause_xoff_frames_transmitted += new->tx_outxoffsent;
-       estats->single_collision_transmit_frames +=
-                               new->tx_dot3statssinglecollisionframes;
-       estats->multiple_collision_transmit_frames +=
-                               new->tx_dot3statsmultiplecollisionframes;
-       estats->late_collision_frames += new->tx_dot3statslatecollisions;
-       estats->excessive_collision_frames +=
-                               new->tx_dot3statsexcessivecollisions;
-       estats->frames_transmitted_64_bytes += new->tx_etherstatspkts64octets;
-       estats->frames_transmitted_65_127_bytes +=
-                               new->tx_etherstatspkts65octetsto127octets;
-       estats->frames_transmitted_128_255_bytes +=
-                               new->tx_etherstatspkts128octetsto255octets;
-       estats->frames_transmitted_256_511_bytes +=
-                               new->tx_etherstatspkts256octetsto511octets;
-       estats->frames_transmitted_512_1023_bytes +=
-                               new->tx_etherstatspkts512octetsto1023octets;
-       estats->frames_transmitted_1024_1522_bytes +=
-                               new->tx_etherstatspkts1024octetsto1522octet;
-       estats->frames_transmitted_1523_9022_bytes +=
-                               new->tx_etherstatspktsover1522octets;
-
-       estats->crc_receive_errors += new->rx_dot3statsfcserrors;
-       estats->alignment_errors += new->rx_dot3statsalignmenterrors;
-       estats->false_carrier_detections += new->rx_falsecarriererrors;
-       estats->runt_packets_received += new->rx_etherstatsundersizepkts;
-       estats->stat_Dot3statsFramesTooLong += new->rx_dot3statsframestoolong;
-       estats->pause_xon_frames_received += new->rx_xonpauseframesreceived;
-       estats->pause_xoff_frames_received += new->rx_xoffpauseframesreceived;
-       estats->control_frames_received += new->rx_maccontrolframesreceived;
-       estats->error_runt_packets_received += new->rx_etherstatsfragments;
-       estats->error_jabber_packets_received += new->rx_etherstatsjabbers;
-
-       UPDATE_EXTEND_STAT(rx_ifhcinbadoctets, stat_IfHCInBadOctets_hi,
-                                              stat_IfHCInBadOctets_lo);
-       UPDATE_EXTEND_STAT(tx_ifhcoutbadoctets, stat_IfHCOutBadOctets_hi,
-                                               stat_IfHCOutBadOctets_lo);
-       estats->stat_Dot3statsInternalMacTransmitErrors +=
-                               new->tx_dot3statsinternalmactransmiterrors;
-       estats->stat_Dot3StatsCarrierSenseErrors +=
-                               new->rx_dot3statscarriersenseerrors;
-       estats->stat_Dot3StatsDeferredTransmissions +=
-                               new->tx_dot3statsdeferredtransmissions;
-       estats->stat_FlowControlDone += new->tx_flowcontroldone;
-       estats->stat_XoffStateEntered += new->rx_xoffstateentered;
-}
-
-static int bnx2x_update_storm_stats(struct bnx2x *bp)
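+/* Consume the per-client statistics posted by the TSTORM and XSTORM.
+ * Each storm stamps its block with a sequence number; a stale stamp
+ * means the firmware has not refreshed the block yet, so it is skipped.
+ */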
+static int bnx2x_storm_stats_update(struct bnx2x *bp)
 {
        struct eth_stats_query *stats = bnx2x_sp(bp, fw_stats);
-       struct tstorm_common_stats *tstats = &stats->tstorm_common;
+       int cl_id = BP_CL_ID(bp);
+       struct tstorm_per_port_stats *tport =
+                               &stats->tstorm_common.port_statistics;
        struct tstorm_per_client_stats *tclient =
-                                               &tstats->client_statistics[0];
+                       &stats->tstorm_common.client_statistics[cl_id];
        struct tstorm_per_client_stats *old_tclient = &bp->old_tclient;
-       struct xstorm_common_stats *xstats = &stats->xstorm_common;
-       struct nig_stats *nstats = bnx2x_sp(bp, nig);
-       struct bnx2x_eth_stats *estats = bnx2x_sp(bp, eth_stats);
+       struct xstorm_per_client_stats *xclient =
+                       &stats->xstorm_common.client_statistics[cl_id];
+       struct xstorm_per_client_stats *old_xclient = &bp->old_xclient;
+       struct host_func_stats *fstats = bnx2x_sp(bp, func_stats);
+       struct bnx2x_eth_stats *estats = &bp->eth_stats;
        u32 diff;
 
-       /* are DMAE stats valid? */
-       if (nstats->done != 0xffffffff) {
-               DP(BNX2X_MSG_STATS, "stats not updated by dmae\n");
+       /* are storm stats valid? */
+       if ((u16)(le16_to_cpu(tclient->stats_counter) + 1) !=
+                                                       bp->stats_counter) {
+               DP(BNX2X_MSG_STATS, "stats not updated by tstorm"
+                  "  tstorm counter (%d) != stats_counter (%d)\n",
+                  tclient->stats_counter, bp->stats_counter);
                return -1;
        }
-
-       /* are storm stats valid? */
-       if (tstats->done.hi != 0xffffffff) {
-               DP(BNX2X_MSG_STATS, "stats not updated by tstorm\n");
+       if ((u16)(le16_to_cpu(xclient->stats_counter) + 1) !=
+                                                       bp->stats_counter) {
+               DP(BNX2X_MSG_STATS, "stats not updated by xstorm"
+                  "  xstorm counter (%d) != stats_counter (%d)\n",
+                  xclient->stats_counter, bp->stats_counter);
                return -2;
        }
-       if (xstats->done.hi != 0xffffffff) {
-               DP(BNX2X_MSG_STATS, "stats not updated by xstorm\n");
-               return -3;
-       }
 
-       estats->total_bytes_received_hi =
-       estats->valid_bytes_received_hi =
+       fstats->total_bytes_received_hi =
+       fstats->valid_bytes_received_hi =
                                le32_to_cpu(tclient->total_rcv_bytes.hi);
-       estats->total_bytes_received_lo =
-       estats->valid_bytes_received_lo =
+       fstats->total_bytes_received_lo =
+       fstats->valid_bytes_received_lo =
                                le32_to_cpu(tclient->total_rcv_bytes.lo);
-       ADD_64(estats->total_bytes_received_hi,
-              le32_to_cpu(tclient->rcv_error_bytes.hi),
-              estats->total_bytes_received_lo,
-              le32_to_cpu(tclient->rcv_error_bytes.lo));
-
-       UPDATE_EXTEND_TSTAT(rcv_unicast_pkts,
-                                       total_unicast_packets_received_hi,
-                                       total_unicast_packets_received_lo);
+
+       estats->error_bytes_received_hi =
+                               le32_to_cpu(tclient->rcv_error_bytes.hi);
+       estats->error_bytes_received_lo =
+                               le32_to_cpu(tclient->rcv_error_bytes.lo);
+       ADD_64(estats->error_bytes_received_hi,
+              estats->rx_stat_ifhcinbadoctets_hi,
+              estats->error_bytes_received_lo,
+              estats->rx_stat_ifhcinbadoctets_lo);
+
+       ADD_64(fstats->total_bytes_received_hi,
+              estats->error_bytes_received_hi,
+              fstats->total_bytes_received_lo,
+              estats->error_bytes_received_lo);
+
+       UPDATE_EXTEND_TSTAT(rcv_unicast_pkts, total_unicast_packets_received);
        UPDATE_EXTEND_TSTAT(rcv_multicast_pkts,
-                                       total_multicast_packets_received_hi,
-                                       total_multicast_packets_received_lo);
+                               total_multicast_packets_received);
        UPDATE_EXTEND_TSTAT(rcv_broadcast_pkts,
-                                       total_broadcast_packets_received_hi,
-                                       total_broadcast_packets_received_lo);
-
-       estats->frames_received_64_bytes = MAC_STX_NA;
-       estats->frames_received_65_127_bytes = MAC_STX_NA;
-       estats->frames_received_128_255_bytes = MAC_STX_NA;
-       estats->frames_received_256_511_bytes = MAC_STX_NA;
-       estats->frames_received_512_1023_bytes = MAC_STX_NA;
-       estats->frames_received_1024_1522_bytes = MAC_STX_NA;
-       estats->frames_received_1523_9022_bytes = MAC_STX_NA;
-
-       estats->x_total_sent_bytes_hi =
-                               le32_to_cpu(xstats->total_sent_bytes.hi);
-       estats->x_total_sent_bytes_lo =
-                               le32_to_cpu(xstats->total_sent_bytes.lo);
-       estats->x_total_sent_pkts = le32_to_cpu(xstats->total_sent_pkts);
-
-       estats->t_rcv_unicast_bytes_hi =
+                               total_broadcast_packets_received);
+
+       fstats->total_bytes_transmitted_hi =
+                               le32_to_cpu(xclient->total_sent_bytes.hi);
+       fstats->total_bytes_transmitted_lo =
+                               le32_to_cpu(xclient->total_sent_bytes.lo);
+
+       UPDATE_EXTEND_XSTAT(unicast_pkts_sent,
+                               total_unicast_packets_transmitted);
+       UPDATE_EXTEND_XSTAT(multicast_pkts_sent,
+                               total_multicast_packets_transmitted);
+       UPDATE_EXTEND_XSTAT(broadcast_pkts_sent,
+                               total_broadcast_packets_transmitted);
+
+       memcpy(estats, &(fstats->total_bytes_received_hi),
+              sizeof(struct host_func_stats) - 2*sizeof(u32));
+
+       estats->mac_filter_discard = le32_to_cpu(tport->mac_filter_discard);
+       estats->xxoverflow_discard = le32_to_cpu(tport->xxoverflow_discard);
+       estats->brb_truncate_discard =
+                               le32_to_cpu(tport->brb_truncate_discard);
+       estats->mac_discard = le32_to_cpu(tport->mac_discard);
+
+       old_tclient->rcv_unicast_bytes.hi =
                                le32_to_cpu(tclient->rcv_unicast_bytes.hi);
-       estats->t_rcv_unicast_bytes_lo =
+       old_tclient->rcv_unicast_bytes.lo =
                                le32_to_cpu(tclient->rcv_unicast_bytes.lo);
-       estats->t_rcv_broadcast_bytes_hi =
+       old_tclient->rcv_broadcast_bytes.hi =
                                le32_to_cpu(tclient->rcv_broadcast_bytes.hi);
-       estats->t_rcv_broadcast_bytes_lo =
+       old_tclient->rcv_broadcast_bytes.lo =
                                le32_to_cpu(tclient->rcv_broadcast_bytes.lo);
-       estats->t_rcv_multicast_bytes_hi =
+       old_tclient->rcv_multicast_bytes.hi =
                                le32_to_cpu(tclient->rcv_multicast_bytes.hi);
-       estats->t_rcv_multicast_bytes_lo =
+       old_tclient->rcv_multicast_bytes.lo =
                                le32_to_cpu(tclient->rcv_multicast_bytes.lo);
-       estats->t_total_rcv_pkt = le32_to_cpu(tclient->total_rcv_pkts);
+       old_tclient->total_rcv_pkts = le32_to_cpu(tclient->total_rcv_pkts);
 
-       estats->checksum_discard = le32_to_cpu(tclient->checksum_discard);
-       estats->packets_too_big_discard =
+       old_tclient->checksum_discard = le32_to_cpu(tclient->checksum_discard);
+       old_tclient->packets_too_big_discard =
                                le32_to_cpu(tclient->packets_too_big_discard);
-       estats->jabber_packets_received = estats->packets_too_big_discard +
-                                         estats->stat_Dot3statsFramesTooLong;
-       estats->no_buff_discard = le32_to_cpu(tclient->no_buff_discard);
-       estats->ttl0_discard = le32_to_cpu(tclient->ttl0_discard);
-       estats->mac_discard = le32_to_cpu(tclient->mac_discard);
-       estats->mac_filter_discard = le32_to_cpu(tstats->mac_filter_discard);
-       estats->xxoverflow_discard = le32_to_cpu(tstats->xxoverflow_discard);
-       estats->brb_truncate_discard =
-                               le32_to_cpu(tstats->brb_truncate_discard);
-
-       estats->brb_discard += nstats->brb_discard - bp->old_brb_discard;
-       bp->old_brb_discard = nstats->brb_discard;
-
-       estats->brb_packet = nstats->brb_packet;
-       estats->brb_truncate = nstats->brb_truncate;
-       estats->flow_ctrl_discard = nstats->flow_ctrl_discard;
-       estats->flow_ctrl_octets = nstats->flow_ctrl_octets;
-       estats->flow_ctrl_packet = nstats->flow_ctrl_packet;
-       estats->mng_discard = nstats->mng_discard;
-       estats->mng_octet_inp = nstats->mng_octet_inp;
-       estats->mng_octet_out = nstats->mng_octet_out;
-       estats->mng_packet_inp = nstats->mng_packet_inp;
-       estats->mng_packet_out = nstats->mng_packet_out;
-       estats->pbf_octets = nstats->pbf_octets;
-       estats->pbf_packet = nstats->pbf_packet;
-       estats->safc_inp = nstats->safc_inp;
-
-       xstats->done.hi = 0;
-       tstats->done.hi = 0;
-       nstats->done = 0;
+       estats->no_buff_discard =
+       old_tclient->no_buff_discard = le32_to_cpu(tclient->no_buff_discard);
+       old_tclient->ttl0_discard = le32_to_cpu(tclient->ttl0_discard);
+
+       old_xclient->total_sent_pkts = le32_to_cpu(xclient->total_sent_pkts);
+       old_xclient->unicast_bytes_sent.hi =
+                               le32_to_cpu(xclient->unicast_bytes_sent.hi);
+       old_xclient->unicast_bytes_sent.lo =
+                               le32_to_cpu(xclient->unicast_bytes_sent.lo);
+       old_xclient->multicast_bytes_sent.hi =
+                               le32_to_cpu(xclient->multicast_bytes_sent.hi);
+       old_xclient->multicast_bytes_sent.lo =
+                               le32_to_cpu(xclient->multicast_bytes_sent.lo);
+       old_xclient->broadcast_bytes_sent.hi =
+                               le32_to_cpu(xclient->broadcast_bytes_sent.hi);
+       old_xclient->broadcast_bytes_sent.lo =
+                               le32_to_cpu(xclient->broadcast_bytes_sent.lo);
+
+       fstats->host_func_stats_start = ++fstats->host_func_stats_end;
 
        return 0;
 }
 
-static void bnx2x_update_net_stats(struct bnx2x *bp)
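+/* Fill struct net_device_stats from the accumulated driver counters. */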
+static void bnx2x_net_stats_update(struct bnx2x *bp)
 {
-       struct bnx2x_eth_stats *estats = bnx2x_sp(bp, eth_stats);
+       struct tstorm_per_client_stats *old_tclient = &bp->old_tclient;
+       struct bnx2x_eth_stats *estats = &bp->eth_stats;
        struct net_device_stats *nstats = &bp->dev->stats;
 
        nstats->rx_packets =
@@ -3000,28 +3689,35 @@ static void bnx2x_update_net_stats(struct bnx2x *bp)
                bnx2x_hilo(&estats->total_multicast_packets_transmitted_hi) +
                bnx2x_hilo(&estats->total_broadcast_packets_transmitted_hi);
 
-       nstats->rx_bytes = bnx2x_hilo(&estats->total_bytes_received_hi);
+       nstats->rx_bytes = bnx2x_hilo(&estats->valid_bytes_received_hi);
 
        nstats->tx_bytes = bnx2x_hilo(&estats->total_bytes_transmitted_hi);
 
-       nstats->rx_dropped = estats->checksum_discard + estats->mac_discard;
+       nstats->rx_dropped = old_tclient->checksum_discard +
+                            estats->mac_discard;
        nstats->tx_dropped = 0;
 
        nstats->multicast =
                bnx2x_hilo(&estats->total_multicast_packets_transmitted_hi);
 
-       nstats->collisions = estats->single_collision_transmit_frames +
-                            estats->multiple_collision_transmit_frames +
-                            estats->late_collision_frames +
-                            estats->excessive_collision_frames;
+       nstats->collisions =
+                       estats->tx_stat_dot3statssinglecollisionframes_lo +
+                       estats->tx_stat_dot3statsmultiplecollisionframes_lo +
+                       estats->tx_stat_dot3statslatecollisions_lo +
+                       estats->tx_stat_dot3statsexcessivecollisions_lo;
+
+       estats->jabber_packets_received =
+                               old_tclient->packets_too_big_discard +
+                               estats->rx_stat_dot3statsframestoolong_lo;
 
-       nstats->rx_length_errors = estats->runt_packets_received +
-                                  estats->jabber_packets_received;
-       nstats->rx_over_errors = estats->brb_discard +
+       nstats->rx_length_errors =
+                               estats->rx_stat_etherstatsundersizepkts_lo +
+                               estats->jabber_packets_received;
+       nstats->rx_over_errors = estats->brb_drop_lo +
                                 estats->brb_truncate_discard;
-       nstats->rx_crc_errors = estats->crc_receive_errors;
-       nstats->rx_frame_errors = estats->alignment_errors;
-       nstats->rx_fifo_errors = estats->no_buff_discard;
+       nstats->rx_crc_errors = estats->rx_stat_dot3statsfcserrors_lo;
+       nstats->rx_frame_errors = estats->rx_stat_dot3statsalignmenterrors_lo;
+       nstats->rx_fifo_errors = old_tclient->no_buff_discard;
        nstats->rx_missed_errors = estats->xxoverflow_discard;
 
        nstats->rx_errors = nstats->rx_length_errors +
@@ -3031,39 +3727,48 @@ static void bnx2x_update_net_stats(struct bnx2x *bp)
                            nstats->rx_fifo_errors +
                            nstats->rx_missed_errors;
 
-       nstats->tx_aborted_errors = estats->late_collision_frames +
-                                   estats->excessive_collision_frames;
-       nstats->tx_carrier_errors = estats->false_carrier_detections;
+       nstats->tx_aborted_errors =
+                       estats->tx_stat_dot3statslatecollisions_lo +
+                       estats->tx_stat_dot3statsexcessivecollisions_lo;
+       nstats->tx_carrier_errors = estats->rx_stat_falsecarriererrors_lo;
        nstats->tx_fifo_errors = 0;
        nstats->tx_heartbeat_errors = 0;
        nstats->tx_window_errors = 0;
 
        nstats->tx_errors = nstats->tx_aborted_errors +
                            nstats->tx_carrier_errors;
-
-       estats->mac_stx_start = ++estats->mac_stx_end;
 }
 
-static void bnx2x_update_stats(struct bnx2x *bp)
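+/* UPDATE event handler: bail out while the previous DMAE pass is still
+ * in flight, refresh HW (PMF only) and storm statistics, then re-arm
+ * both for the next timer tick.
+ */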
+static void bnx2x_stats_update(struct bnx2x *bp)
 {
-       if (!bnx2x_update_storm_stats(bp)) {
+       u32 *stats_comp = bnx2x_sp(bp, stats_comp);
+       int update = 0;
 
-               if (bp->link_vars.mac_type == MAC_TYPE_BMAC) {
-                       bnx2x_update_bmac_stats(bp);
+       if (*stats_comp != DMAE_COMP_VAL)
+               return;
 
-               } else if (bp->link_vars.mac_type == MAC_TYPE_EMAC) {
-                       bnx2x_update_emac_stats(bp);
+       if (bp->port.pmf)
+               update = (bnx2x_hw_stats_update(bp) == 0);
 
-               } else { /* unreached */
-                       BNX2X_ERR("no MAC active\n");
-                       return;
-               }
+       update |= (bnx2x_storm_stats_update(bp) == 0);
+
+       if (update)
+               bnx2x_net_stats_update(bp);
 
-               bnx2x_update_net_stats(bp);
+       else {
+               if (bp->stats_pending) {
+                       bp->stats_pending++;
+                       if (bp->stats_pending == 3) {
+                               BNX2X_ERR("stats not updated 3 times in a row\n");
+                               bnx2x_panic();
+                               return;
+                       }
+               }
        }
 
        if (bp->msglevel & NETIF_MSG_TIMER) {
-               struct bnx2x_eth_stats *estats = bnx2x_sp(bp, eth_stats);
+               struct tstorm_per_client_stats *old_tclient = &bp->old_tclient;
+               struct bnx2x_eth_stats *estats = &bp->eth_stats;
                struct net_device_stats *nstats = &bp->dev->stats;
                int i;
 
@@ -3071,24 +3776,26 @@ static void bnx2x_update_stats(struct bnx2x *bp)
                printk(KERN_DEBUG "  tx avail (%4x)  tx hc idx (%x)"
                                  "  tx pkt (%lx)\n",
                       bnx2x_tx_avail(bp->fp),
-                      *bp->fp->tx_cons_sb, nstats->tx_packets);
+                      le16_to_cpu(*bp->fp->tx_cons_sb), nstats->tx_packets);
                printk(KERN_DEBUG "  rx usage (%4x)  rx hc idx (%x)"
                                  "  rx pkt (%lx)\n",
-                      (u16)(*bp->fp->rx_cons_sb - bp->fp->rx_comp_cons),
-                      *bp->fp->rx_cons_sb, nstats->rx_packets);
+                      (u16)(le16_to_cpu(*bp->fp->rx_cons_sb) -
+                            bp->fp->rx_comp_cons),
+                      le16_to_cpu(*bp->fp->rx_cons_sb), nstats->rx_packets);
                printk(KERN_DEBUG "  %s (Xoff events %u)  brb drops %u\n",
                      netif_queue_stopped(bp->dev) ? "Xoff" : "Xon",
-                      estats->driver_xoff, estats->brb_discard);
+                      estats->driver_xoff, estats->brb_drop_lo);
                printk(KERN_DEBUG "tstats: checksum_discard %u  "
                        "packets_too_big_discard %u  no_buff_discard %u  "
                        "mac_discard %u  mac_filter_discard %u  "
                        "xxovrflow_discard %u  brb_truncate_discard %u  "
                        "ttl0_discard %u\n",
-                      estats->checksum_discard,
-                      estats->packets_too_big_discard,
-                      estats->no_buff_discard, estats->mac_discard,
+                      old_tclient->checksum_discard,
+                      old_tclient->packets_too_big_discard,
+                      old_tclient->no_buff_discard, estats->mac_discard,
                       estats->mac_filter_discard, estats->xxoverflow_discard,
-                      estats->brb_truncate_discard, estats->ttl0_discard);
+                      estats->brb_truncate_discard,
+                      old_tclient->ttl0_discard);
 
                for_each_queue(bp, i) {
                        printk(KERN_DEBUG "[%d]: %lu\t%lu\t%lu\n", i,
@@ -3098,58 +3805,129 @@ static void bnx2x_update_stats(struct bnx2x *bp)
                }
        }
 
-       if (bp->state != BNX2X_STATE_OPEN) {
-               DP(BNX2X_MSG_STATS, "state is %x, returning\n", bp->state);
-               return;
-       }
-
-#ifdef BNX2X_STOP_ON_ERROR
-       if (unlikely(bp->panic))
-               return;
-#endif
+       bnx2x_hw_stats_post(bp);
+       bnx2x_storm_stats_post(bp);
+}
 
-       /* loader */
-       if (bp->executer_idx) {
-               struct dmae_command *dmae = &bp->dmae;
-               int port = BP_PORT(bp);
-               int loader_idx = port * 8;
+static void bnx2x_port_stats_stop(struct bnx2x *bp)
+{
+       struct dmae_command *dmae;
+       u32 opcode;
+       int loader_idx = PMF_DMAE_C(bp);
+       u32 *stats_comp = bnx2x_sp(bp, stats_comp);
 
-               memset(dmae, 0, sizeof(struct dmae_command));
+       bp->executer_idx = 0;
 
-               dmae->opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC |
-                               DMAE_CMD_C_DST_GRC | DMAE_CMD_C_ENABLE |
-                               DMAE_CMD_DST_RESET |
+       opcode = (DMAE_CMD_SRC_PCI | DMAE_CMD_DST_GRC |
+                 DMAE_CMD_C_ENABLE |
+                 DMAE_CMD_SRC_RESET | DMAE_CMD_DST_RESET |
 #ifdef __BIG_ENDIAN
-                               DMAE_CMD_ENDIANITY_B_DW_SWAP |
+                 DMAE_CMD_ENDIANITY_B_DW_SWAP |
 #else
-                               DMAE_CMD_ENDIANITY_DW_SWAP |
+                 DMAE_CMD_ENDIANITY_DW_SWAP |
 #endif
-                               (port ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0));
-               dmae->src_addr_lo = U64_LO(bnx2x_sp_mapping(bp, dmae[0]));
-               dmae->src_addr_hi = U64_HI(bnx2x_sp_mapping(bp, dmae[0]));
-               dmae->dst_addr_lo = (DMAE_REG_CMD_MEM +
-                                    sizeof(struct dmae_command) *
-                                    (loader_idx + 1)) >> 2;
+                 (BP_PORT(bp) ? DMAE_CMD_PORT_1 : DMAE_CMD_PORT_0) |
+                 (BP_E1HVN(bp) << DMAE_CMD_E1HVN_SHIFT));
+
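+       /* Write the port stats block out over DMAE; when a function
+        * stats block follows, chain the two commands through a GRC
+        * completion, otherwise complete straight to the PCI word.
+        */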
+       if (bp->port.port_stx) {
+
+               dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
+               if (bp->func_stx)
+                       dmae->opcode = (opcode | DMAE_CMD_C_DST_GRC);
+               else
+                       dmae->opcode = (opcode | DMAE_CMD_C_DST_PCI);
+               dmae->src_addr_lo = U64_LO(bnx2x_sp_mapping(bp, port_stats));
+               dmae->src_addr_hi = U64_HI(bnx2x_sp_mapping(bp, port_stats));
+               dmae->dst_addr_lo = bp->port.port_stx >> 2;
                dmae->dst_addr_hi = 0;
-               dmae->len = sizeof(struct dmae_command) >> 2;
-               dmae->len--;    /* !!! for A0/1 only */
-               dmae->comp_addr_lo = dmae_reg_go_c[loader_idx + 1] >> 2;
-               dmae->comp_addr_hi = 0;
-               dmae->comp_val = 1;
+               dmae->len = sizeof(struct host_port_stats) >> 2;
+               if (bp->func_stx) {
+                       dmae->comp_addr_lo = dmae_reg_go_c[loader_idx] >> 2;
+                       dmae->comp_addr_hi = 0;
+                       dmae->comp_val = 1;
+               } else {
+                       dmae->comp_addr_lo =
+                               U64_LO(bnx2x_sp_mapping(bp, stats_comp));
+                       dmae->comp_addr_hi =
+                               U64_HI(bnx2x_sp_mapping(bp, stats_comp));
+                       dmae->comp_val = DMAE_COMP_VAL;
 
-               bnx2x_post_dmae(bp, dmae, loader_idx);
+                       *stats_comp = 0;
+               }
+       }
+
+       if (bp->func_stx) {
+
+               dmae = bnx2x_sp(bp, dmae[bp->executer_idx++]);
+               dmae->opcode = (opcode | DMAE_CMD_C_DST_PCI);
+               dmae->src_addr_lo = U64_LO(bnx2x_sp_mapping(bp, func_stats));
+               dmae->src_addr_hi = U64_HI(bnx2x_sp_mapping(bp, func_stats));
+               dmae->dst_addr_lo = bp->func_stx >> 2;
+               dmae->dst_addr_hi = 0;
+               dmae->len = sizeof(struct host_func_stats) >> 2;
+               dmae->comp_addr_lo = U64_LO(bnx2x_sp_mapping(bp, stats_comp));
+               dmae->comp_addr_hi = U64_HI(bnx2x_sp_mapping(bp, stats_comp));
+               dmae->comp_val = DMAE_COMP_VAL;
+
+               *stats_comp = 0;
+       }
+}
+
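+/* STOP event handler: let the pending DMAE finish, take one final
+ * update and, on the PMF, flush the port stats before going quiet.
+ */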
+static void bnx2x_stats_stop(struct bnx2x *bp)
+{
+       int update = 0;
+
+       bnx2x_stats_comp(bp);
+
+       if (bp->port.pmf)
+               update = (bnx2x_hw_stats_update(bp) == 0);
+
+       update |= (bnx2x_storm_stats_update(bp) == 0);
+
+       if (update) {
+               bnx2x_net_stats_update(bp);
+
+               if (bp->port.pmf)
+                       bnx2x_port_stats_stop(bp);
+
+               bnx2x_hw_stats_post(bp);
+               bnx2x_stats_comp(bp);
        }
+}
+
+static void bnx2x_stats_do_nothing(struct bnx2x *bp)
+{
+}
+
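+/* Statistics state machine: bnx2x_stats_stm[state][event] names the
+ * action to run and the state to enter next.
+ */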
+static const struct {
+       void (*action)(struct bnx2x *bp);
+       enum bnx2x_stats_state next_state;
+} bnx2x_stats_stm[STATS_STATE_MAX][STATS_EVENT_MAX] = {
+/* state       event   */
+{
+/* DISABLED    PMF     */ {bnx2x_stats_pmf_update, STATS_STATE_DISABLED},
+/*             LINK_UP */ {bnx2x_stats_start,      STATS_STATE_ENABLED},
+/*             UPDATE  */ {bnx2x_stats_do_nothing, STATS_STATE_DISABLED},
+/*             STOP    */ {bnx2x_stats_do_nothing, STATS_STATE_DISABLED}
+},
+{
+/* ENABLED     PMF     */ {bnx2x_stats_pmf_start,  STATS_STATE_ENABLED},
+/*             LINK_UP */ {bnx2x_stats_restart,    STATS_STATE_ENABLED},
+/*             UPDATE  */ {bnx2x_stats_update,     STATS_STATE_ENABLED},
+/*             STOP    */ {bnx2x_stats_stop,       STATS_STATE_DISABLED}
+}
+};
+
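+/* Single entry point for all statistics events. */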
+static void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event)
+{
+       enum bnx2x_stats_state state = bp->stats_state;
 
-       if (bp->stats_state != STATS_STATE_ENABLE) {
-               bp->stats_state = STATS_STATE_DISABLE;
-               return;
-       }
+       bnx2x_stats_stm[state][event].action(bp);
+       bp->stats_state = bnx2x_stats_stm[state][event].next_state;
 
-       if (bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_STAT_QUERY, 0, 0, 0, 0) == 0) {
-               /* stats ramrod has it's own slot on the spe */
-               bp->spq_left++;
-               bp->stat_pending = 1;
-       }
+       if ((event != STATS_EVENT_UPDATE) || (bp->msglevel & NETIF_MSG_TIMER))
+               DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n",
+                  state, event, bp->stats_state);
 }
 
 static void bnx2x_timer(unsigned long data)
@@ -3194,10 +3972,9 @@ static void bnx2x_timer(unsigned long data)
                }
        }
 
-       if (bp->stats_state == STATS_STATE_DISABLE)
-               goto timer_restart;
-
-       bnx2x_update_stats(bp);
+       if ((bp->state == BNX2X_STATE_OPEN) ||
+           (bp->state == BNX2X_STATE_DISABLED))
+               bnx2x_stats_handle(bp, STATS_EVENT_UPDATE);
 
 timer_restart:
        mod_timer(&bp->timer, jiffies + bp->current_interval);
@@ -3227,6 +4004,7 @@ static void bnx2x_init_sb(struct bnx2x *bp, int sb_id,
                          struct host_status_block *sb, dma_addr_t mapping)
 {
        int port = BP_PORT(bp);
+       int func = BP_FUNC(bp);
        int index;
        u64 section;
 
@@ -3240,6 +4018,8 @@ static void bnx2x_init_sb(struct bnx2x *bp, int sb_id,
        REG_WR(bp, BAR_USTRORM_INTMEM +
               ((USTORM_SB_HOST_SB_ADDR_OFFSET(port, sb_id)) + 4),
               U64_HI(section));
+       REG_WR8(bp, BAR_USTRORM_INTMEM + FP_USB_FUNC_OFF +
+               USTORM_SB_HOST_STATUS_BLOCK_OFFSET(port, sb_id), func);
 
        for (index = 0; index < HC_USTORM_SB_NUM_INDICES; index++)
                REG_WR16(bp, BAR_USTRORM_INTMEM +
@@ -3255,6 +4035,8 @@ static void bnx2x_init_sb(struct bnx2x *bp, int sb_id,
        REG_WR(bp, BAR_CSTRORM_INTMEM +
               ((CSTORM_SB_HOST_SB_ADDR_OFFSET(port, sb_id)) + 4),
               U64_HI(section));
+       REG_WR8(bp, BAR_CSTRORM_INTMEM + FP_CSB_FUNC_OFF +
+               CSTORM_SB_HOST_STATUS_BLOCK_OFFSET(port, sb_id), func);
 
        for (index = 0; index < HC_CSTORM_SB_NUM_INDICES; index++)
                REG_WR16(bp, BAR_CSTRORM_INTMEM +
@@ -3411,6 +4193,8 @@ static void bnx2x_init_def_sb(struct bnx2x *bp,
                REG_WR16(bp, BAR_XSTRORM_INTMEM +
                         XSTORM_DEF_SB_HC_DISABLE_OFFSET(func, index), 1);
 
+       bp->stats_pending = 0;
+
        bnx2x_ack_sb(bp, sb_id, CSTORM_ID, 0, IGU_INT_ENABLE, 0);
 }
 
@@ -3444,22 +4228,94 @@ static void bnx2x_update_coalesce(struct bnx2x *bp)
        }
 }
 
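+/* Release any skbs still parked in the TPA bins; a bin caught in the
+ * START state still owns its DMA mapping and must be unmapped first.
+ */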
+static inline void bnx2x_free_tpa_pool(struct bnx2x *bp,
+                                      struct bnx2x_fastpath *fp, int last)
+{
+       int i;
+
+       for (i = 0; i < last; i++) {
+               struct sw_rx_bd *rx_buf = &(fp->tpa_pool[i]);
+               struct sk_buff *skb = rx_buf->skb;
+
+               if (skb == NULL) {
+                       DP(NETIF_MSG_IFDOWN, "tpa bin %d empty on free\n", i);
+                       continue;
+               }
+
+               if (fp->tpa_state[i] == BNX2X_TPA_START)
+                       pci_unmap_single(bp->pdev,
+                                        pci_unmap_addr(rx_buf, mapping),
+                                        bp->rx_buf_use_size,
+                                        PCI_DMA_FROMDEVICE);
+
+               dev_kfree_skb(skb);
+               rx_buf->skb = NULL;
+       }
+}
+
 static void bnx2x_init_rx_rings(struct bnx2x *bp)
 {
-       u16 ring_prod;
+       int func = BP_FUNC(bp);
+       u16 ring_prod, cqe_ring_prod = 0;
        int i, j;
 
        bp->rx_buf_use_size = bp->dev->mtu;
-
        bp->rx_buf_use_size += bp->rx_offset + ETH_OVREHEAD;
        bp->rx_buf_size = bp->rx_buf_use_size + 64;
 
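+       /* With TPA enabled, pre-allocate one skb per aggregation bin so
+        * a completed aggregation can always be passed up; if allocation
+        * fails, TPA is disabled for that queue only.
+        */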
+       if (bp->flags & TPA_ENABLE_FLAG) {
+               DP(NETIF_MSG_IFUP,
+                  "rx_buf_use_size %d  rx_buf_size %d  effective_mtu %d\n",
+                  bp->rx_buf_use_size, bp->rx_buf_size,
+                  bp->dev->mtu + ETH_OVREHEAD);
+
+               for_each_queue(bp, j) {
+                       for (i = 0; i < ETH_MAX_AGGREGATION_QUEUES_E1H; i++) {
+                               struct bnx2x_fastpath *fp = &bp->fp[j];
+
+                               fp->tpa_pool[i].skb =
+                                  netdev_alloc_skb(bp->dev, bp->rx_buf_size);
+                               if (!fp->tpa_pool[i].skb) {
+                                       BNX2X_ERR("Failed to allocate TPA "
+                                                 "skb pool for queue[%d] - "
+                                                 "disabling TPA on this "
+                                                 "queue!\n", j);
+                                       bnx2x_free_tpa_pool(bp, fp, i);
+                                       fp->disable_tpa = 1;
+                                       break;
+                               }
+                               pci_unmap_addr_set(&fp->tpa_pool[i],
+                                                  mapping, 0);
+                               fp->tpa_state[i] = BNX2X_TPA_STOP;
+                       }
+               }
+       }
+
        for_each_queue(bp, j) {
                struct bnx2x_fastpath *fp = &bp->fp[j];
 
                fp->rx_bd_cons = 0;
                fp->rx_cons_sb = BNX2X_RX_SB_INDEX;
+               fp->rx_bd_cons_sb = BNX2X_RX_SB_BD_INDEX;
+
+               /* "next page" elements initialization */
+               /* SGE ring */
+               for (i = 1; i <= NUM_RX_SGE_PAGES; i++) {
+                       struct eth_rx_sge *sge;
+
+                       sge = &fp->rx_sge_ring[RX_SGE_CNT * i - 2];
+                       sge->addr_hi =
+                               cpu_to_le32(U64_HI(fp->rx_sge_mapping +
+                                       BCM_PAGE_SIZE*(i % NUM_RX_SGE_PAGES)));
+                       sge->addr_lo =
+                               cpu_to_le32(U64_LO(fp->rx_sge_mapping +
+                                       BCM_PAGE_SIZE*(i % NUM_RX_SGE_PAGES)));
+               }
+
+               bnx2x_init_sge_ring_bit_mask(fp);
 
+               /* RX BD ring */
                for (i = 1; i <= NUM_RX_RINGS; i++) {
                        struct eth_rx_bd *rx_bd;
 
@@ -3486,35 +4342,61 @@ static void bnx2x_init_rx_rings(struct bnx2x *bp)
                                           BCM_PAGE_SIZE*(i % NUM_RCQ_RINGS)));
                }
 
-               /* rx completion queue */
-               fp->rx_comp_cons = ring_prod = 0;
+               /* Allocate SGEs and initialize the ring elements */
+               for (i = 0, ring_prod = 0;
+                    i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {
+
+                       if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) {
+                               BNX2X_ERR("was only able to allocate "
+                                         "%d rx sges\n", i);
+                               BNX2X_ERR("disabling TPA for queue[%d]\n", j);
+                               /* Cleanup already allocated elements */
+                               bnx2x_free_rx_sge_range(bp, fp, ring_prod);
+                               bnx2x_free_tpa_pool(bp, fp,
+                                             ETH_MAX_AGGREGATION_QUEUES_E1H);
+                               fp->disable_tpa = 1;
+                               ring_prod = 0;
+                               break;
+                       }
+                       ring_prod = NEXT_SGE_IDX(ring_prod);
+               }
+               fp->rx_sge_prod = ring_prod;
 
+               /* Allocate BDs and initialize BD ring */
+               fp->rx_comp_cons = fp->rx_alloc_failed = 0;
+               cqe_ring_prod = ring_prod = 0;
                for (i = 0; i < bp->rx_ring_size; i++) {
                        if (bnx2x_alloc_rx_skb(bp, fp, ring_prod) < 0) {
                                BNX2X_ERR("was only able to allocate "
                                          "%d rx skbs\n", i);
+                               fp->rx_alloc_failed++;
                                break;
                        }
                        ring_prod = NEXT_RX_IDX(ring_prod);
-                       BUG_TRAP(ring_prod > i);
+                       cqe_ring_prod = NEXT_RCQ_IDX(cqe_ring_prod);
+                       WARN_ON(ring_prod <= i);
                }
 
-               fp->rx_bd_prod = fp->rx_comp_prod = ring_prod;
+               fp->rx_bd_prod = ring_prod;
+               /* must not have more available CQEs than BDs */
+               fp->rx_comp_prod = min((u16)(NUM_RCQ_RINGS*RCQ_DESC_CNT),
+                                      cqe_ring_prod);
                fp->rx_pkt = fp->rx_calls = 0;
 
-               /* Warning! this will generate an interrupt (to the TSTORM) */
-               /* must only be done when chip is initialized */
-               REG_WR(bp, BAR_TSTRORM_INTMEM +
-                      TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)),
-                       ring_prod);
+               /* Warning!
+                * this will generate an interrupt (to the TSTORM)
+                * must only be done after chip is initialized
+                */
+               bnx2x_update_rx_prod(bp, fp, ring_prod, fp->rx_comp_prod,
+                                    fp->rx_sge_prod);
                if (j != 0)
                        continue;
 
                REG_WR(bp, BAR_USTRORM_INTMEM +
-                      USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(BP_PORT(bp)),
+                      USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func),
                       U64_LO(fp->rx_comp_mapping));
                REG_WR(bp, BAR_USTRORM_INTMEM +
-                      USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(BP_PORT(bp)) + 4,
+                      USTORM_MEM_WORKAROUND_ADDRESS_OFFSET(func) + 4,
                       U64_HI(fp->rx_comp_mapping));
        }
 }
@@ -3602,6 +4484,18 @@ static void bnx2x_init_context(struct bnx2x *bp)
                                                U64_HI(fp->rx_desc_mapping);
                context->ustorm_st_context.common.bd_page_base_lo =
                                                U64_LO(fp->rx_desc_mapping);
+               if (!fp->disable_tpa) {
+                       context->ustorm_st_context.common.flags |=
+                               (USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_TPA |
+                                USTORM_ETH_ST_CONTEXT_CONFIG_ENABLE_SGE_RING);
+                       context->ustorm_st_context.common.sge_buff_size =
+                                       (u16)(BCM_PAGE_SIZE*PAGES_PER_SGE);
+                       context->ustorm_st_context.common.sge_page_base_hi =
+                                               U64_HI(fp->rx_sge_mapping);
+                       context->ustorm_st_context.common.sge_page_base_lo =
+                                               U64_LO(fp->rx_sge_mapping);
+               }
+
                context->cstorm_st_context.sb_index_number =
                                                HC_INDEX_C_ETH_TX_CQ_CONS;
                context->cstorm_st_context.status_block_id = sb_id;
@@ -3652,6 +4546,18 @@ static void bnx2x_set_client_config(struct bnx2x *bp)
        }
 #endif
 
+       if (bp->flags & TPA_ENABLE_FLAG) {
+               tstorm_client.max_sges_for_packet =
+                       BCM_PAGE_ALIGN(tstorm_client.mtu) >> BCM_PAGE_SHIFT;
+               tstorm_client.max_sges_for_packet =
+                       ((tstorm_client.max_sges_for_packet +
+                         PAGES_PER_SGE - 1) & (~(PAGES_PER_SGE - 1))) >>
+                       PAGES_PER_SGE_SHIFT;
+
+               tstorm_client.config_flags |=
+                               TSTORM_ETH_CLIENT_CONFIG_ENABLE_SGE_RING;
+       }
+
        for_each_queue(bp, i) {
                REG_WR(bp, BAR_TSTRORM_INTMEM +
                       TSTORM_CLIENT_CONFIG_OFFSET(port, bp->fp[i].cl_id),
@@ -3766,8 +4672,8 @@ static void bnx2x_init_internal(struct bnx2x *bp)
                REG_WR8(bp, BAR_USTRORM_INTMEM + USTORM_FUNCTION_MODE_OFFSET,
                        IS_E1HMF(bp));
 
-               REG_WR16(bp, BAR_XSTRORM_INTMEM +
-                        XSTORM_E1HOV_OFFSET(func), bp->e1hov);
+               REG_WR16(bp, BAR_XSTRORM_INTMEM + XSTORM_E1HOV_OFFSET(func),
+                        bp->e1hov);
        }
 
        /* Zero this manually as its initialization is
@@ -3775,6 +4681,25 @@ static void bnx2x_init_internal(struct bnx2x *bp)
        for (i = 0; i < USTORM_AGG_DATA_SIZE >> 2; i++)
                REG_WR(bp, BAR_USTRORM_INTMEM +
                       USTORM_AGG_DATA_OFFSET + 4*i, 0);
+
+       for_each_queue(bp, i) {
+               struct bnx2x_fastpath *fp = &bp->fp[i];
+               u16 max_agg_size;
+
+               REG_WR(bp, BAR_USTRORM_INTMEM +
+                      USTORM_CQE_PAGE_BASE_OFFSET(port, FP_CL_ID(fp)),
+                      U64_LO(fp->rx_comp_mapping));
+               REG_WR(bp, BAR_USTRORM_INTMEM +
+                      USTORM_CQE_PAGE_BASE_OFFSET(port, FP_CL_ID(fp)) + 4,
+                      U64_HI(fp->rx_comp_mapping));
+
+               max_agg_size = min((u32)(bp->rx_buf_use_size +
+                                        8*BCM_PAGE_SIZE*PAGES_PER_SGE),
+                                  (u32)0xffff);
+               REG_WR16(bp, BAR_USTRORM_INTMEM +
+                        USTORM_MAX_AGG_SIZE_OFFSET(port, FP_CL_ID(fp)),
+                        max_agg_size);
+       }
 }
 
 static void bnx2x_nic_init(struct bnx2x *bp)
@@ -3804,7 +4729,7 @@ static void bnx2x_nic_init(struct bnx2x *bp)
        bnx2x_init_sp_ring(bp);
        bnx2x_init_context(bp);
        bnx2x_init_internal(bp);
-       bnx2x_init_stats(bp);
+       bnx2x_storm_stats_init(bp);
        bnx2x_init_ind_table(bp);
        bnx2x_int_enable(bp);
 }
@@ -4397,6 +5322,17 @@ static int bnx2x_init_common(struct bnx2x *bp)
 
        enable_blocks_attention(bp);
 
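+       /* Tell the TSTORM that TPA is in use; the 8-byte structure is
+        * written into internal memory as two 32-bit words.
+        */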
+       if (bp->flags & TPA_ENABLE_FLAG) {
+               struct tstorm_eth_tpa_exist tmp = {0};
+
+               tmp.tpa_exist = 1;
+
+               REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_TPA_EXIST_OFFSET,
+                      ((u32 *)&tmp)[0]);
+               REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_TPA_EXIST_OFFSET + 4,
+                      ((u32 *)&tmp)[1]);
+       }
+
        return 0;
 }
 
@@ -4775,8 +5711,12 @@ static void bnx2x_free_mem(struct bnx2x *bp)
                               bnx2x_fp(bp, i, rx_comp_mapping),
                               sizeof(struct eth_fast_path_rx_cqe) *
                               NUM_RCQ_BD);
-       }
 
+               /* SGE ring */
+               BNX2X_PCI_FREE(bnx2x_fp(bp, i, rx_sge_ring),
+                              bnx2x_fp(bp, i, rx_sge_mapping),
+                              BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
+       }
        /* end of fastpath */
 
        BNX2X_PCI_FREE(bp->def_status_blk, bp->def_status_blk_mapping,
@@ -4791,7 +5731,7 @@ static void bnx2x_free_mem(struct bnx2x *bp)
        BNX2X_PCI_FREE(bp->timers, bp->timers_mapping, 8*1024);
        BNX2X_PCI_FREE(bp->qm, bp->qm_mapping, 128*1024);
 #endif
-       BNX2X_PCI_FREE(bp->spq, bp->spq_mapping, PAGE_SIZE);
+       BNX2X_PCI_FREE(bp->spq, bp->spq_mapping, BCM_PAGE_SIZE);
 
 #undef BNX2X_PCI_FREE
 #undef BNX2X_KFREE
@@ -4853,6 +5793,12 @@ static int bnx2x_alloc_mem(struct bnx2x *bp)
                                sizeof(struct eth_fast_path_rx_cqe) *
                                NUM_RCQ_BD);
 
+               /* SGE ring */
+               BNX2X_ALLOC(bnx2x_fp(bp, i, rx_page_ring),
+                               sizeof(struct sw_rx_page) * NUM_RX_SGE);
+               BNX2X_PCI_ALLOC(bnx2x_fp(bp, i, rx_sge_ring),
+                               &bnx2x_fp(bp, i, rx_sge_mapping),
+                               BCM_PAGE_SIZE * NUM_RX_SGE_PAGES);
        }
        /* end of fastpath */
 
@@ -4943,6 +5889,9 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp)
                        rx_buf->skb = NULL;
                        dev_kfree_skb(skb);
                }
+               if (!fp->disable_tpa)
+                       bnx2x_free_tpa_pool(bp, fp,
+                                           ETH_MAX_AGGREGATION_QUEUES_E1H);
        }
 }
 
@@ -5294,6 +6243,10 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
        if (bnx2x_alloc_mem(bp))
                return -ENOMEM;
 
+       for_each_queue(bp, i)
+               bnx2x_fp(bp, i, disable_tpa) =
+                                       ((bp->flags & TPA_ENABLE_FLAG) == 0);
+
        /* Disable interrupt handling until HW is initialized */
        atomic_set(&bp->intr_sem, 1);
 
@@ -5339,6 +6292,8 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
                }
        }
 
+       bnx2x_stats_init(bp);
+
        bp->state = BNX2X_STATE_OPENING_WAIT4_PORT;
 
        /* Enable Rx interrupt handling before sending the ramrod
@@ -5420,6 +6375,11 @@ load_int_disable:
        /* Release IRQs */
        bnx2x_free_irq(bp);
 
+       /* Free SKBs, SGEs, TPA pool and driver internals */
+       bnx2x_free_skbs(bp);
+       for_each_queue(bp, i)
+               bnx2x_free_rx_sge_range(bp, bp->fp + i,
+                                       RX_SGE_CNT*NUM_RX_SGE_PAGES);
 load_error:
        bnx2x_free_mem(bp);
 
@@ -5595,6 +6555,7 @@ static int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode)
        del_timer_sync(&bp->timer);
        SHMEM_WR(bp, func_mb[BP_FUNC(bp)].drv_pulse_mb,
                 (DRV_PULSE_ALWAYS_ALIVE | bp->fw_drv_pulse_wr_seq));
+       bnx2x_stats_handle(bp, STATS_EVENT_STOP);
 
        /* Wait until all fast path tasks complete */
        for_each_queue(bp, i) {
@@ -5717,8 +6678,11 @@ unload_error:
        if (!BP_NOMCP(bp))
                bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE);
 
-       /* Free SKBs and driver internals */
+       /* Free SKBs, SGEs, TPA pool and driver internals */
        bnx2x_free_skbs(bp);
+       for_each_queue(bp, i)
+               bnx2x_free_rx_sge_range(bp, bp->fp + i,
+                                       RX_SGE_CNT*NUM_RX_SGE_PAGES);
        bnx2x_free_mem(bp);
 
        bp->state = BNX2X_STATE_CLOSED;
@@ -6394,6 +7358,16 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp)
                printk(KERN_ERR PFX
                       "MCP disabled, must load devices in order!\n");
 
+       /* Set TPA flags */
+       if (disable_tpa) {
+               bp->flags &= ~TPA_ENABLE_FLAG;
+               bp->dev->features &= ~NETIF_F_LRO;
+       } else {
+               bp->flags |= TPA_ENABLE_FLAG;
+               bp->dev->features |= NETIF_F_LRO;
+       }
+
        bp->tx_ring_size = MAX_TX_AVAIL;
        bp->rx_ring_size = MAX_RX_AVAIL;
 
@@ -6641,7 +7615,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
           bp->port.advertising);
 
        if (netif_running(dev)) {
-               bnx2x_stop_stats(bp);
+               bnx2x_stats_handle(bp, STATS_EVENT_STOP);
                bnx2x_link_set(bp);
        }
 
@@ -6738,7 +7712,7 @@ static int bnx2x_nway_reset(struct net_device *dev)
                return 0;
 
        if (netif_running(dev)) {
-               bnx2x_stop_stats(bp);
+               bnx2x_stats_handle(bp, STATS_EVENT_STOP);
                bnx2x_link_set(bp);
        }
 
@@ -7128,10 +8102,13 @@ static int bnx2x_set_eeprom(struct net_device *dev,
                                             bp->link_params.ext_phy_config,
                                             (bp->state != BNX2X_STATE_CLOSED),
                                             eebuf, eeprom->len);
+                       if ((bp->state == BNX2X_STATE_OPEN) ||
+                           (bp->state == BNX2X_STATE_DISABLED)) {
                                rc |= bnx2x_link_reset(&bp->link_params,
                                                       &bp->link_vars);
                                rc |= bnx2x_phy_init(&bp->link_params,
                                                     &bp->link_vars);
+                       }
                        bnx2x_phy_hw_unlock(bp);
 
                } else /* Only the PMF can access the PHY */
@@ -7180,6 +8157,33 @@ static int bnx2x_set_coalesce(struct net_device *dev,
        return 0;
 }
 
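+/* ethtool set_flags: toggling ETH_FLAG_LRO flips the TPA flag; if the
+ * interface is running it is reloaded so the rings are rebuilt to match.
+ */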
+static int bnx2x_set_flags(struct net_device *dev, u32 data)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+       int changed = 0;
+       int rc = 0;
+
+       if (data & ETH_FLAG_LRO) {
+               if (!(dev->features & NETIF_F_LRO)) {
+                       dev->features |= NETIF_F_LRO;
+                       bp->flags |= TPA_ENABLE_FLAG;
+                       changed = 1;
+               }
+
+       } else if (dev->features & NETIF_F_LRO) {
+               dev->features &= ~NETIF_F_LRO;
+               bp->flags &= ~TPA_ENABLE_FLAG;
+               changed = 1;
+       }
+
+       if (changed && netif_running(dev)) {
+               bnx2x_nic_unload(bp, UNLOAD_NORMAL);
+               rc = bnx2x_nic_load(bp, LOAD_NORMAL);
+       }
+
+       return rc;
+}
+
 static void bnx2x_get_ringparam(struct net_device *dev,
                                struct ethtool_ringparam *ering)
 {
@@ -7274,166 +8278,639 @@ static int bnx2x_set_pauseparam(struct net_device *dev,
           "req_flow_ctrl 0x%x\n", bp->link_params.req_flow_ctrl);
 
        if (netif_running(dev)) {
-               bnx2x_stop_stats(bp);
+               bnx2x_stats_handle(bp, STATS_EVENT_STOP);
                bnx2x_link_set(bp);
        }
 
-       return 0;
-}
+       return 0;
+}
+
+static u32 bnx2x_get_rx_csum(struct net_device *dev)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+
+       return bp->rx_csum;
+}
+
+static int bnx2x_set_rx_csum(struct net_device *dev, u32 data)
+{
+       struct bnx2x *bp = netdev_priv(dev);
+
+       bp->rx_csum = data;
+       return 0;
+}
+
+static int bnx2x_set_tso(struct net_device *dev, u32 data)
+{
+       if (data) {
+               dev->features |= (NETIF_F_TSO | NETIF_F_TSO_ECN);
+               dev->features |= NETIF_F_TSO6;
+       } else {
+               dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO_ECN);
+               dev->features &= ~NETIF_F_TSO6;
+       }
+
+       return 0;
+}
+
+static const struct {
+       char string[ETH_GSTRING_LEN];
+} bnx2x_tests_str_arr[BNX2X_NUM_TESTS] = {
+       { "register_test (offline)" },
+       { "memory_test (offline)" },
+       { "loopback_test (offline)" },
+       { "nvram_test (online)" },
+       { "interrupt_test (online)" },
+       { "link_test (online)" },
+       { "idle check (online)" },
+       { "MC errors (online)" }
+};
+
+static int bnx2x_self_test_count(struct net_device *dev)
+{
+       return BNX2X_NUM_TESTS;
+}
+
+static int bnx2x_test_registers(struct bnx2x *bp)
+{
+       int idx, i, rc = -ENODEV;
+       u32 wr_val = 0;
+       static const struct {
+               u32  offset0;
+               u32  offset1;
+               u32  mask;
+       } reg_tbl[] = {
+/* 0 */                { BRB1_REG_PAUSE_LOW_THRESHOLD_0,      4, 0x000003ff },
+               { DORQ_REG_DB_ADDR0,                   4, 0xffffffff },
+               { HC_REG_AGG_INT_0,                    4, 0x000003ff },
+               { PBF_REG_MAC_IF0_ENABLE,              4, 0x00000001 },
+               { PBF_REG_P0_INIT_CRD,                 4, 0x000007ff },
+               { PRS_REG_CID_PORT_0,                  4, 0x00ffffff },
+               { PXP2_REG_PSWRQ_CDU0_L2P,             4, 0x000fffff },
+               { PXP2_REG_RQ_CDU0_EFIRST_MEM_ADDR,    8, 0x0003ffff },
+               { PXP2_REG_PSWRQ_TM0_L2P,              4, 0x000fffff },
+               { PXP2_REG_RQ_USDM0_EFIRST_MEM_ADDR,   8, 0x0003ffff },
+/* 10 */       { PXP2_REG_PSWRQ_TSDM0_L2P,            4, 0x000fffff },
+               { QM_REG_CONNNUM_0,                    4, 0x000fffff },
+               { TM_REG_LIN0_MAX_ACTIVE_CID,          4, 0x0003ffff },
+               { SRC_REG_KEYRSS0_0,                  40, 0xffffffff },
+               { SRC_REG_KEYRSS0_7,                  40, 0xffffffff },
+               { XCM_REG_WU_DA_SET_TMR_CNT_FLG_CMD00, 4, 0x00000001 },
+               { XCM_REG_WU_DA_CNT_CMD00,             4, 0x00000003 },
+               { XCM_REG_GLB_DEL_ACK_MAX_CNT_0,       4, 0x000000ff },
+               { NIG_REG_EGRESS_MNG0_FIFO,           20, 0xffffffff },
+               { NIG_REG_LLH0_T_BIT,                  4, 0x00000001 },
+/* 20 */       { NIG_REG_EMAC0_IN_EN,                 4, 0x00000001 },
+               { NIG_REG_BMAC0_IN_EN,                 4, 0x00000001 },
+               { NIG_REG_XCM0_OUT_EN,                 4, 0x00000001 },
+               { NIG_REG_BRB0_OUT_EN,                 4, 0x00000001 },
+               { NIG_REG_LLH0_XCM_MASK,               4, 0x00000007 },
+               { NIG_REG_LLH0_ACPI_PAT_6_LEN,        68, 0x000000ff },
+               { NIG_REG_LLH0_ACPI_PAT_0_CRC,        68, 0xffffffff },
+               { NIG_REG_LLH0_DEST_MAC_0_0,         160, 0xffffffff },
+               { NIG_REG_LLH0_DEST_IP_0_1,          160, 0xffffffff },
+               { NIG_REG_LLH0_IPV4_IPV6_0,          160, 0x00000001 },
+/* 30 */       { NIG_REG_LLH0_DEST_UDP_0,           160, 0x0000ffff },
+               { NIG_REG_LLH0_DEST_TCP_0,           160, 0x0000ffff },
+               { NIG_REG_LLH0_VLAN_ID_0,            160, 0x00000fff },
+               { NIG_REG_XGXS_SERDES0_MODE_SEL,       4, 0x00000001 },
+               { NIG_REG_LED_CONTROL_OVERRIDE_TRAFFIC_P0, 4, 0x00000001 },
+               { NIG_REG_STATUS_INTERRUPT_PORT0,      4, 0x07ffffff },
+               { NIG_REG_XGXS0_CTRL_EXTREMOTEMDIOST, 24, 0x00000001 },
+               { NIG_REG_SERDES0_CTRL_PHY_ADDR,      16, 0x0000001f },
+
+               { 0xffffffff, 0, 0x00000000 }
+       };
+
+       if (!netif_running(bp->dev))
+               return rc;
+
+       /* Repeat the test twice:
+          First by writing 0x00000000, second by writing 0xffffffff */
+       for (idx = 0; idx < 2; idx++) {
+
+               switch (idx) {
+               case 0:
+                       wr_val = 0;
+                       break;
+               case 1:
+                       wr_val = 0xffffffff;
+                       break;
+               }
+
+               for (i = 0; reg_tbl[i].offset0 != 0xffffffff; i++) {
+                       u32 offset, mask, save_val, val;
+                       int port = BP_PORT(bp);
+
+                       offset = reg_tbl[i].offset0 + port*reg_tbl[i].offset1;
+                       mask = reg_tbl[i].mask;
+
+                       save_val = REG_RD(bp, offset);
+
+                       REG_WR(bp, offset, wr_val);
+                       val = REG_RD(bp, offset);
+
+                       /* Restore the original register's value */
+                       REG_WR(bp, offset, save_val);
+
+                       /* verify the value matches what was written */
+                       if ((val & mask) != (wr_val & mask))
+                               goto test_reg_exit;
+               }
+       }
+
+       rc = 0;
+
+test_reg_exit:
+       return rc;
+}
+
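The loop above is the classic save/write/read-back/restore probe, masked to the bits each register actually implements. Distilled to a single register, with reg_read()/reg_write() as hypothetical stand-ins for REG_RD/REG_WR:

/* return 0 iff the bits under 'mask' faithfully store wr_val */
static int check_one_reg(u32 offset, u32 mask, u32 wr_val)
{
	u32 saved = reg_read(offset);	/* remember the live value */
	u32 got;

	reg_write(offset, wr_val);
	got = reg_read(offset);
	reg_write(offset, saved);	/* restore before judging the result */

	return ((got & mask) == (wr_val & mask)) ? 0 : -ENODEV;
}
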
+static int bnx2x_test_memory(struct bnx2x *bp)
+{
+       int i, j, rc = -ENODEV;
+       u32 val;
+       static const struct {
+               u32 offset;
+               int size;
+       } mem_tbl[] = {
+               { CCM_REG_XX_DESCR_TABLE,   CCM_REG_XX_DESCR_TABLE_SIZE },
+               { CFC_REG_ACTIVITY_COUNTER, CFC_REG_ACTIVITY_COUNTER_SIZE },
+               { CFC_REG_LINK_LIST,        CFC_REG_LINK_LIST_SIZE },
+               { DMAE_REG_CMD_MEM,         DMAE_REG_CMD_MEM_SIZE },
+               { TCM_REG_XX_DESCR_TABLE,   TCM_REG_XX_DESCR_TABLE_SIZE },
+               { UCM_REG_XX_DESCR_TABLE,   UCM_REG_XX_DESCR_TABLE_SIZE },
+               { XCM_REG_XX_DESCR_TABLE,   XCM_REG_XX_DESCR_TABLE_SIZE },
+
+               { 0xffffffff, 0 }
+       };
+       static const struct {
+               char *name;
+               u32 offset;
+               u32 mask;
+       } prty_tbl[] = {
+               { "CCM_REG_CCM_PRTY_STS",     CCM_REG_CCM_PRTY_STS,     0 },
+               { "CFC_REG_CFC_PRTY_STS",     CFC_REG_CFC_PRTY_STS,     0 },
+               { "DMAE_REG_DMAE_PRTY_STS",   DMAE_REG_DMAE_PRTY_STS,   0 },
+               { "TCM_REG_TCM_PRTY_STS",     TCM_REG_TCM_PRTY_STS,     0 },
+               { "UCM_REG_UCM_PRTY_STS",     UCM_REG_UCM_PRTY_STS,     0 },
+               { "XCM_REG_XCM_PRTY_STS",     XCM_REG_XCM_PRTY_STS,     0x1 },
+
+               { NULL, 0xffffffff, 0 }
+       };
+
+       if (!netif_running(bp->dev))
+               return rc;
+
+       /* Go through all the memories */
+       for (i = 0; mem_tbl[i].offset != 0xffffffff; i++)
+               for (j = 0; j < mem_tbl[i].size; j++)
+                       REG_RD(bp, mem_tbl[i].offset + j*4);
+
+       /* Check the parity status */
+       for (i = 0; prty_tbl[i].offset != 0xffffffff; i++) {
+               val = REG_RD(bp, prty_tbl[i].offset);
+               if (val & ~(prty_tbl[i].mask)) {
+                       DP(NETIF_MSG_HW,
+                          "%s is 0x%x\n", prty_tbl[i].name, val);
+                       goto test_mem_exit;
+               }
+       }
+
+       rc = 0;
+
+test_mem_exit:
+       return rc;
+}
+
+static void bnx2x_netif_start(struct bnx2x *bp)
+{
+       int i;
+
+       if (atomic_dec_and_test(&bp->intr_sem)) {
+               if (netif_running(bp->dev)) {
+                       bnx2x_int_enable(bp);
+                       for_each_queue(bp, i)
+                               napi_enable(&bnx2x_fp(bp, i, napi));
+                       if (bp->state == BNX2X_STATE_OPEN)
+                               netif_wake_queue(bp->dev);
+               }
+       }
+}
+
+static void bnx2x_netif_stop(struct bnx2x *bp)
+{
+       int i;
+
+       if (netif_running(bp->dev)) {
+               netif_tx_disable(bp->dev);
+               bp->dev->trans_start = jiffies; /* prevent tx timeout */
+               for_each_queue(bp, i)
+                       napi_disable(&bnx2x_fp(bp, i, napi));
+       }
+       bnx2x_int_disable_sync(bp);
+}
+
+static void bnx2x_wait_for_link(struct bnx2x *bp, u8 link_up)
+{
+       int cnt = 1000;
+
+       if (link_up)
+               while (bnx2x_link_test(bp) && cnt--)
+                       msleep(10);
+}
+
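The helper above bounds the wait to roughly ten seconds (1000 polls at 10 ms) instead of blocking on a link that may never return. The same bounded-poll idiom written generically; cond() is a placeholder predicate, not a driver symbol:

/* poll cond() every 10 ms, give up after ~10 s; 0 on success */
static int poll_bounded(int (*cond)(void *arg), void *arg)
{
	int cnt = 1000;

	while (!cond(arg) && cnt--)
		msleep(10);

	return cond(arg) ? 0 : -ETIMEDOUT;
}
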
+static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode, u8 link_up)
+{
+       unsigned int pkt_size, num_pkts, i;
+       struct sk_buff *skb;
+       unsigned char *packet;
+       struct bnx2x_fastpath *fp = &bp->fp[0];
+       u16 tx_start_idx, tx_idx;
+       u16 rx_start_idx, rx_idx;
+       u16 pkt_prod;
+       struct sw_tx_bd *tx_buf;
+       struct eth_tx_bd *tx_bd;
+       dma_addr_t mapping;
+       union eth_rx_cqe *cqe;
+       u8 cqe_fp_flags;
+       struct sw_rx_bd *rx_buf;
+       u16 len;
+       int rc = -ENODEV;
+
+       if (loopback_mode == BNX2X_MAC_LOOPBACK) {
+               bp->link_params.loopback_mode = LOOPBACK_BMAC;
+               bnx2x_phy_hw_lock(bp);
+               bnx2x_phy_init(&bp->link_params, &bp->link_vars);
+               bnx2x_phy_hw_unlock(bp);
+
+       } else if (loopback_mode == BNX2X_PHY_LOOPBACK) {
+               bp->link_params.loopback_mode = LOOPBACK_XGXS_10;
+               bnx2x_phy_hw_lock(bp);
+               bnx2x_phy_init(&bp->link_params, &bp->link_vars);
+               bnx2x_phy_hw_unlock(bp);
+               /* wait until link state is restored */
+               bnx2x_wait_for_link(bp, link_up);
+
+       } else
+               return -EINVAL;
+
+       pkt_size = 1514;
+       skb = netdev_alloc_skb(bp->dev, bp->rx_buf_size);
+       if (!skb) {
+               rc = -ENOMEM;
+               goto test_loopback_exit;
+       }
+       packet = skb_put(skb, pkt_size);
+       memcpy(packet, bp->dev->dev_addr, ETH_ALEN);
+       memset(packet + ETH_ALEN, 0, (ETH_HLEN - ETH_ALEN));
+       for (i = ETH_HLEN; i < pkt_size; i++)
+               packet[i] = (unsigned char) (i & 0xff);
+
+       num_pkts = 0;
+       tx_start_idx = le16_to_cpu(*fp->tx_cons_sb);
+       rx_start_idx = le16_to_cpu(*fp->rx_cons_sb);
+
+       pkt_prod = fp->tx_pkt_prod++;
+       tx_buf = &fp->tx_buf_ring[TX_BD(pkt_prod)];
+       tx_buf->first_bd = fp->tx_bd_prod;
+       tx_buf->skb = skb;
+
+       tx_bd = &fp->tx_desc_ring[TX_BD(fp->tx_bd_prod)];
+       mapping = pci_map_single(bp->pdev, skb->data,
+                                skb_headlen(skb), PCI_DMA_TODEVICE);
+       tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
+       tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
+       tx_bd->nbd = cpu_to_le16(1);
+       tx_bd->nbytes = cpu_to_le16(skb_headlen(skb));
+       tx_bd->vlan = cpu_to_le16(pkt_prod);
+       tx_bd->bd_flags.as_bitfield = (ETH_TX_BD_FLAGS_START_BD |
+                                      ETH_TX_BD_FLAGS_END_BD);
+       tx_bd->general_data = ((UNICAST_ADDRESS <<
+                               ETH_TX_BD_ETH_ADDR_TYPE_SHIFT) | 1);
+
+       fp->hw_tx_prods->bds_prod =
+               cpu_to_le16(le16_to_cpu(fp->hw_tx_prods->bds_prod) + 1);
+       mb(); /* FW restriction: must not reorder writing nbd and packets */
+       fp->hw_tx_prods->packets_prod =
+               cpu_to_le32(le32_to_cpu(fp->hw_tx_prods->packets_prod) + 1);
+       DOORBELL(bp, FP_IDX(fp), 0);
+
+       mmiowb();
+
+       num_pkts++;
+       fp->tx_bd_prod++;
+       bp->dev->trans_start = jiffies;
+
+       udelay(100);
+
+       tx_idx = le16_to_cpu(*fp->tx_cons_sb);
+       if (tx_idx != tx_start_idx + num_pkts)
+               goto test_loopback_exit;
+
+       rx_idx = le16_to_cpu(*fp->rx_cons_sb);
+       if (rx_idx != rx_start_idx + num_pkts)
+               goto test_loopback_exit;
+
+       cqe = &fp->rx_comp_ring[RCQ_BD(fp->rx_comp_cons)];
+       cqe_fp_flags = cqe->fast_path_cqe.type_error_flags;
+       if (CQE_TYPE(cqe_fp_flags) || (cqe_fp_flags & ETH_RX_ERROR_FALGS))
+               goto test_loopback_rx_exit;
+
+       len = le16_to_cpu(cqe->fast_path_cqe.pkt_len);
+       if (len != pkt_size)
+               goto test_loopback_rx_exit;
+
+       rx_buf = &fp->rx_buf_ring[RX_BD(fp->rx_bd_cons)];
+       skb = rx_buf->skb;
+       skb_reserve(skb, cqe->fast_path_cqe.placement_offset);
+       for (i = ETH_HLEN; i < pkt_size; i++)
+               if (*(skb->data + i) != (unsigned char) (i & 0xff))
+                       goto test_loopback_rx_exit;
+
+       rc = 0;
+
+test_loopback_rx_exit:
+       bp->dev->last_rx = jiffies;
+
+       fp->rx_bd_cons = NEXT_RX_IDX(fp->rx_bd_cons);
+       fp->rx_bd_prod = NEXT_RX_IDX(fp->rx_bd_prod);
+       fp->rx_comp_cons = NEXT_RCQ_IDX(fp->rx_comp_cons);
+       fp->rx_comp_prod = NEXT_RCQ_IDX(fp->rx_comp_prod);
+
+       /* Update producers */
+       bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod,
+                            fp->rx_sge_prod);
+       mmiowb(); /* keep prod updates ordered */
+
+test_loopback_exit:
+       bp->link_params.loopback_mode = LOOPBACK_NONE;
+
+       return rc;
+}
+
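The test frame built above is self-describing: past the Ethernet header, byte i carries (i & 0xff), so the receive side can validate the payload without keeping a reference copy. The pattern in isolation, as a sketch:

/* fill bytes [hlen, len) with the position-derived test pattern */
static void fill_pattern(unsigned char *p, int hlen, int len)
{
	int i;

	for (i = hlen; i < len; i++)
		p[i] = (unsigned char)(i & 0xff);
}

/* return 0 iff the looped-back payload still matches the pattern */
static int verify_pattern(const unsigned char *p, int hlen, int len)
{
	int i;

	for (i = hlen; i < len; i++)
		if (p[i] != (unsigned char)(i & 0xff))
			return -1;	/* payload corrupted in loopback */
	return 0;
}
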
+static int bnx2x_test_loopback(struct bnx2x *bp, u8 link_up)
+{
+       int rc = 0;
+
+       if (!netif_running(bp->dev))
+               return BNX2X_LOOPBACK_FAILED;
+
+       bnx2x_netif_stop(bp);
+
+       if (bnx2x_run_loopback(bp, BNX2X_MAC_LOOPBACK, link_up)) {
+               DP(NETIF_MSG_PROBE, "MAC loopback failed\n");
+               rc |= BNX2X_MAC_LOOPBACK_FAILED;
+       }
+
+       if (bnx2x_run_loopback(bp, BNX2X_PHY_LOOPBACK, link_up)) {
+               DP(NETIF_MSG_PROBE, "PHY loopback failed\n");
+               rc |= BNX2X_PHY_LOOPBACK_FAILED;
+       }
+
+       bnx2x_netif_start(bp);
+
+       return rc;
+}
+
+#define CRC32_RESIDUAL                 0xdebb20e3
+
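Each NVRAM region stores its CRC32 in the trailing four bytes, so running ether_crc_le() over payload plus stored CRC yields a content-independent constant: 0xdebb20e3 is the un-inverted register form of the familiar CRC-32 residue 0x2144df1c. A sketch of why the check in bnx2x_test_nvram() below works, assuming the image stores the standard final-XORed CRC-32 in little-endian order:

/* append the CRC and a scan of the whole region collapses to the residual */
__le32 crc = cpu_to_le32(~ether_crc_le(size - 4, data));

memcpy(data + size - 4, &crc, 4);
WARN_ON(ether_crc_le(size, data) != CRC32_RESIDUAL);
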
+static int bnx2x_test_nvram(struct bnx2x *bp)
+{
+       static const struct {
+               int offset;
+               int size;
+       } nvram_tbl[] = {
+               {     0,  0x14 }, /* bootstrap */
+               {  0x14,  0xec }, /* dir */
+               { 0x100, 0x350 }, /* manuf_info */
+               { 0x450,  0xf0 }, /* feature_info */
+               { 0x640,  0x64 }, /* upgrade_key_info */
+               { 0x6a4,  0x64 },
+               { 0x708,  0x70 }, /* manuf_key_info */
+               { 0x778,  0x70 },
+               {     0,     0 }
+       };
+       u32 buf[0x350 / 4];
+       u8 *data = (u8 *)buf;
+       int i, rc;
+       u32 magic, csum;
+
+       rc = bnx2x_nvram_read(bp, 0, data, 4);
+       if (rc) {
+               DP(NETIF_MSG_PROBE, "magic value read (rc -%d)\n", -rc);
+               goto test_nvram_exit;
+       }
+
+       magic = be32_to_cpu(buf[0]);
+       if (magic != 0x669955aa) {
+               DP(NETIF_MSG_PROBE, "magic value (0x%08x)\n", magic);
+               rc = -ENODEV;
+               goto test_nvram_exit;
+       }
+
+       for (i = 0; nvram_tbl[i].size; i++) {
 
-static u32 bnx2x_get_rx_csum(struct net_device *dev)
-{
-       struct bnx2x *bp = netdev_priv(dev);
+               rc = bnx2x_nvram_read(bp, nvram_tbl[i].offset, data,
+                                     nvram_tbl[i].size);
+               if (rc) {
+                       DP(NETIF_MSG_PROBE,
+                          "nvram_tbl[%d] read data (rc -%d)\n", i, -rc);
+                       goto test_nvram_exit;
+               }
 
-       return bp->rx_csum;
+               csum = ether_crc_le(nvram_tbl[i].size, data);
+               if (csum != CRC32_RESIDUAL) {
+                       DP(NETIF_MSG_PROBE,
+                          "nvram_tbl[%d] csum value (0x%08x)\n", i, csum);
+                       rc = -ENODEV;
+                       goto test_nvram_exit;
+               }
+       }
+
+test_nvram_exit:
+       return rc;
 }
 
-static int bnx2x_set_rx_csum(struct net_device *dev, u32 data)
+static int bnx2x_test_intr(struct bnx2x *bp)
 {
-       struct bnx2x *bp = netdev_priv(dev);
+       struct mac_configuration_cmd *config = bnx2x_sp(bp, mac_config);
+       int i, rc;
 
-       bp->rx_csum = data;
-       return 0;
-}
+       if (!netif_running(bp->dev))
+               return -ENODEV;
 
-static int bnx2x_set_tso(struct net_device *dev, u32 data)
-{
-       if (data)
-               dev->features |= (NETIF_F_TSO | NETIF_F_TSO_ECN);
-       else
-               dev->features &= ~(NETIF_F_TSO | NETIF_F_TSO_ECN);
-       return 0;
-}
+       config->hdr.length_6b = 0;
+       config->hdr.offset = 0;
+       config->hdr.client_id = BP_CL_ID(bp);
+       config->hdr.reserved1 = 0;
 
-static struct {
-       char string[ETH_GSTRING_LEN];
-} bnx2x_tests_str_arr[BNX2X_NUM_TESTS] = {
-       { "MC Errors  (online)" }
-};
+       rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_ETH_SET_MAC, 0,
+                          U64_HI(bnx2x_sp_mapping(bp, mac_config)),
+                          U64_LO(bnx2x_sp_mapping(bp, mac_config)), 0);
+       if (rc == 0) {
+               bp->set_mac_pending++;
+               for (i = 0; i < 10; i++) {
+                       if (!bp->set_mac_pending)
+                               break;
+                       msleep_interruptible(10);
+               }
+               if (i == 10)
+                       rc = -ENODEV;
+       }
 
-static int bnx2x_self_test_count(struct net_device *dev)
-{
-       return BNX2X_NUM_TESTS;
+       return rc;
 }
 
 static void bnx2x_self_test(struct net_device *dev,
                            struct ethtool_test *etest, u64 *buf)
 {
        struct bnx2x *bp = netdev_priv(dev);
-       int stats_state;
 
        memset(buf, 0, sizeof(u64) * BNX2X_NUM_TESTS);
 
-       if (bp->state != BNX2X_STATE_OPEN) {
-               DP(NETIF_MSG_PROBE, "state is %x, returning\n", bp->state);
+       if (!netif_running(dev))
                return;
-       }
 
-       stats_state = bp->stats_state;
-       bnx2x_stop_stats(bp);
+       /* offline tests are not supported in MF mode */
+       if (IS_E1HMF(bp))
+               etest->flags &= ~ETH_TEST_FL_OFFLINE;
+
+       if (etest->flags & ETH_TEST_FL_OFFLINE) {
+               u8 link_up;
 
-       if (bnx2x_mc_assert(bp) != 0) {
-               buf[0] = 1;
+               link_up = bp->link_vars.link_up;
+               bnx2x_nic_unload(bp, UNLOAD_NORMAL);
+               bnx2x_nic_load(bp, LOAD_DIAG);
+               /* wait until link state is restored */
+               bnx2x_wait_for_link(bp, link_up);
+
+               if (bnx2x_test_registers(bp) != 0) {
+                       buf[0] = 1;
+                       etest->flags |= ETH_TEST_FL_FAILED;
+               }
+               if (bnx2x_test_memory(bp) != 0) {
+                       buf[1] = 1;
+                       etest->flags |= ETH_TEST_FL_FAILED;
+               }
+               buf[2] = bnx2x_test_loopback(bp, link_up);
+               if (buf[2] != 0)
+                       etest->flags |= ETH_TEST_FL_FAILED;
+
+               bnx2x_nic_unload(bp, UNLOAD_NORMAL);
+               bnx2x_nic_load(bp, LOAD_NORMAL);
+               /* wait until link state is restored */
+               bnx2x_wait_for_link(bp, link_up);
+       }
+       if (bnx2x_test_nvram(bp) != 0) {
+               buf[3] = 1;
+               etest->flags |= ETH_TEST_FL_FAILED;
+       }
+       if (bnx2x_test_intr(bp) != 0) {
+               buf[4] = 1;
                etest->flags |= ETH_TEST_FL_FAILED;
        }
+       if (bp->port.pmf)
+               if (bnx2x_link_test(bp) != 0) {
+                       buf[5] = 1;
+                       etest->flags |= ETH_TEST_FL_FAILED;
+               }
+       buf[7] = bnx2x_mc_assert(bp);
+       if (buf[7] != 0)
+               etest->flags |= ETH_TEST_FL_FAILED;
 
 #ifdef BNX2X_EXTRA_DEBUG
        bnx2x_panic_dump(bp);
 #endif
-       bp->stats_state = stats_state;
 }
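
The whole routine is driven from user space through ETHTOOL_TEST (what "ethtool -t ethX offline" issues). A sketch of the calling side, reusing the socket and ifreq setup from the LRO sketch earlier; the test count should really come from ETHTOOL_GDRVINFO's testinfo_len and is hard-coded here to the eight tests above:

struct ethtool_test *test = calloc(1, sizeof(*test) + 8 * sizeof(__u64));

test->cmd = ETHTOOL_TEST;
test->flags = ETH_TEST_FL_OFFLINE;	/* also request the destructive set */
ifr.ifr_data = (void *)test;
if (ioctl(fd, SIOCETHTOOL, &ifr) == 0 &&
    (test->flags & ETH_TEST_FL_FAILED))
	;	/* per-test results are in test->data[0..7], as filled above */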
 
-static struct {
+static const struct {
+       long offset;
+       int size;
+       u32 flags;
        char string[ETH_GSTRING_LEN];
-} bnx2x_stats_str_arr[BNX2X_NUM_STATS] = {
-       { "rx_bytes"},
-       { "rx_error_bytes"},
-       { "tx_bytes"},
-       { "tx_error_bytes"},
-       { "rx_ucast_packets"},
-       { "rx_mcast_packets"},
-       { "rx_bcast_packets"},
-       { "tx_ucast_packets"},
-       { "tx_mcast_packets"},
-       { "tx_bcast_packets"},
-       { "tx_mac_errors"},     /* 10 */
-       { "tx_carrier_errors"},
-       { "rx_crc_errors"},
-       { "rx_align_errors"},
-       { "tx_single_collisions"},
-       { "tx_multi_collisions"},
-       { "tx_deferred"},
-       { "tx_excess_collisions"},
-       { "tx_late_collisions"},
-       { "tx_total_collisions"},
-       { "rx_fragments"},      /* 20 */
-       { "rx_jabbers"},
-       { "rx_undersize_packets"},
-       { "rx_oversize_packets"},
-       { "rx_xon_frames"},
-       { "rx_xoff_frames"},
-       { "tx_xon_frames"},
-       { "tx_xoff_frames"},
-       { "rx_mac_ctrl_frames"},
-       { "rx_filtered_packets"},
-       { "rx_discards"},       /* 30 */
-       { "brb_discard"},
-       { "brb_truncate"},
-       { "xxoverflow"}
-};
-
-#define STATS_OFFSET32(offset_name) \
-       (offsetof(struct bnx2x_eth_stats, offset_name) / 4)
-
-static unsigned long bnx2x_stats_offset_arr[BNX2X_NUM_STATS] = {
-       STATS_OFFSET32(total_bytes_received_hi),
-       STATS_OFFSET32(stat_IfHCInBadOctets_hi),
-       STATS_OFFSET32(total_bytes_transmitted_hi),
-       STATS_OFFSET32(stat_IfHCOutBadOctets_hi),
-       STATS_OFFSET32(total_unicast_packets_received_hi),
-       STATS_OFFSET32(total_multicast_packets_received_hi),
-       STATS_OFFSET32(total_broadcast_packets_received_hi),
-       STATS_OFFSET32(total_unicast_packets_transmitted_hi),
-       STATS_OFFSET32(total_multicast_packets_transmitted_hi),
-       STATS_OFFSET32(total_broadcast_packets_transmitted_hi),
-       STATS_OFFSET32(stat_Dot3statsInternalMacTransmitErrors), /* 10 */
-       STATS_OFFSET32(stat_Dot3StatsCarrierSenseErrors),
-       STATS_OFFSET32(crc_receive_errors),
-       STATS_OFFSET32(alignment_errors),
-       STATS_OFFSET32(single_collision_transmit_frames),
-       STATS_OFFSET32(multiple_collision_transmit_frames),
-       STATS_OFFSET32(stat_Dot3StatsDeferredTransmissions),
-       STATS_OFFSET32(excessive_collision_frames),
-       STATS_OFFSET32(late_collision_frames),
-       STATS_OFFSET32(number_of_bugs_found_in_stats_spec),
-       STATS_OFFSET32(runt_packets_received),                  /* 20 */
-       STATS_OFFSET32(jabber_packets_received),
-       STATS_OFFSET32(error_runt_packets_received),
-       STATS_OFFSET32(error_jabber_packets_received),
-       STATS_OFFSET32(pause_xon_frames_received),
-       STATS_OFFSET32(pause_xoff_frames_received),
-       STATS_OFFSET32(pause_xon_frames_transmitted),
-       STATS_OFFSET32(pause_xoff_frames_transmitted),
-       STATS_OFFSET32(control_frames_received),
-       STATS_OFFSET32(mac_filter_discard),
-       STATS_OFFSET32(no_buff_discard),                        /* 30 */
-       STATS_OFFSET32(brb_discard),
-       STATS_OFFSET32(brb_truncate_discard),
-       STATS_OFFSET32(xxoverflow_discard)
-};
-
-static u8 bnx2x_stats_len_arr[BNX2X_NUM_STATS] = {
-       8, 0, 8, 0, 8, 8, 8, 8, 8, 8,
-       4, 0, 4, 4, 4, 4, 4, 4, 4, 4,
-       4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-       4, 4, 4, 4
+} bnx2x_stats_arr[BNX2X_NUM_STATS] = {
+/* 1 */        { STATS_OFFSET32(valid_bytes_received_hi),     8, 1, "rx_bytes" },
+       { STATS_OFFSET32(error_bytes_received_hi),     8, 1, "rx_error_bytes" },
+       { STATS_OFFSET32(total_bytes_transmitted_hi),  8, 1, "tx_bytes" },
+       { STATS_OFFSET32(tx_stat_ifhcoutbadoctets_hi), 8, 0, "tx_error_bytes" },
+       { STATS_OFFSET32(total_unicast_packets_received_hi),
+                                               8, 1, "rx_ucast_packets" },
+       { STATS_OFFSET32(total_multicast_packets_received_hi),
+                                               8, 1, "rx_mcast_packets" },
+       { STATS_OFFSET32(total_broadcast_packets_received_hi),
+                                               8, 1, "rx_bcast_packets" },
+       { STATS_OFFSET32(total_unicast_packets_transmitted_hi),
+                                               8, 1, "tx_packets" },
+       { STATS_OFFSET32(tx_stat_dot3statsinternalmactransmiterrors_hi),
+                                               8, 0, "tx_mac_errors" },
+/* 10 */{ STATS_OFFSET32(rx_stat_dot3statscarriersenseerrors_hi),
+                                               8, 0, "tx_carrier_errors" },
+       { STATS_OFFSET32(rx_stat_dot3statsfcserrors_hi),
+                                               8, 0, "rx_crc_errors" },
+       { STATS_OFFSET32(rx_stat_dot3statsalignmenterrors_hi),
+                                               8, 0, "rx_align_errors" },
+       { STATS_OFFSET32(tx_stat_dot3statssinglecollisionframes_hi),
+                                               8, 0, "tx_single_collisions" },
+       { STATS_OFFSET32(tx_stat_dot3statsmultiplecollisionframes_hi),
+                                               8, 0, "tx_multi_collisions" },
+       { STATS_OFFSET32(tx_stat_dot3statsdeferredtransmissions_hi),
+                                               8, 0, "tx_deferred" },
+       { STATS_OFFSET32(tx_stat_dot3statsexcessivecollisions_hi),
+                                               8, 0, "tx_excess_collisions" },
+       { STATS_OFFSET32(tx_stat_dot3statslatecollisions_hi),
+                                               8, 0, "tx_late_collisions" },
+       { STATS_OFFSET32(tx_stat_etherstatscollisions_hi),
+                                               8, 0, "tx_total_collisions" },
+       { STATS_OFFSET32(rx_stat_etherstatsfragments_hi),
+                                               8, 0, "rx_fragments" },
+/* 20 */{ STATS_OFFSET32(rx_stat_etherstatsjabbers_hi), 8, 0, "rx_jabbers" },
+       { STATS_OFFSET32(rx_stat_etherstatsundersizepkts_hi),
+                                               8, 0, "rx_undersize_packets" },
+       { STATS_OFFSET32(jabber_packets_received),
+                                               4, 1, "rx_oversize_packets" },
+       { STATS_OFFSET32(tx_stat_etherstatspkts64octets_hi),
+                                               8, 0, "tx_64_byte_packets" },
+       { STATS_OFFSET32(tx_stat_etherstatspkts65octetsto127octets_hi),
+                                       8, 0, "tx_65_to_127_byte_packets" },
+       { STATS_OFFSET32(tx_stat_etherstatspkts128octetsto255octets_hi),
+                                       8, 0, "tx_128_to_255_byte_packets" },
+       { STATS_OFFSET32(tx_stat_etherstatspkts256octetsto511octets_hi),
+                                       8, 0, "tx_256_to_511_byte_packets" },
+       { STATS_OFFSET32(tx_stat_etherstatspkts512octetsto1023octets_hi),
+                                       8, 0, "tx_512_to_1023_byte_packets" },
+       { STATS_OFFSET32(etherstatspkts1024octetsto1522octets_hi),
+                                       8, 0, "tx_1024_to_1522_byte_packets" },
+       { STATS_OFFSET32(etherstatspktsover1522octets_hi),
+                                       8, 0, "tx_1523_to_9022_byte_packets" },
+/* 30 */{ STATS_OFFSET32(rx_stat_xonpauseframesreceived_hi),
+                                               8, 0, "rx_xon_frames" },
+       { STATS_OFFSET32(rx_stat_xoffpauseframesreceived_hi),
+                                               8, 0, "rx_xoff_frames" },
+       { STATS_OFFSET32(tx_stat_outxonsent_hi),  8, 0, "tx_xon_frames" },
+       { STATS_OFFSET32(tx_stat_outxoffsent_hi), 8, 0, "tx_xoff_frames" },
+       { STATS_OFFSET32(rx_stat_maccontrolframesreceived_hi),
+                                               8, 0, "rx_mac_ctrl_frames" },
+       { STATS_OFFSET32(mac_filter_discard),   4, 1, "rx_filtered_packets" },
+       { STATS_OFFSET32(no_buff_discard),      4, 1, "rx_discards" },
+       { STATS_OFFSET32(xxoverflow_discard),   4, 1, "rx_fw_discards" },
+       { STATS_OFFSET32(brb_drop_hi),          8, 1, "brb_discard" },
+/* 39 */{ STATS_OFFSET32(brb_truncate_discard), 8, 1, "brb_truncate" }
 };
 
 static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 {
+       struct bnx2x *bp = netdev_priv(dev);
+       int i, j;
+
        switch (stringset) {
        case ETH_SS_STATS:
-               memcpy(buf, bnx2x_stats_str_arr, sizeof(bnx2x_stats_str_arr));
+               for (i = 0, j = 0; i < BNX2X_NUM_STATS; i++) {
+                       if (IS_E1HMF(bp) && (!bnx2x_stats_arr[i].flags))
+                               continue;
+                       strcpy(buf + j*ETH_GSTRING_LEN,
+                              bnx2x_stats_arr[i].string);
+                       j++;
+               }
                break;
 
        case ETH_SS_TEST:
@@ -7444,34 +8921,44 @@ static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 
 static int bnx2x_get_stats_count(struct net_device *dev)
 {
-       return BNX2X_NUM_STATS;
+       struct bnx2x *bp = netdev_priv(dev);
+       int i, num_stats = 0;
+
+       for (i = 0; i < BNX2X_NUM_STATS; i++) {
+               if (IS_E1HMF(bp) && (!bnx2x_stats_arr[i].flags))
+                       continue;
+               num_stats++;
+       }
+       return num_stats;
 }
 
 static void bnx2x_get_ethtool_stats(struct net_device *dev,
                                    struct ethtool_stats *stats, u64 *buf)
 {
        struct bnx2x *bp = netdev_priv(dev);
-       u32 *hw_stats = (u32 *)bnx2x_sp_check(bp, eth_stats);
-       int i;
+       u32 *hw_stats = (u32 *)&bp->eth_stats;
+       int i, j;
 
-       for (i = 0; i < BNX2X_NUM_STATS; i++) {
-               if (bnx2x_stats_len_arr[i] == 0) {
-                       /* skip this counter */
-                       buf[i] = 0;
+       for (i = 0, j = 0; i < BNX2X_NUM_STATS; i++) {
+               if (IS_E1HMF(bp) && (!bnx2x_stats_arr[i].flags))
                        continue;
-               }
-               if (!hw_stats) {
-                       buf[i] = 0;
+
+               if (bnx2x_stats_arr[i].size == 0) {
+                       /* skip this counter */
+                       buf[j] = 0;
+                       j++;
                        continue;
                }
-               if (bnx2x_stats_len_arr[i] == 4) {
+               if (bnx2x_stats_arr[i].size == 4) {
                        /* 4-byte counter */
-                      buf[i] = (u64) *(hw_stats + bnx2x_stats_offset_arr[i]);
+                       buf[j] = (u64) *(hw_stats + bnx2x_stats_arr[i].offset);
+                       j++;
                        continue;
                }
                /* 8-byte counter */
-               buf[i] = HILO_U64(*(hw_stats + bnx2x_stats_offset_arr[i]),
-                                *(hw_stats + bnx2x_stats_offset_arr[i] + 1));
+               buf[j] = HILO_U64(*(hw_stats + bnx2x_stats_arr[i].offset),
+                                 *(hw_stats + bnx2x_stats_arr[i].offset + 1));
+               j++;
        }
 }
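
The 8-byte counters sit in adjacent 32-bit words, high word first, and HILO_U64 reassembles them. Its conventional definition, as a sketch (the authoritative one lives in bnx2x.h):

/* combine a {hi, lo} pair of 32-bit counter words into one u64 */
#define HILO_U64(hi, lo)	((((u64)(hi)) << 32) + (lo))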
 
@@ -7515,38 +9002,40 @@ static int bnx2x_phys_id(struct net_device *dev, u32 data)
 }
 
 static struct ethtool_ops bnx2x_ethtool_ops = {
-       .get_settings           = bnx2x_get_settings,
-       .set_settings           = bnx2x_set_settings,
-       .get_drvinfo            = bnx2x_get_drvinfo,
+       .get_settings           = bnx2x_get_settings,
+       .set_settings           = bnx2x_set_settings,
+       .get_drvinfo            = bnx2x_get_drvinfo,
        .get_wol                = bnx2x_get_wol,
        .set_wol                = bnx2x_set_wol,
-       .get_msglevel           = bnx2x_get_msglevel,
-       .set_msglevel           = bnx2x_set_msglevel,
-       .nway_reset             = bnx2x_nway_reset,
-       .get_link               = ethtool_op_get_link,
-       .get_eeprom_len         = bnx2x_get_eeprom_len,
-       .get_eeprom             = bnx2x_get_eeprom,
-       .set_eeprom             = bnx2x_set_eeprom,
-       .get_coalesce           = bnx2x_get_coalesce,
-       .set_coalesce           = bnx2x_set_coalesce,
-       .get_ringparam          = bnx2x_get_ringparam,
-       .set_ringparam          = bnx2x_set_ringparam,
-       .get_pauseparam         = bnx2x_get_pauseparam,
-       .set_pauseparam         = bnx2x_set_pauseparam,
-       .get_rx_csum            = bnx2x_get_rx_csum,
-       .set_rx_csum            = bnx2x_set_rx_csum,
-       .get_tx_csum            = ethtool_op_get_tx_csum,
-       .set_tx_csum            = ethtool_op_set_tx_csum,
-       .get_sg                 = ethtool_op_get_sg,
-       .set_sg                 = ethtool_op_set_sg,
+       .get_msglevel           = bnx2x_get_msglevel,
+       .set_msglevel           = bnx2x_set_msglevel,
+       .nway_reset             = bnx2x_nway_reset,
+       .get_link               = ethtool_op_get_link,
+       .get_eeprom_len         = bnx2x_get_eeprom_len,
+       .get_eeprom             = bnx2x_get_eeprom,
+       .set_eeprom             = bnx2x_set_eeprom,
+       .get_coalesce           = bnx2x_get_coalesce,
+       .set_coalesce           = bnx2x_set_coalesce,
+       .get_ringparam          = bnx2x_get_ringparam,
+       .set_ringparam          = bnx2x_set_ringparam,
+       .get_pauseparam         = bnx2x_get_pauseparam,
+       .set_pauseparam         = bnx2x_set_pauseparam,
+       .get_rx_csum            = bnx2x_get_rx_csum,
+       .set_rx_csum            = bnx2x_set_rx_csum,
+       .get_tx_csum            = ethtool_op_get_tx_csum,
+       .set_tx_csum            = ethtool_op_set_tx_hw_csum,
+       .set_flags              = bnx2x_set_flags,
+       .get_flags              = ethtool_op_get_flags,
+       .get_sg                 = ethtool_op_get_sg,
+       .set_sg                 = ethtool_op_set_sg,
        .get_tso                = ethtool_op_get_tso,
        .set_tso                = bnx2x_set_tso,
        .self_test_count        = bnx2x_self_test_count,
-       .self_test              = bnx2x_self_test,
-       .get_strings            = bnx2x_get_strings,
+       .self_test              = bnx2x_self_test,
+       .get_strings            = bnx2x_get_strings,
        .phys_id                = bnx2x_phys_id,
        .get_stats_count        = bnx2x_get_stats_count,
-       .get_ethtool_stats      = bnx2x_get_ethtool_stats
+       .get_ethtool_stats      = bnx2x_get_ethtool_stats,
 };
 
 /* end of ethtool_ops */
@@ -7640,9 +9129,180 @@ poll_panic:
        return work_done;
 }
 
-/* Called with netif_tx_lock.
+
+/* we split the first BD into headers and data BDs
+ * to ease the pain of our fellow microcode engineers
+ * we use one mapping for both BDs
+ * So far this has only been observed to happen
+ * in Other Operating Systems(TM)
+ */
+static noinline u16 bnx2x_tx_split(struct bnx2x *bp,
+                                  struct bnx2x_fastpath *fp,
+                                  struct eth_tx_bd **tx_bd, u16 hlen,
+                                  u16 bd_prod, int nbd)
+{
+       struct eth_tx_bd *h_tx_bd = *tx_bd;
+       struct eth_tx_bd *d_tx_bd;
+       dma_addr_t mapping;
+       int old_len = le16_to_cpu(h_tx_bd->nbytes);
+
+       /* first fix first BD */
+       h_tx_bd->nbd = cpu_to_le16(nbd);
+       h_tx_bd->nbytes = cpu_to_le16(hlen);
+
+       DP(NETIF_MSG_TX_QUEUED, "TSO split header size is %d "
+          "(%x:%x) nbd %d\n", h_tx_bd->nbytes, h_tx_bd->addr_hi,
+          h_tx_bd->addr_lo, h_tx_bd->nbd);
+
+       /* now get a new data BD
+        * (after the pbd) and fill it */
+       bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
+       d_tx_bd = &fp->tx_desc_ring[bd_prod];
+
+       mapping = HILO_U64(le32_to_cpu(h_tx_bd->addr_hi),
+                          le32_to_cpu(h_tx_bd->addr_lo)) + hlen;
+
+       d_tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
+       d_tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
+       d_tx_bd->nbytes = cpu_to_le16(old_len - hlen);
+       d_tx_bd->vlan = 0;
+       /* this marks the BD as one that has no individual mapping;
+        * the FW ignores this flag in a BD not marked start
+        */
+       d_tx_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_SW_LSO;
+       DP(NETIF_MSG_TX_QUEUED,
+          "TSO split data size is %d (%x:%x)\n",
+          d_tx_bd->nbytes, d_tx_bd->addr_hi, d_tx_bd->addr_lo);
+
+       /* update tx_bd for marking the last BD flag */
+       *tx_bd = d_tx_bd;
+
+       return bd_prod;
+}
+
+static inline u16 bnx2x_csum_fix(unsigned char *t_header, u16 csum, s8 fix)
+{
+       if (fix > 0)
+               csum = (u16) ~csum_fold(csum_sub(csum,
+                               csum_partial(t_header - fix, fix, 0)));
+
+       else if (fix < 0)
+               csum = (u16) ~csum_fold(csum_add(csum,
+                               csum_partial(t_header, -fix, 0)));
+
+       return swab16(csum);
+}
+
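bnx2x_csum_fix() leans on one's-complement arithmetic: if the stack summed 'fix' extra bytes before the transport header (or skipped some after it), exactly those bytes can be subtracted or added without recomputing the rest. The same routine with the checksum types spelled out, as a sketch:

/* re-base a partial csum so it covers [hdr, end) instead of [hdr - fix, end) */
static u16 csum_rebase(unsigned char *hdr, __wsum sum, s8 fix)
{
	if (fix > 0)		/* drop the 'fix' bytes summed before hdr */
		sum = csum_sub(sum, csum_partial(hdr - fix, fix, 0));
	else if (fix < 0)	/* fold in the -fix bytes that were skipped */
		sum = csum_add(sum, csum_partial(hdr, -fix, 0));

	return swab16((u16)~csum_fold(sum));	/* HW expects it byte-swapped */
}
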
+static inline u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
+{
+       u32 rc;
+
+       if (skb->ip_summed != CHECKSUM_PARTIAL)
+               rc = XMIT_PLAIN;
+
+       else {
+               if (skb->protocol == ntohs(ETH_P_IPV6)) {
+                       rc = XMIT_CSUM_V6;
+                       if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
+                               rc |= XMIT_CSUM_TCP;
+
+               } else {
+                       rc = XMIT_CSUM_V4;
+                       if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+                               rc |= XMIT_CSUM_TCP;
+               }
+       }
+
+       if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+               rc |= XMIT_GSO_V4;
+
+       else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+               rc |= XMIT_GSO_V6;
+
+       return rc;
+}
+
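xmit_type is a small bitmask combining checksum and GSO properties. The flag values below are a sketch consistent with how the function composes them; the authoritative definitions are in bnx2x.h:

#define XMIT_PLAIN		0
#define XMIT_CSUM_V4		0x1
#define XMIT_CSUM_V6		0x2
#define XMIT_CSUM_TCP		0x4
#define XMIT_CSUM		(XMIT_CSUM_V4 | XMIT_CSUM_V6)
#define XMIT_GSO_V4		0x8
#define XMIT_GSO_V6		0x10
#define XMIT_GSO		(XMIT_GSO_V4 | XMIT_GSO_V6)
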
+/* check if packet requires linearization (packet is too fragmented) */
+static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
+                            u32 xmit_type)
+{
+       int to_copy = 0;
+       int hlen = 0;
+       int first_bd_sz = 0;
+
+       /* 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */
+       if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - 3)) {
+
+               if (xmit_type & XMIT_GSO) {
+                       unsigned short lso_mss = skb_shinfo(skb)->gso_size;
+                       /* Check if LSO packet needs to be copied:
+                          3 = 1 (for headers BD) + 2 (for PBD and last BD) */
+                       int wnd_size = MAX_FETCH_BD - 3;
+                       /* Number of windows to check */
+                       int num_wnds = skb_shinfo(skb)->nr_frags - wnd_size;
+                       int wnd_idx = 0;
+                       int frag_idx = 0;
+                       u32 wnd_sum = 0;
+
+                       /* Headers length */
+                       hlen = (int)(skb_transport_header(skb) - skb->data) +
+                               tcp_hdrlen(skb);
+
+                       /* Amount of data (w/o headers) on the linear part of the SKB */
+                       first_bd_sz = skb_headlen(skb) - hlen;
+
+                       wnd_sum  = first_bd_sz;
+
+                       /* Calculate the first sum - it's special */
+                       for (frag_idx = 0; frag_idx < wnd_size - 1; frag_idx++)
+                               wnd_sum +=
+                                       skb_shinfo(skb)->frags[frag_idx].size;
+
+                       /* If there was data in the linear part of the skb - check it */
+                       if (first_bd_sz > 0) {
+                               if (unlikely(wnd_sum < lso_mss)) {
+                                       to_copy = 1;
+                                       goto exit_lbl;
+                               }
+
+                               wnd_sum -= first_bd_sz;
+                       }
+
+                       /* Others are easier: run through the frag list and
+                          check all windows */
+                       for (wnd_idx = 0; wnd_idx <= num_wnds; wnd_idx++) {
+                               wnd_sum +=
+                         skb_shinfo(skb)->frags[wnd_idx + wnd_size - 1].size;
+
+                               if (unlikely(wnd_sum < lso_mss)) {
+                                       to_copy = 1;
+                                       break;
+                               }
+                               wnd_sum -=
+                                       skb_shinfo(skb)->frags[wnd_idx].size;
+                       }
+
+               } else {
+                       /* in the non-LSO case a too fragmented packet
+                          should always be linearized */
+                       to_copy = 1;
+               }
+       }
+
+exit_lbl:
+       if (unlikely(to_copy))
+               DP(NETIF_MSG_TX_QUEUED,
+                  "Linearization IS REQUIRED for %s packet. "
+                  "num_frags %d  hlen %d  first_bd_sz %d\n",
+                  (xmit_type & XMIT_GSO) ? "LSO" : "non-LSO",
+                  skb_shinfo(skb)->nr_frags, hlen, first_bd_sz);
+
+       return to_copy;
+}
+
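A worked view of the check above: with MAX_FETCH_BD = 13 (an assumed value for the example) the window size is 10, so every 10 consecutive BDs of an LSO packet must together carry at least one gso_size worth of data, or the FW could be asked to fetch more than MAX_FETCH_BD BDs for a single on-wire packet. The sliding-window core, ignoring the linear-data special case handled above:

/* return 1 if any window of wnd_size frag sizes sums to less than mss */
static int window_too_small(const unsigned int *sz, int nfrags,
			    int wnd_size, unsigned int mss)
{
	unsigned int sum = 0;
	int i;

	for (i = 0; i < wnd_size && i < nfrags; i++)
		sum += sz[i];			/* first window */
	if (sum < mss)
		return 1;

	for (i = wnd_size; i < nfrags; i++) {	/* slide one frag at a time */
		sum += sz[i] - sz[i - wnd_size];
		if (sum < mss)
			return 1;
	}
	return 0;
}
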
+/* called with netif_tx_lock
  * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call
- * netif_wake_queue().
+ * netif_wake_queue()
  */
 static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
@@ -7652,39 +9312,60 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
        struct eth_tx_bd *tx_bd;
        struct eth_tx_parse_bd *pbd = NULL;
        u16 pkt_prod, bd_prod;
-       int nbd, fp_index = 0;
+       int nbd, fp_index;
        dma_addr_t mapping;
+       u32 xmit_type = bnx2x_xmit_type(bp, skb);
+       int vlan_off = (bp->e1hov ? 4 : 0);
+       int i;
+       u8 hlen = 0;
 
 #ifdef BNX2X_STOP_ON_ERROR
        if (unlikely(bp->panic))
                return NETDEV_TX_BUSY;
 #endif
 
-       fp_index = smp_processor_id() % (bp->num_queues);
-
+       fp_index = (smp_processor_id() % bp->num_queues);
        fp = &bp->fp[fp_index];
+
        if (unlikely(bnx2x_tx_avail(bp->fp) <
                                        (skb_shinfo(skb)->nr_frags + 3))) {
-               bp->slowpath->eth_stats.driver_xoff++,
+               bp->eth_stats.driver_xoff++;
                netif_stop_queue(dev);
                BNX2X_ERR("BUG! Tx ring full when queue awake!\n");
                return NETDEV_TX_BUSY;
        }
 
+       DP(NETIF_MSG_TX_QUEUED, "SKB: summed %x  protocol %x  protocol(%x,%x)"
+          "  gso type %x  xmit_type %x\n",
+          skb->ip_summed, skb->protocol, ipv6_hdr(skb)->nexthdr,
+          ip_hdr(skb)->protocol, skb_shinfo(skb)->gso_type, xmit_type);
+
+       /* First, check if we need to linearize the skb
+          (due to FW restrictions) */
+       if (bnx2x_pkt_req_lin(bp, skb, xmit_type)) {
+               /* Statistics of linearization */
+               bp->lin_cnt++;
+               if (skb_linearize(skb) != 0) {
+                       DP(NETIF_MSG_TX_QUEUED, "SKB linearization failed - "
+                          "silently dropping this SKB\n");
+                       dev_kfree_skb_any(skb);
+                       return NETDEV_TX_OK;
+               }
+       }
+
        /*
-       This is a bit ugly. First we use one BD which we mark as start,
+       Please read carefully. First we use one BD which we mark as start,
        then for TSO or xsum we have a parsing info BD,
-       and only then we have the rest of the TSO bds.
+       and only then we have the rest of the TSO BDs.
        (don't forget to mark the last one as last,
        and to unmap only AFTER you write to the BD ...)
-       I would like to thank DovH for this mess.
+       And above all, all pbd sizes are in words - NOT DWORDS!
        */
 
        pkt_prod = fp->tx_pkt_prod++;
-       bd_prod = fp->tx_bd_prod;
-       bd_prod = TX_BD(bd_prod);
+       bd_prod = TX_BD(fp->tx_bd_prod);
 
-       /* get a tx_buff and first bd */
+       /* get a tx_buf and first BD */
        tx_buf = &fp->tx_buf_ring[TX_BD(pkt_prod)];
        tx_bd = &fp->tx_desc_ring[bd_prod];
 
@@ -7693,65 +9374,80 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
                               ETH_TX_BD_ETH_ADDR_TYPE_SHIFT);
        tx_bd->general_data |= 1; /* header nbd */
 
-       /* remember the first bd of the packet */
-       tx_buf->first_bd = bd_prod;
+       /* remember the first BD of the packet */
+       tx_buf->first_bd = fp->tx_bd_prod;
+       tx_buf->skb = skb;
 
        DP(NETIF_MSG_TX_QUEUED,
           "sending pkt %u @%p  next_idx %u  bd %u @%p\n",
           pkt_prod, tx_buf, fp->tx_pkt_prod, bd_prod, tx_bd);
 
-       if (skb->ip_summed == CHECKSUM_PARTIAL) {
-               struct iphdr *iph = ip_hdr(skb);
-               u8 len;
+       if ((bp->vlgrp != NULL) && vlan_tx_tag_present(skb)) {
+               tx_bd->vlan = cpu_to_le16(vlan_tx_tag_get(skb));
+               tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_VLAN_TAG;
+               vlan_off += 4;
+       } else
+               tx_bd->vlan = cpu_to_le16(pkt_prod);
 
-               tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IP_CSUM;
+       if (xmit_type) {
 
-               /* turn on parsing and get a bd */
+               /* turn on parsing and get a BD */
                bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
                pbd = (void *)&fp->tx_desc_ring[bd_prod];
-               len = ((u8 *)iph - (u8 *)skb->data) / 2;
+
+               memset(pbd, 0, sizeof(struct eth_tx_parse_bd));
+       }
+
+       if (xmit_type & XMIT_CSUM) {
+               hlen = (skb_network_header(skb) - skb->data + vlan_off) / 2;
 
                /* for now NS flag is not used in Linux */
-               pbd->global_data = (len |
+               pbd->global_data = (hlen |
                                    ((skb->protocol == ntohs(ETH_P_8021Q)) <<
                                     ETH_TX_PARSE_BD_LLC_SNAP_EN_SHIFT));
-               pbd->ip_hlen = ip_hdrlen(skb) / 2;
-               pbd->total_hlen = cpu_to_le16(len + pbd->ip_hlen);
-               if (iph->protocol == IPPROTO_TCP) {
-                       struct tcphdr *th = tcp_hdr(skb);
 
-                       tx_bd->bd_flags.as_bitfield |=
-                                               ETH_TX_BD_FLAGS_TCP_CSUM;
-                       pbd->tcp_flags = pbd_tcp_flags(skb);
-                       pbd->total_hlen += cpu_to_le16(tcp_hdrlen(skb) / 2);
-                       pbd->tcp_pseudo_csum = swab16(th->check);
+               pbd->ip_hlen = (skb_transport_header(skb) -
+                               skb_network_header(skb)) / 2;
+
+               hlen += pbd->ip_hlen + tcp_hdrlen(skb) / 2;
+
+               pbd->total_hlen = cpu_to_le16(hlen);
+               hlen = hlen*2 - vlan_off;
 
-               } else if (iph->protocol == IPPROTO_UDP) {
-                       struct udphdr *uh = udp_hdr(skb);
+               tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_TCP_CSUM;
 
+               if (xmit_type & XMIT_CSUM_V4)
                        tx_bd->bd_flags.as_bitfield |=
-                                               ETH_TX_BD_FLAGS_TCP_CSUM;
-                       pbd->total_hlen += cpu_to_le16(4);
+                                               ETH_TX_BD_FLAGS_IP_CSUM;
+               else
+                       tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_IPV6;
+
+               if (xmit_type & XMIT_CSUM_TCP) {
+                       pbd->tcp_pseudo_csum = swab16(tcp_hdr(skb)->check);
+
+               } else {
+                       s8 fix = SKB_CS_OFF(skb); /* signed! */
+
                        pbd->global_data |= ETH_TX_PARSE_BD_CS_ANY_FLG;
-                       pbd->cs_offset = 5; /* 10 >> 1 */
-                       pbd->tcp_pseudo_csum = 0;
-                       /* HW bug: we need to subtract 10 bytes before the
-                        * UDP header from the csum
-                        */
-                       uh->check = (u16) ~csum_fold(csum_sub(uh->check,
-                               csum_partial(((u8 *)(uh)-10), 10, 0)));
-               }
-       }
+                       pbd->cs_offset = fix / 2;
 
-       if ((bp->vlgrp != NULL) && vlan_tx_tag_present(skb)) {
-               tx_bd->vlan = cpu_to_le16(vlan_tx_tag_get(skb));
-               tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_VLAN_TAG;
-       } else {
-               tx_bd->vlan = cpu_to_le16(pkt_prod);
+                       DP(NETIF_MSG_TX_QUEUED,
+                          "hlen %d  offset %d  fix %d  csum before fix %x\n",
+                          le16_to_cpu(pbd->total_hlen), pbd->cs_offset, fix,
+                          SKB_CS(skb));
+
+                       /* HW bug: fixup the CSUM */
+                       pbd->tcp_pseudo_csum =
+                               bnx2x_csum_fix(skb_transport_header(skb),
+                                              SKB_CS(skb), fix);
+
+                       DP(NETIF_MSG_TX_QUEUED, "csum after fix %x\n",
+                          pbd->tcp_pseudo_csum);
+               }
        }
 
        mapping = pci_map_single(bp->pdev, skb->data,
-                                skb->len, PCI_DMA_TODEVICE);
+                                skb_headlen(skb), PCI_DMA_TODEVICE);
 
        tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
        tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
@@ -7760,13 +9456,12 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
        tx_bd->nbytes = cpu_to_le16(skb_headlen(skb));
 
        DP(NETIF_MSG_TX_QUEUED, "first bd @%p  addr (%x:%x)  nbd %d"
-          "  nbytes %d  flags %x  vlan %u\n",
-          tx_bd, tx_bd->addr_hi, tx_bd->addr_lo, tx_bd->nbd,
-          tx_bd->nbytes, tx_bd->bd_flags.as_bitfield, tx_bd->vlan);
+          "  nbytes %d  flags %x  vlan %x\n",
+          tx_bd, tx_bd->addr_hi, tx_bd->addr_lo, le16_to_cpu(tx_bd->nbd),
+          le16_to_cpu(tx_bd->nbytes), tx_bd->bd_flags.as_bitfield,
+          le16_to_cpu(tx_bd->vlan));
 
-       if (skb_shinfo(skb)->gso_size &&
-           (skb->len > (bp->dev->mtu + ETH_HLEN))) {
-               int hlen = 2 * le16_to_cpu(pbd->total_hlen);
+       if (xmit_type & XMIT_GSO) {
 
                DP(NETIF_MSG_TX_QUEUED,
                   "TSO packet len %d  hlen %d  total len %d  tso size %d\n",
@@ -7775,99 +9470,60 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
                tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_SW_LSO;
 
-               if (tx_bd->nbytes > cpu_to_le16(hlen)) {
-                       /* we split the first bd into headers and data bds
-                        * to ease the pain of our fellow micocode engineers
-                        * we use one mapping for both bds
-                        * So far this has only been observed to happen
-                        * in Other Operating Systems(TM)
-                        */
-
-                       /* first fix first bd */
-                       nbd++;
-                       tx_bd->nbd = cpu_to_le16(nbd);
-                       tx_bd->nbytes = cpu_to_le16(hlen);
-
-                       /* we only print this as an error
-                        * because we don't think this will ever happen.
-                        */
-                       BNX2X_ERR("TSO split header size is %d (%x:%x)"
-                                 "  nbd %d\n", tx_bd->nbytes, tx_bd->addr_hi,
-                                 tx_bd->addr_lo, tx_bd->nbd);
-
-                       /* now get a new data bd
-                        * (after the pbd) and fill it */
-                       bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
-                       tx_bd = &fp->tx_desc_ring[bd_prod];
-
-                       tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
-                       tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping) + hlen);
-                       tx_bd->nbytes = cpu_to_le16(skb_headlen(skb) - hlen);
-                       tx_bd->vlan = cpu_to_le16(pkt_prod);
-                       /* this marks the bd
-                        * as one that has no individual mapping
-                        * the FW ignores this flag in a bd not marked start
-                        */
-                       tx_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_SW_LSO;
-                       DP(NETIF_MSG_TX_QUEUED,
-                          "TSO split data size is %d (%x:%x)\n",
-                          tx_bd->nbytes, tx_bd->addr_hi, tx_bd->addr_lo);
-               }
-
-               if (!pbd) {
-                       /* supposed to be unreached
-                        * (and therefore not handled properly...)
-                        */
-                       BNX2X_ERR("LSO with no PBD\n");
-                       BUG();
-               }
+               if (unlikely(skb_headlen(skb) > hlen))
+                       bd_prod = bnx2x_tx_split(bp, fp, &tx_bd, hlen,
+                                                bd_prod, ++nbd);
 
                pbd->lso_mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
                pbd->tcp_send_seq = swab32(tcp_hdr(skb)->seq);
-               pbd->ip_id = swab16(ip_hdr(skb)->id);
-               pbd->tcp_pseudo_csum =
+               pbd->tcp_flags = pbd_tcp_flags(skb);
+
+               if (xmit_type & XMIT_GSO_V4) {
+                       pbd->ip_id = swab16(ip_hdr(skb)->id);
+                       pbd->tcp_pseudo_csum =
                                swab16(~csum_tcpudp_magic(ip_hdr(skb)->saddr,
                                                          ip_hdr(skb)->daddr,
                                                          0, IPPROTO_TCP, 0));
+
+               } else
+                       pbd->tcp_pseudo_csum =
+                               swab16(~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+                                                       &ipv6_hdr(skb)->daddr,
+                                                       0, IPPROTO_TCP, 0));
+
                pbd->global_data |= ETH_TX_PARSE_BD_PSEUDO_CS_WITHOUT_LEN;
        }
 
-       {
-               int i;
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+               bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
+               tx_bd = &fp->tx_desc_ring[bd_prod];
 
-                       bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
-                       tx_bd = &fp->tx_desc_ring[bd_prod];
+               mapping = pci_map_page(bp->pdev, frag->page, frag->page_offset,
+                                      frag->size, PCI_DMA_TODEVICE);
 
-                       mapping = pci_map_page(bp->pdev, frag->page,
-                                              frag->page_offset,
-                                              frag->size, PCI_DMA_TODEVICE);
+               tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
+               tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
+               tx_bd->nbytes = cpu_to_le16(frag->size);
+               tx_bd->vlan = cpu_to_le16(pkt_prod);
+               tx_bd->bd_flags.as_bitfield = 0;
 
-                       tx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
-                       tx_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
-                       tx_bd->nbytes = cpu_to_le16(frag->size);
-                       tx_bd->vlan = cpu_to_le16(pkt_prod);
-                       tx_bd->bd_flags.as_bitfield = 0;
-                       DP(NETIF_MSG_TX_QUEUED, "frag %d  bd @%p"
-                          "  addr (%x:%x)  nbytes %d  flags %x\n",
-                          i, tx_bd, tx_bd->addr_hi, tx_bd->addr_lo,
-                          tx_bd->nbytes, tx_bd->bd_flags.as_bitfield);
-               } /* for */
+               DP(NETIF_MSG_TX_QUEUED,
+                  "frag %d  bd @%p  addr (%x:%x)  nbytes %d  flags %x\n",
+                  i, tx_bd, tx_bd->addr_hi, tx_bd->addr_lo,
+                  le16_to_cpu(tx_bd->nbytes), tx_bd->bd_flags.as_bitfield);
        }
 
-       /* now at last mark the bd as the last bd */
+       /* now at last mark the BD as the last BD */
        tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_END_BD;
 
        DP(NETIF_MSG_TX_QUEUED, "last bd @%p  flags %x\n",
           tx_bd, tx_bd->bd_flags.as_bitfield);
 
-       tx_buf->skb = skb;
-
        bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
 
-       /* now send a tx doorbell, counting the next bd
+       /* now send a tx doorbell, counting the next BD
         * if the packet contains or ends with it
         */
        if (TX_BD_POFF(bd_prod) < nbd)
@@ -7879,25 +9535,25 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
                   "  tcp_flags %x  xsum %x  seq %u  hlen %u\n",
                   pbd, pbd->global_data, pbd->ip_hlen, pbd->ip_id,
                   pbd->lso_mss, pbd->tcp_flags, pbd->tcp_pseudo_csum,
-                  pbd->tcp_send_seq, pbd->total_hlen);
+                  pbd->tcp_send_seq, le16_to_cpu(pbd->total_hlen));
 
-       DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %u  bd %d\n", nbd, bd_prod);
+       DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %d  bd %u\n", nbd, bd_prod);
 
        fp->hw_tx_prods->bds_prod =
                cpu_to_le16(le16_to_cpu(fp->hw_tx_prods->bds_prod) + nbd);
        mb(); /* FW restriction: must not reorder writing nbd and packets */
        fp->hw_tx_prods->packets_prod =
                cpu_to_le32(le32_to_cpu(fp->hw_tx_prods->packets_prod) + 1);
-       DOORBELL(bp, fp_index, 0);
+       DOORBELL(bp, FP_IDX(fp), 0);
 
        mmiowb();
 
-       fp->tx_bd_prod = bd_prod;
+       fp->tx_bd_prod += nbd;
        dev->trans_start = jiffies;
 
        if (unlikely(bnx2x_tx_avail(fp) < MAX_SKB_FRAGS + 3)) {
                netif_stop_queue(dev);
-               bp->slowpath->eth_stats.driver_xoff++;
+               bp->eth_stats.driver_xoff++;
                if (bnx2x_tx_avail(fp) >= MAX_SKB_FRAGS + 3)
                        netif_wake_queue(dev);
        }
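
The doorbell sequence in this hunk encodes a firmware ordering contract: the chip must observe the BD-producer update before the packet-producer update (hence the mb() between the two writes), and mmiowb() keeps the doorbell MMIO write from being reordered past a later unlock on another CPU. A minimal sketch of the same pattern, using a simplified stand-in for the real hw_tx_prods layout (the struct below is an assumption for illustration, not the firmware ABI):

	/* Sketch only: illustrates the producer write-ordering contract. */
	struct tx_prods {
		__le16 bds_prod;	/* buffer descriptors posted so far */
		__le32 packets_prod;	/* packets posted so far */
	};

	static void post_tx_producers(struct tx_prods *prods, u16 nbd)
	{
		prods->bds_prod =
			cpu_to_le16(le16_to_cpu(prods->bds_prod) + nbd);
		mb();	/* FW must see bds_prod before packets_prod */
		prods->packets_prod =
			cpu_to_le32(le32_to_cpu(prods->packets_prod) + 1);
		/* caller rings the doorbell, then mmiowb() before unlocking */
	}

The stop/wake tail is the usual lockless pattern: stop the queue first, then re-check bnx2x_tx_avail(), so a TX completion that freed descriptors between the two tests cannot leave the queue stopped forever.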
@@ -7906,26 +9562,26 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev)
        return NETDEV_TX_OK;
 }
 
-/* Called with rtnl_lock */
+/* called with rtnl_lock */
 static int bnx2x_open(struct net_device *dev)
 {
        struct bnx2x *bp = netdev_priv(dev);
 
        bnx2x_set_power_state(bp, PCI_D0);
 
-       return bnx2x_nic_load(bp, 1);
+       return bnx2x_nic_load(bp, LOAD_OPEN);
 }
 
-/* Called with rtnl_lock */
+/* called with rtnl_lock */
 static int bnx2x_close(struct net_device *dev)
 {
        struct bnx2x *bp = netdev_priv(dev);
 
        /* Unload the driver, release IRQs */
-       bnx2x_nic_unload(bp, 1);
-
-       if (!CHIP_REV_IS_SLOW(bp))
-               bnx2x_set_power_state(bp, PCI_D3hot);
+       bnx2x_nic_unload(bp, UNLOAD_CLOSE);
+       if (atomic_read(&bp->pdev->enable_cnt) == 1)
+               if (!CHIP_REV_IS_SLOW(bp))
+                       bnx2x_set_power_state(bp, PCI_D3hot);
 
        return 0;
 }
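
The new enable_cnt check matters on these dual-port devices: pci_enable_device() reference-counts enables in pdev->enable_cnt, so only the last closer may drop the device into D3hot. A sketch of the same guard using the generic PCI helper (bnx2x_set_power_state above is the driver's own wrapper):

	/* Sketch: only the last enabled user may power the device down. */
	if (atomic_read(&pdev->enable_cnt) == 1)
		pci_set_power_state(pdev, PCI_D3hot);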
@@ -8320,10 +9976,7 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev,
        dev->features |= (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX);
 #endif
        dev->features |= (NETIF_F_TSO | NETIF_F_TSO_ECN);
-
-       bp->timer_interval = HZ;
-       bp->current_interval = (poll ? poll : HZ);
-
+       dev->features |= NETIF_F_TSO6;
 
        return 0;
 
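Advertising NETIF_F_TSO6 is what lets the stack hand this driver IPv6 LSO frames; it pairs with the csum_ipv6_magic() branch added in bnx2x_start_xmit() above, where the parsing BD is seeded with the complemented TCP pseudo-header checksum computed over a zero length so the hardware can complete it per segment. A sketch of that seed for both address families (the helper name lso_pseudo_csum_seed is hypothetical; csum_tcpudp_magic() and csum_ipv6_magic() are the real kernel helpers):

	/* Hypothetical helper, sketch only: complemented pseudo-header
	 * checksum with length 0, for hardware that fills in the length
	 * per LSO segment. */
	static u16 lso_pseudo_csum_seed(struct sk_buff *skb)
	{
		if (skb->protocol == htons(ETH_P_IPV6))
			return swab16(~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						       &ipv6_hdr(skb)->daddr,
						       0, IPPROTO_TCP, 0));
		return swab16(~csum_tcpudp_magic(ip_hdr(skb)->saddr,
						 ip_hdr(skb)->daddr,
						 0, IPPROTO_TCP, 0));
	}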
@@ -8528,13 +10181,102 @@ static int bnx2x_resume(struct pci_dev *pdev)
        return rc;
 }
 
+/**
+ * bnx2x_io_error_detected - called when a PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current PCI connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t bnx2x_io_error_detected(struct pci_dev *pdev,
+                                               pci_channel_state_t state)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct bnx2x *bp = netdev_priv(dev);
+
+       rtnl_lock();
+
+       netif_device_detach(dev);
+
+       if (netif_running(dev))
+               bnx2x_nic_unload(bp, UNLOAD_CLOSE);
+
+       pci_disable_device(pdev);
+
+       rtnl_unlock();
+
+       /* Request a slot reset */
+       return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * bnx2x_io_slot_reset - called after the PCI bus has been reset
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold boot.
+ */
+static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct bnx2x *bp = netdev_priv(dev);
+
+       rtnl_lock();
+
+       if (pci_enable_device(pdev)) {
+               dev_err(&pdev->dev,
+                       "Cannot re-enable PCI device after reset\n");
+               rtnl_unlock();
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+
+       pci_set_master(pdev);
+       pci_restore_state(pdev);
+
+       if (netif_running(dev))
+               bnx2x_set_power_state(bp, PCI_D0);
+
+       rtnl_unlock();
+
+       return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * bnx2x_io_resume - called when traffic can start flowing again
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * it's OK to resume normal operation.
+ */
+static void bnx2x_io_resume(struct pci_dev *pdev)
+{
+       struct net_device *dev = pci_get_drvdata(pdev);
+       struct bnx2x *bp = netdev_priv(dev);
+
+       rtnl_lock();
+
+       if (netif_running(dev))
+               bnx2x_nic_load(bp, LOAD_OPEN);
+
+       netif_device_attach(dev);
+
+       rtnl_unlock();
+}
+
+static struct pci_error_handlers bnx2x_err_handler = {
+       .error_detected = bnx2x_io_error_detected,
+       .slot_reset = bnx2x_io_slot_reset,
+       .resume = bnx2x_io_resume,
+};
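
These three callbacks implement the standard PCI error-recovery walk: error_detected() asks for a reset, the platform resets the slot, slot_reset() re-enables the device and reports PCI_ERS_RESULT_RECOVERED, and only then does the core call resume() to reload the NIC. A simplified sketch of how the core drives them (an assumption about the flow for illustration, not the actual AER implementation):

	/* Sketch: simplified view of the PCI core's recovery sequence. */
	static void recover_one_device(struct pci_dev *pdev)
	{
		const struct pci_error_handlers *eh =
			pdev->driver->err_handler;

		if (eh->error_detected(pdev, pci_channel_io_frozen) !=
		    PCI_ERS_RESULT_NEED_RESET)
			return;
		/* ... platform performs the slot reset here ... */
		if (eh->slot_reset(pdev) == PCI_ERS_RESULT_RECOVERED)
			eh->resume(pdev);
	}

Hooking the struct into bnx2x_pci_driver below via .err_handler is the only registration the core needs.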
+
 static struct pci_driver bnx2x_pci_driver = {
-       .name       = DRV_MODULE_NAME,
-       .id_table   = bnx2x_pci_tbl,
-       .probe      = bnx2x_init_one,
-       .remove     = __devexit_p(bnx2x_remove_one),
-       .suspend    = bnx2x_suspend,
-       .resume     = bnx2x_resume,
+       .name        = DRV_MODULE_NAME,
+       .id_table    = bnx2x_pci_tbl,
+       .probe       = bnx2x_init_one,
+       .remove      = __devexit_p(bnx2x_remove_one),
+       .suspend     = bnx2x_suspend,
+       .resume      = bnx2x_resume,
+       .err_handler = &bnx2x_err_handler,
 };
 
 static int __init bnx2x_init(void)