#include <net/checksum.h>
#include <asm/irq.h>
+#include <asm/firmware.h>
#include "pasemi_mac.h"
+/* We use our own RX skb alignment instead of NET_IP_ALIGN, since ppc64
+ * in general leaves that at 0 because of design flaws in some of the
+ * server bridge chips. For PWRficient, however, unaligned copies are
+ * more expensive than unaligned DMA, so make sure the data is aligned
+ * instead.
+ */
+#define LOCAL_SKB_ALIGN 2
/* TODO list
*
- * - Get rid of pci_{read,write}_config(), map registers with ioremap
- * for performance
- * - PHY support
* - Multicast support
* - Large MTU support
- * - Other performance improvements
+ * - SW LRO
+ * - Multiqueue RX/TX
*/
/* Must be a power of two */
-#define RX_RING_SIZE 512
-#define TX_RING_SIZE 512
+#define RX_RING_SIZE 4096
+#define TX_RING_SIZE 4096
#define DEFAULT_MSG_ENABLE \
(NETIF_MSG_DRV | \
static struct pasdma_status *dma_status;
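+/* Returns whether the DMA engine needs to do address translation:
+ * forced on via CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE, otherwise on when
+ * running under a hypervisor (LPAR firmware). The channel config bits
+ * below are set accordingly.
+ */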
+static int translation_enabled(void)
+{
+#if defined(CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE)
+ return 1;
+#else
+ return firmware_has_feature(FW_FEATURE_LPAR);
+#endif
+}
+
static void write_iob_reg(struct pasemi_mac *mac, unsigned int reg,
unsigned int val)
{
struct pasemi_mac_rxring *ring;
struct pasemi_mac *mac = netdev_priv(dev);
int chan_id = mac->dma_rxch;
+ unsigned int cfg;
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
PAS_DMA_RXCHAN_BASEU_BRBH(ring->dma >> 32) |
PAS_DMA_RXCHAN_BASEU_SIZ(RX_RING_SIZE >> 3));
- write_dma_reg(mac, PAS_DMA_RXCHAN_CFG(chan_id),
- PAS_DMA_RXCHAN_CFG_HBU(2));
+ cfg = PAS_DMA_RXCHAN_CFG_HBU(2);
+
+ if (translation_enabled())
+ cfg |= PAS_DMA_RXCHAN_CFG_CTR;
+
+ write_dma_reg(mac, PAS_DMA_RXCHAN_CFG(chan_id), cfg);
write_dma_reg(mac, PAS_DMA_RXINT_BASEL(mac->dma_if),
- PAS_DMA_RXINT_BASEL_BRBL(__pa(ring->buffers)));
+ PAS_DMA_RXINT_BASEL_BRBL(ring->buf_dma));
write_dma_reg(mac, PAS_DMA_RXINT_BASEU(mac->dma_if),
- PAS_DMA_RXINT_BASEU_BRBH(__pa(ring->buffers) >> 32) |
+ PAS_DMA_RXINT_BASEU_BRBH(ring->buf_dma >> 32) |
PAS_DMA_RXINT_BASEU_SIZ(RX_RING_SIZE >> 3));
- write_dma_reg(mac, PAS_DMA_RXINT_CFG(mac->dma_if),
- PAS_DMA_RXINT_CFG_DHL(3) |
- PAS_DMA_RXINT_CFG_L2 |
- PAS_DMA_RXINT_CFG_LW);
+ cfg = PAS_DMA_RXINT_CFG_DHL(3) | PAS_DMA_RXINT_CFG_L2 |
+ PAS_DMA_RXINT_CFG_LW | PAS_DMA_RXINT_CFG_RBP |
+ PAS_DMA_RXINT_CFG_HEN;
+
+ if (translation_enabled())
+ cfg |= PAS_DMA_RXINT_CFG_ITRR | PAS_DMA_RXINT_CFG_ITR;
+
+ write_dma_reg(mac, PAS_DMA_RXINT_CFG(mac->dma_if), cfg);
ring->next_to_fill = 0;
ring->next_to_clean = 0;
u32 val;
int chan_id = mac->dma_txch;
struct pasemi_mac_txring *ring;
+ unsigned int cfg;
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
if (!ring)
write_dma_reg(mac, PAS_DMA_TXCHAN_BASEU(chan_id), val);
- write_dma_reg(mac, PAS_DMA_TXCHAN_CFG(chan_id),
- PAS_DMA_TXCHAN_CFG_TY_IFACE |
- PAS_DMA_TXCHAN_CFG_TATTR(mac->dma_if) |
- PAS_DMA_TXCHAN_CFG_UP |
- PAS_DMA_TXCHAN_CFG_WT(2));
+ cfg = PAS_DMA_TXCHAN_CFG_TY_IFACE |
+ PAS_DMA_TXCHAN_CFG_TATTR(mac->dma_if) |
+ PAS_DMA_TXCHAN_CFG_UP |
+ PAS_DMA_TXCHAN_CFG_WT(2);
+
+ if (translation_enabled())
+ cfg |= PAS_DMA_TXCHAN_CFG_TRD | PAS_DMA_TXCHAN_CFG_TRR;
+
+ write_dma_reg(mac, PAS_DMA_TXCHAN_CFG(chan_id), cfg);
ring->next_to_fill = 0;
ring->next_to_clean = 0;
struct pasemi_mac_buffer *info;
dma_addr_t dmas[MAX_SKB_FRAGS+1];
int freed;
+ int start, limit;
+
+ start = mac->tx->next_to_clean;
+ limit = mac->tx->next_to_fill;
- for (i = 0; i < TX_RING_SIZE; i += freed) {
+ /* Compensate for when fill has wrapped but clean has not */
+ if (start > limit)
+ limit += TX_RING_SIZE;
+
+ for (i = start; i < limit; i += freed) {
info = &TX_RING_INFO(mac, i+1);
if (info->dma && info->skb) {
for (j = 0; j <= skb_shinfo(info->skb)->nr_frags; j++)
static void pasemi_mac_replenish_rx_ring(struct net_device *dev, int limit)
{
struct pasemi_mac *mac = netdev_priv(dev);
- int start = mac->rx->next_to_fill;
- unsigned int fill, count;
+ int fill, count;
if (limit <= 0)
return;
- fill = start;
+ fill = mac->rx->next_to_fill;
for (count = 0; count < limit; count++) {
struct pasemi_mac_buffer *info = &RX_RING_INFO(mac, fill);
u64 *buff = &RX_BUFF(mac, fill);
/* skb might still be in there for recycle on short receives */
if (info->skb)
skb = info->skb;
- else
+ else {
skb = dev_alloc_skb(BUF_SIZE);
+ /* Don't touch the skb if the allocation failed; the check
+ * below bails out on NULL.
+ */
+ if (likely(skb))
+ skb_reserve(skb, LOCAL_SKB_ALIGN);
+ }
if (unlikely(!skb))
break;
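+ /* skb->data was advanced by LOCAL_SKB_ALIGN above, so map that
+ * much less than BUF_SIZE to stay within the buffer.
+ */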
- dma = pci_map_single(mac->dma_pdev, skb->data, BUF_SIZE,
+ dma = pci_map_single(mac->dma_pdev, skb->data,
+ BUF_SIZE - LOCAL_SKB_ALIGN,
PCI_DMA_FROMDEVICE);
if (unlikely(dma_mapping_error(dma))) {
wmb();
- write_dma_reg(mac, PAS_DMA_RXCHAN_INCR(mac->dma_rxch), count);
write_dma_reg(mac, PAS_DMA_RXINT_INCR(mac->dma_if), count);
- mac->rx->next_to_fill += count;
+ mac->rx->next_to_fill = (mac->rx->next_to_fill + count) &
+ (RX_RING_SIZE - 1);
}
static void pasemi_mac_restart_rx_intr(struct pasemi_mac *mac)
int count;
struct pasemi_mac_buffer *info;
struct sk_buff *skb;
- unsigned int i, len;
+ unsigned int len;
u64 macrx;
dma_addr_t dma;
+ int buf_index;
+ u64 eval;
spin_lock(&mac->rx->lock);
n = mac->rx->next_to_clean;
- for (count = limit; count; count--) {
-
- rmb();
+ prefetch(&RX_RING(mac, n));
+ for (count = 0; count < limit; count++) {
macrx = RX_RING(mac, n);
if ((macrx & XCT_MACRX_E) ||
info = NULL;
- /* We have to scan for our skb since there's no way
- * to back-map them from the descriptor, and if we
- * have several receive channels then they might not
- * show up in the same order as they were put on the
- * interface ring.
- */
+ BUG_ON(!(macrx & XCT_MACRX_RR_8BRES));
- dma = (RX_RING(mac, n+1) & XCT_PTR_ADDR_M);
- for (i = mac->rx->next_to_fill;
- i < (mac->rx->next_to_fill + RX_RING_SIZE);
- i++) {
- info = &RX_RING_INFO(mac, i);
- if (info->dma == dma)
- break;
- }
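+ /* The 8-byte result descriptor holds the index of the buffer that
+ * was used, so we can look the skb up directly instead of scanning
+ * the ring for a matching DMA address.
+ */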
+ eval = (RX_RING(mac, n+1) & XCT_RXRES_8B_EVAL_M) >>
+ XCT_RXRES_8B_EVAL_S;
+ buf_index = eval-1;
- prefetchw(info);
+ dma = (RX_RING(mac, n+2) & XCT_PTR_ADDR_M);
+ info = &RX_RING_INFO(mac, buf_index);
skb = info->skb;
- prefetchw(skb);
- info->dma = 0;
- pci_unmap_single(mac->dma_pdev, dma, skb->len,
- PCI_DMA_FROMDEVICE);
+ prefetch(skb);
+ prefetch(&skb->data_len);
len = (macrx & XCT_MACRX_LLEN_M) >> XCT_MACRX_LLEN_S;
+ pci_unmap_single(mac->dma_pdev, dma, len, PCI_DMA_FROMDEVICE);
+
+ if (macrx & XCT_MACRX_CRC) {
+ /* CRC error flagged */
+ mac->netdev->stats.rx_errors++;
+ mac->netdev->stats.rx_crc_errors++;
+ /* No need to free skb, it'll be reused */
+ goto next;
+ }
+
if (len < 256) {
- struct sk_buff *new_skb =
- netdev_alloc_skb(mac->netdev, len + NET_IP_ALIGN);
+ struct sk_buff *new_skb;
+
+ new_skb = netdev_alloc_skb(mac->netdev,
+ len + LOCAL_SKB_ALIGN);
if (new_skb) {
- skb_reserve(new_skb, NET_IP_ALIGN);
+ skb_reserve(new_skb, LOCAL_SKB_ALIGN);
memcpy(new_skb->data, skb->data, len);
/* save the skb in buffer_info as good */
skb = new_skb;
} else
info->skb = NULL;
- /* Need to zero it out since hardware doesn't, since the
- * replenish loop uses it to tell when it's done.
- */
- RX_BUFF(mac, i) = 0;
+ info->dma = 0;
- skb_put(skb, len);
+ /* Don't include CRC */
+ skb_put(skb, len-4);
if (likely((macrx & XCT_MACRX_HTY_M) == XCT_MACRX_HTY_IPV4_OK)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->protocol = eth_type_trans(skb, mac->netdev);
netif_receive_skb(skb);
+next:
RX_RING(mac, n) = 0;
RX_RING(mac, n+1) = 0;
- n += 2;
+ /* Need to zero it out since hardware doesn't: the replenish loop
+ * uses it to tell when it's done.
+ */
+ RX_BUFF(mac, buf_index) = 0;
+
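+ /* Each packet consumes 4 8-byte ring entries: the mac descriptor,
+ * the 8-byte result, the buffer pointer and one entry of padding.
+ */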
+ n += 4;
}
if (n > RX_RING_SIZE) {
write_iob_reg(mac, PAS_IOB_COM_PKTHDRCNT, 0);
n &= (RX_RING_SIZE-1);
}
+
mac->rx->next_to_clean = n;
- pasemi_mac_replenish_rx_ring(mac->netdev, limit-count);
+
+ /* The increase is counted in 16-byte entries, and since each
+ * descriptor with an 8BRES takes up 3x8 bytes (padded to 4x8),
+ * increment by count*2.
+ */
+ write_dma_reg(mac, PAS_DMA_RXCHAN_INCR(mac->dma_rxch), count << 1);
+
+ pasemi_mac_replenish_rx_ring(mac->netdev, count);
spin_unlock(&mac->rx->lock);
static int pasemi_mac_clean_tx(struct pasemi_mac *mac)
{
int i, j;
- struct pasemi_mac_buffer *info;
- unsigned int start, descr_count, buf_count, limit;
+ unsigned int start, descr_count, buf_count, batch_limit;
+ unsigned int ring_limit;
unsigned int total_count;
unsigned long flags;
struct sk_buff *skbs[TX_CLEAN_BATCHSIZE];
dma_addr_t dmas[TX_CLEAN_BATCHSIZE][MAX_SKB_FRAGS+1];
total_count = 0;
- limit = TX_CLEAN_BATCHSIZE;
+ batch_limit = TX_CLEAN_BATCHSIZE;
restart:
spin_lock_irqsave(&mac->tx->lock, flags);
start = mac->tx->next_to_clean;
+ ring_limit = mac->tx->next_to_fill;
+
+ /* Compensate for when fill has wrapped but clean has not */
+ if (start > ring_limit)
+ ring_limit += TX_RING_SIZE;
buf_count = 0;
descr_count = 0;
for (i = start;
- descr_count < limit && i < mac->tx->next_to_fill;
+ descr_count < batch_limit && i < ring_limit;
i += buf_count) {
u64 mactx = TX_RING(mac, i);
+ struct sk_buff *skb;
if ((mactx & XCT_MACTX_E) ||
(*mac->tx_status & PAS_STATUS_ERROR))
/* Not yet transmitted */
break;
- info = &TX_RING_INFO(mac, i+1);
- skbs[descr_count] = info->skb;
+ skb = TX_RING_INFO(mac, i+1).skb;
+ skbs[descr_count] = skb;
- buf_count = 2 + skb_shinfo(info->skb)->nr_frags;
- for (j = 0; j <= skb_shinfo(info->skb)->nr_frags; j++)
+ buf_count = 2 + skb_shinfo(skb)->nr_frags;
+ for (j = 0; j <= skb_shinfo(skb)->nr_frags; j++)
dmas[descr_count][j] = TX_RING_INFO(mac, i+1+j).dma;
-
- info->dma = 0;
TX_RING(mac, i) = 0;
TX_RING(mac, i+1) = 0;
- TX_RING_INFO(mac, i+1).skb = 0;
- TX_RING_INFO(mac, i+1).dma = 0;
/* Since we always fill with an even number of entries, make
* sure we skip any unused one at the end as well.
buf_count++;
descr_count++;
}
- mac->tx->next_to_clean = i;
+ mac->tx->next_to_clean = i & (TX_RING_SIZE-1);
spin_unlock_irqrestore(&mac->tx->lock, flags);
netif_wake_queue(mac->netdev);
total_count += descr_count;
/* If the batch was full, try to clean more */
- if (descr_count == limit)
+ if (descr_count == batch_limit)
goto restart;
return total_count;
/* enable rx if */
write_dma_reg(mac, PAS_DMA_RXINT_RCMDSTA(mac->dma_if),
- PAS_DMA_RXINT_RCMDSTA_EN);
+ PAS_DMA_RXINT_RCMDSTA_EN |
+ PAS_DMA_RXINT_RCMDSTA_DROPS_M |
+ PAS_DMA_RXINT_RCMDSTA_BP |
+ PAS_DMA_RXINT_RCMDSTA_OO |
+ PAS_DMA_RXINT_RCMDSTA_BT);
/* enable rx channel */
write_dma_reg(mac, PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch),
PAS_DMA_RXCHAN_CCMDSTA_EN |
- PAS_DMA_RXCHAN_CCMDSTA_DU);
+ PAS_DMA_RXCHAN_CCMDSTA_DU |
+ PAS_DMA_RXCHAN_CCMDSTA_OD |
+ PAS_DMA_RXCHAN_CCMDSTA_FD |
+ PAS_DMA_RXCHAN_CCMDSTA_DT);
/* enable tx channel */
write_dma_reg(mac, PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch),
- PAS_DMA_TXCHAN_TCMDSTA_EN);
+ PAS_DMA_TXCHAN_TCMDSTA_EN |
+ PAS_DMA_TXCHAN_TCMDSTA_SZ |
+ PAS_DMA_TXCHAN_TCMDSTA_DB |
+ PAS_DMA_TXCHAN_TCMDSTA_DE |
+ PAS_DMA_TXCHAN_TCMDSTA_DA);
pasemi_mac_replenish_rx_ring(dev, RX_RING_SIZE);
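+ /* The INCR count is in 16-byte entries; the freshly filled ring of
+ * RX_RING_SIZE 8-byte entries thus corresponds to RX_RING_SIZE/2.
+ */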
+ write_dma_reg(mac, PAS_DMA_RXCHAN_INCR(mac->dma_rxch), RX_RING_SIZE>>1);
+
flags = PAS_MAC_CFG_PCFG_S1 | PAS_MAC_CFG_PCFG_PE |
PAS_MAC_CFG_PCFG_PR | PAS_MAC_CFG_PCFG_CE;
static int pasemi_mac_close(struct net_device *dev)
{
struct pasemi_mac *mac = netdev_priv(dev);
- unsigned int stat;
+ unsigned int sta;
int retries;
if (mac->phydev) {
netif_stop_queue(dev);
napi_disable(&mac->napi);
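+ /* Report any error bits the DMA engine flagged while the interface
+ * was up, before shutting the channels down.
+ */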
+ sta = read_dma_reg(mac, PAS_DMA_RXINT_RCMDSTA(mac->dma_if));
+ if (sta & (PAS_DMA_RXINT_RCMDSTA_BP |
+ PAS_DMA_RXINT_RCMDSTA_OO |
+ PAS_DMA_RXINT_RCMDSTA_BT))
+ printk(KERN_DEBUG "pasemi_mac: rcmdsta error: 0x%08x\n", sta);
+
+ sta = read_dma_reg(mac, PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch));
+ if (sta & (PAS_DMA_RXCHAN_CCMDSTA_DU |
+ PAS_DMA_RXCHAN_CCMDSTA_OD |
+ PAS_DMA_RXCHAN_CCMDSTA_FD |
+ PAS_DMA_RXCHAN_CCMDSTA_DT))
+ printk(KERN_DEBUG "pasemi_mac: ccmdsta error: 0x%08x\n", sta);
+
+ sta = read_dma_reg(mac, PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch));
+ if (sta & (PAS_DMA_TXCHAN_TCMDSTA_SZ |
+ PAS_DMA_TXCHAN_TCMDSTA_DB |
+ PAS_DMA_TXCHAN_TCMDSTA_DE |
+ PAS_DMA_TXCHAN_TCMDSTA_DA))
+ printk(KERN_DEBUG "pasemi_mac: tcmdsta error: 0x%08x\n", sta);
+
/* Clean out any pending buffers */
pasemi_mac_clean_tx(mac);
pasemi_mac_clean_rx(mac, RX_RING_SIZE);
write_dma_reg(mac, PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch), PAS_DMA_RXCHAN_CCMDSTA_ST);
for (retries = 0; retries < MAX_RETRIES; retries++) {
- stat = read_dma_reg(mac, PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch));
- if (!(stat & PAS_DMA_TXCHAN_TCMDSTA_ACT))
+ sta = read_dma_reg(mac, PAS_DMA_TXCHAN_TCMDSTA(mac->dma_txch));
+ if (!(sta & PAS_DMA_TXCHAN_TCMDSTA_ACT))
break;
cond_resched();
}
- if (stat & PAS_DMA_TXCHAN_TCMDSTA_ACT)
+ if (sta & PAS_DMA_TXCHAN_TCMDSTA_ACT)
dev_err(&mac->dma_pdev->dev, "Failed to stop tx channel\n");
for (retries = 0; retries < MAX_RETRIES; retries++) {
- stat = read_dma_reg(mac, PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch));
- if (!(stat & PAS_DMA_RXCHAN_CCMDSTA_ACT))
+ sta = read_dma_reg(mac, PAS_DMA_RXCHAN_CCMDSTA(mac->dma_rxch));
+ if (!(sta & PAS_DMA_RXCHAN_CCMDSTA_ACT))
break;
cond_resched();
}
- if (stat & PAS_DMA_RXCHAN_CCMDSTA_ACT)
+ if (sta & PAS_DMA_RXCHAN_CCMDSTA_ACT)
dev_err(&mac->dma_pdev->dev, "Failed to stop rx channel\n");
for (retries = 0; retries < MAX_RETRIES; retries++) {
- stat = read_dma_reg(mac, PAS_DMA_RXINT_RCMDSTA(mac->dma_if));
- if (!(stat & PAS_DMA_RXINT_RCMDSTA_ACT))
+ sta = read_dma_reg(mac, PAS_DMA_RXINT_RCMDSTA(mac->dma_if));
+ if (!(sta & PAS_DMA_RXINT_RCMDSTA_ACT))
break;
cond_resched();
}
- if (stat & PAS_DMA_RXINT_RCMDSTA_ACT)
+ if (sta & PAS_DMA_RXINT_RCMDSTA_ACT)
dev_err(&mac->dma_pdev->dev, "Failed to stop rx interface\n");
/* Then, disable the channel. This must be done separately from
unsigned long flags;
int i, nfrags;
- dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_SS | XCT_MACTX_CRC_PAD;
+ dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_CRC_PAD;
if (skb->ip_summed == CHECKSUM_PARTIAL) {
const unsigned char *nh = skb_network_header(skb);
spin_lock_irqsave(&txring->lock, flags);
- if (RING_AVAIL(txring) <= nfrags+3) {
- spin_unlock_irqrestore(&txring->lock, flags);
- pasemi_mac_clean_tx(mac);
- pasemi_mac_restart_tx_intr(mac);
- spin_lock_irqsave(&txring->lock, flags);
-
- if (RING_AVAIL(txring) <= nfrags+3) {
- /* Still no room -- stop the queue and wait for tx
- * intr when there's room.
- */
- netif_stop_queue(dev);
- goto out_err;
- }
+ /* Avoid stepping on the same cache line that the DMA controller
+ * is currently about to send, so leave at least 8 words available.
+ * Total free space needed is mactx + fragments + 8
+ */
+ if (RING_AVAIL(txring) < nfrags + 10) {
+ /* no room -- stop the queue and wait for tx intr */
+ netif_stop_queue(dev);
+ goto out_err;
}
TX_RING(mac, txring->next_to_fill) = mactx;
if (nfrags & 1)
nfrags++;
- txring->next_to_fill += nfrags + 1;
-
+ txring->next_to_fill = (txring->next_to_fill + nfrags + 1) &
+ (TX_RING_SIZE-1);
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
netif_napi_add(dev, &mac->napi, pasemi_mac_poll, 64);
- dev->features = NETIF_F_HW_CSUM | NETIF_F_LLTX | NETIF_F_SG;
+ dev->features = NETIF_F_IP_CSUM | NETIF_F_LLTX | NETIF_F_SG;
/* These should come out of the device tree eventually */
mac->dma_txch = index;