From 8918ac92eb689cbcb11b62d022055dc35be8307c Mon Sep 17 00:00:00 2001 From: jfv Date: Sat, 27 Mar 2010 00:21:40 +0000 Subject: [PATCH] Update the driver to Intel version 2.1.6 - Add some new hardware support for the 82599. - Big change to the interrupt architecture: it now uses a queue containing an RX/TX ring pair as the recipient of the interrupt, which reduces overall system interrupt and MSI-X vector usage. - Improved RX mbuf handling: the old get_buf routine is no longer synchronized with rxeof, which eliminates packet discards due to mbuf allocation failure. - Much simplified and improved AIM code; it now happens in the queue interrupt context and takes into account the traffic on both the RX and TX sides. - A variety of small tweaks, like the ring size, that have been seen as performance improvements. - Thanks to those who provided feedback or suggested changes; I hope I've caught all of them. --- sys/dev/ixgbe/LICENSE | 2 +- sys/dev/ixgbe/ixgbe.c | 1438 +++++++++++++--------------------- sys/dev/ixgbe/ixgbe.h | 162 ++-- sys/dev/ixgbe/ixgbe_82598.c | 84 +- sys/dev/ixgbe/ixgbe_82599.c | 158 +++- sys/dev/ixgbe/ixgbe_api.c | 17 +- sys/dev/ixgbe/ixgbe_api.h | 4 +- sys/dev/ixgbe/ixgbe_common.c | 159 +++- sys/dev/ixgbe/ixgbe_phy.c | 134 +++- sys/dev/ixgbe/ixgbe_phy.h | 10 +- sys/dev/ixgbe/ixgbe_type.h | 80 +- sys/modules/ixgbe/Makefile | 2 +- 12 files changed, 1157 insertions(+), 1093 deletions(-) diff --git a/sys/dev/ixgbe/LICENSE b/sys/dev/ixgbe/LICENSE index 39264e087beb..0cf44c8581e4 100644 --- a/sys/dev/ixgbe/LICENSE +++ b/sys/dev/ixgbe/LICENSE @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2009, Intel Corporation + Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c index f66443ae77f8..2c303d95ade4 100644 --- a/sys/dev/ixgbe/ixgbe.c +++ b/sys/dev/ixgbe/ixgbe.c @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2009, Intel Corporation + Copyright (c) 2001-2010, Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without @@ -46,7 +46,7 @@ int ixgbe_display_debug_stats = 0; /********************************************************************* * Driver version *********************************************************************/ -char ixgbe_driver_version[] = "2.0.7"; +char ixgbe_driver_version[] = "2.1.6"; /********************************************************************* * PCI Device ID Table @@ -76,6 +76,7 @@ static ixgbe_vendor_info_t ixgbe_vendor_info_array[] = {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0}, + {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} @@ -136,12 +137,11 @@ static void ixgbe_free_receive_structures(struct adapter *); static void ixgbe_free_receive_buffers(struct rx_ring *); static void ixgbe_setup_hw_rsc(struct rx_ring *); -static void ixgbe_init_moderation(struct adapter *); static void ixgbe_enable_intr(struct adapter *); static void ixgbe_disable_intr(struct adapter *); static void ixgbe_update_stats_counters(struct adapter *); static bool ixgbe_txeof(struct tx_ring *); -static bool ixgbe_rxeof(struct rx_ring *, int); +static bool ixgbe_rxeof(struct ix_queue *, int); static void ixgbe_rx_checksum(u32, struct mbuf *); static void ixgbe_set_promisc(struct adapter *); static void ixgbe_disable_promisc(struct adapter *); @@ -149,7 +149,7 @@ static void ixgbe_set_multi(struct adapter *); static void ixgbe_print_hw_stats(struct adapter *); static void ixgbe_print_debug_info(struct adapter *); static void ixgbe_update_link_status(struct adapter *); -static int ixgbe_get_buf(struct rx_ring *, int, int); +static void ixgbe_refresh_mbufs(struct rx_ring *, int); static int ixgbe_xmit(struct tx_ring *, struct mbuf **); static int ixgbe_sysctl_stats(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_debug(SYSCTL_HANDLER_ARGS); @@ -169,7 +169,9 @@ static void ixgbe_setup_vlan_hw_support(struct adapter *); static void ixgbe_register_vlan(void *, struct ifnet *, u16); static void ixgbe_unregister_vlan(void *, struct ifnet *, u16); -static void ixgbe_update_aim(struct rx_ring *); +static __inline void ixgbe_rx_discard(struct rx_ring *, int); +static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *, + struct mbuf *, u32); /* Support for pluggable optic modules */ static bool ixgbe_sfp_probe(struct adapter *); @@ -178,13 +180,11 @@ static bool ixgbe_sfp_probe(struct adapter *); static void ixgbe_legacy_irq(void *); /* The MSI/X Interrupt handlers */ -static void ixgbe_msix_tx(void *); -static void ixgbe_msix_rx(void *); +static void ixgbe_msix_que(void *); static void ixgbe_msix_link(void *); /* Deferred interrupt tasklets */ -static void ixgbe_handle_tx(void *, int); -static void ixgbe_handle_rx(void *, int); +static void ixgbe_handle_que(void *, int); static void ixgbe_handle_link(void *, int); static void ixgbe_handle_msf(void *, int); static void ixgbe_handle_mod(void *, int); @@ -222,23 +222,16 @@ MODULE_DEPEND(ixgbe, ether, 1, 1, 1); */ /* -** These parameters are used in Adaptive -** Interrupt Moderation. The value is set -** into EITR and controls the interrupt -** frequency. They can be modified but -** be careful in tuning them. 
+** AIM: Adaptive Interrupt Moderation +** which means that the interrupt rate +** is varied over time based on the +** traffic for that interrupt vector */ static int ixgbe_enable_aim = TRUE; TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim); -static int ixgbe_low_latency = IXGBE_LOW_LATENCY; -TUNABLE_INT("hw.ixgbe.low_latency", &ixgbe_low_latency); -static int ixgbe_ave_latency = IXGBE_AVE_LATENCY; -TUNABLE_INT("hw.ixgbe.ave_latency", &ixgbe_ave_latency); -static int ixgbe_bulk_latency = IXGBE_BULK_LATENCY; -TUNABLE_INT("hw.ixgbe.bulk_latency", &ixgbe_bulk_latency); /* How many packets rxeof tries to clean at a time */ -static int ixgbe_rx_process_limit = 100; +static int ixgbe_rx_process_limit = 128; TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit); /* Flow control setting, default to full */ @@ -271,20 +264,24 @@ static bool ixgbe_header_split = TRUE; TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split); /* - * Number of Queues, should normally - * be left at 0, it then autoconfigures to - * the number of cpus. Each queue is a pair - * of RX and TX rings with a dedicated interrupt + * Number of Queues, can be set to 0, + * it then autoconfigures based on the + * number of cpus. Each queue is a pair + * of RX and TX rings with a msix vector */ static int ixgbe_num_queues = 0; TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues); -/* Number of TX descriptors per ring */ -static int ixgbe_txd = DEFAULT_TXD; +/* +** Number of TX descriptors per ring, +** setting higher than RX as this seems +** the better performing choice. +*/ +static int ixgbe_txd = PERFORM_TXD; TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd); /* Number of RX descriptors per ring */ -static int ixgbe_rxd = DEFAULT_RXD; +static int ixgbe_rxd = PERFORM_RXD; TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd); /* Keep running tab on them for sanity check */ @@ -420,9 +417,11 @@ ixgbe_attach(device_t dev) case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM : case IXGBE_DEV_ID_82598EB_SFP_LOM : case IXGBE_DEV_ID_82598AT : - case IXGBE_DEV_ID_82598AT2 : adapter->optics = IFM_10G_SR; break; + case IXGBE_DEV_ID_82598AT2 : + adapter->optics = IFM_10G_T; + break; case IXGBE_DEV_ID_82598EB_XF_LR : adapter->optics = IFM_10G_LR; break; @@ -439,6 +438,10 @@ ixgbe_attach(device_t dev) case IXGBE_DEV_ID_82599_XAUI_LOM : case IXGBE_DEV_ID_82599_COMBO_BACKPLANE : ixgbe_num_segs = IXGBE_82599_SCATTER; + break; + case IXGBE_DEV_ID_82599_T3_LOM: + ixgbe_num_segs = IXGBE_82599_SCATTER; + adapter->optics = IFM_10G_T; default: break; } @@ -464,21 +467,6 @@ ixgbe_attach(device_t dev) OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW, &ixgbe_enable_aim, 1, "Interrupt Moderation"); - SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW, - &ixgbe_low_latency, 1, "Low Latency"); - - SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW, - &ixgbe_ave_latency, 1, "Average Latency"); - - SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), - SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW, - &ixgbe_bulk_latency, 1, "Bulk Latency"); - /* Set up the timer callout */ callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); @@ -592,22 +580,6 @@ ixgbe_attach(device_t dev) /* Setup OS specific network interface */ ixgbe_setup_interface(dev, adapter); -#ifdef IXGBE_IEEE1588 - /* - ** Setup the timer: IEEE 1588 support - */ - adapter->cycles.read = 
ixgbe_read_clock; - adapter->cycles.mask = (u64)-1; - adapter->cycles.mult = 1; - adapter->cycles.shift = IXGBE_TSYNC_SHIFT; - IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (1<<24) | - IXGBE_TSYNC_CYCLE_TIME * IXGBE_TSYNC_SHIFT); - IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000); - IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0xFF800000); - - // JFV - this is not complete yet -#endif - /* Sysctl for limiting the amount of work done in the taskqueue */ ixgbe_add_rx_process_limit(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, @@ -632,12 +604,13 @@ ixgbe_attach(device_t dev) (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" : ("Unknown")); - if (hw->bus.width <= ixgbe_bus_width_pcie_x4) { + if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && + (hw->bus.speed == ixgbe_bus_speed_2500)) { device_printf(dev, "PCI-Express bandwidth available" " for this card\n is not sufficient for" " optimal performance.\n"); device_printf(dev, "For optimal performance a x8 " - "PCI-Express slot is required.\n"); + "PCIE, or x4 PCIE 2 slot is required.\n"); } /* let hardware know driver is loaded */ @@ -670,8 +643,7 @@ static int ixgbe_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; + struct ix_queue *que = adapter->queues; u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_detach: begin"); @@ -686,17 +658,10 @@ ixgbe_detach(device_t dev) ixgbe_stop(adapter); IXGBE_CORE_UNLOCK(adapter); - for (int i = 0; i < adapter->num_queues; i++, txr++) { - if (txr->tq) { - taskqueue_drain(txr->tq, &txr->tx_task); - taskqueue_free(txr->tq); - } - } - - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - if (rxr->tq) { - taskqueue_drain(rxr->tq, &rxr->rx_task); - taskqueue_free(rxr->tq); + for (int i = 0; i < adapter->num_queues; i++, que++) { + if (que->tq) { + taskqueue_drain(que->tq, &que->que_task); + taskqueue_free(que->tq); } } @@ -833,6 +798,9 @@ ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m) /* Which queue to use */ if ((m->m_flags & M_FLOWID) != 0) i = m->m_pkthdr.flowid % adapter->num_queues; + else /* use the cpu we're on */ + i = curcpu % adapter->num_queues; + txr = &adapter->tx_rings[i]; if (IXGBE_TX_TRYLOCK(txr)) { @@ -849,59 +817,43 @@ ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) { struct adapter *adapter = txr->adapter; struct mbuf *next; - int err = 0; + int enqueued, err = 0; - if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) || - (!adapter->link_active)) { - err = drbr_enqueue(ifp, txr->br, m); + if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING || adapter->link_active == 0) { + if (m != NULL) + err = drbr_enqueue(ifp, txr->br, m); return (err); } - if (m == NULL) /* Called by tasklet */ - goto process; - - /* If nothing queued go right to xmit */ - if (!drbr_needs_enqueue(ifp, txr->br)) { - if ((err = ixgbe_xmit(txr, &m)) != 0) { - if (m != NULL) - err = drbr_enqueue(ifp, txr->br, m); - return (err); - } else { - /* Success, update stats */ - drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags); - /* Send a copy of the frame to the BPF listener */ - ETHER_BPF_MTAP(ifp, m); - /* Set the watchdog */ - txr->watchdog_check = TRUE; - } - - } else if ((err = drbr_enqueue(ifp, txr->br, m)) != 0) - return (err); - -process: - if (drbr_empty(ifp, txr->br)) - return (err); + enqueued = 0; + if (m == NULL) + next = drbr_dequeue(ifp, txr->br); + else + next = m; /* Process the queue */ - while (TRUE) { - if 
((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; - next = drbr_dequeue(ifp, txr->br); - if (next == NULL) - break; + while (next != NULL) { if ((err = ixgbe_xmit(txr, &next)) != 0) { if (next != NULL) err = drbr_enqueue(ifp, txr->br, next); break; } + enqueued++; drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags); + /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); - /* Set the watchdog */ - txr->watchdog_check = TRUE; + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + break; + if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD) { + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + break; + } + next = drbr_dequeue(ifp, txr->br); } - - if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD) - ifp->if_drv_flags |= IFF_DRV_OACTIVE; + + if (enqueued > 0) + txr->watchdog_check = TRUE; return (err); } @@ -938,8 +890,8 @@ ixgbe_qflush(struct ifnet *ifp) static int ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { - struct adapter *adapter = ifp->if_softc; - struct ifreq *ifr = (struct ifreq *) data; + struct adapter *adapter = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { @@ -999,8 +951,7 @@ ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; - /* Only allow changing when using header split */ - if ((mask & IFCAP_LRO) && (ixgbe_header_split)) + if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; @@ -1010,15 +961,6 @@ ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data) break; } -#ifdef IXGBE_IEEE1588 - /* - ** IOCTL support for Precision Time (IEEE 1588) Support - */ - case SIOCSHWTSTAMP: - error = ixgbe_hwtstamp_ioctl(adapter, ifp); - break; -#endif - default: IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); @@ -1045,15 +987,20 @@ ixgbe_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; - struct ixgbe_hw *hw; + struct ixgbe_hw *hw = &adapter->hw; u32 k, txdctl, mhadd, gpie; u32 rxdctl, rxctrl; int err; - INIT_DEBUGOUT("ixgbe_init: begin"); - - hw = &adapter->hw; mtx_assert(&adapter->core_mtx, MA_OWNED); + INIT_DEBUGOUT("ixgbe_init: begin"); + ixgbe_reset_hw(hw); + hw->adapter_stopped = FALSE; + ixgbe_stop_adapter(hw); + callout_stop(&adapter->timer); + + /* reprogram the RAR[0] in case user changed it. 
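(for example, a locally administered address the user set with ifconfig)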
*/ + ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr, @@ -1061,9 +1008,6 @@ ixgbe_init_locked(struct adapter *adapter) ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1); hw->addr_ctrl.rar_used_count = 1; - /* Do a warm reset */ - ixgbe_reset_hw(hw); - /* Prepare transmit descriptors and buffers */ if (ixgbe_setup_transmit_structures(adapter)) { device_printf(dev,"Could not setup transmit structures\n"); @@ -1071,6 +1015,7 @@ ixgbe_init_locked(struct adapter *adapter) return; } + ixgbe_init_hw(hw); ixgbe_initialize_transmit_units(adapter); /* Setup Multicast table */ @@ -1095,9 +1040,6 @@ ixgbe_init_locked(struct adapter *adapter) /* Configure RX settings */ ixgbe_initialize_receive_units(adapter); - /* Configure Interrupt Moderation */ - ixgbe_init_moderation(adapter); - gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE); if (hw->mac.type == ixgbe_mac_82599EB) { @@ -1174,7 +1116,7 @@ ixgbe_init_locked(struct adapter *adapter) if (hw->mac.type == ixgbe_mac_82598EB) rxctrl |= IXGBE_RXCTRL_DMBYPS; rxctrl |= IXGBE_RXCTRL_RXEN; - IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl); + ixgbe_enable_rx_dma(hw, rxctrl); callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); @@ -1291,34 +1233,22 @@ ixgbe_rearm_queues(struct adapter *adapter, u64 queues) } } -static void -ixgbe_handle_rx(void *context, int pending) -{ - struct rx_ring *rxr = context; - struct adapter *adapter = rxr->adapter; - u32 loop = MAX_LOOP; - bool more; - - do { - more = ixgbe_rxeof(rxr, -1); - } while (loop-- && more); - /* Reenable this interrupt */ - ixgbe_enable_queue(adapter, rxr->msix); -} static void -ixgbe_handle_tx(void *context, int pending) +ixgbe_handle_que(void *context, int pending) { - struct tx_ring *txr = context; - struct adapter *adapter = txr->adapter; + struct ix_queue *que = context; + struct adapter *adapter = que->adapter; + struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; u32 loop = MAX_LOOP; - bool more; + bool more_rx, more_tx; IXGBE_TX_LOCK(txr); do { - more = ixgbe_txeof(txr); - } while (loop-- && more); + more_rx = ixgbe_rxeof(que, adapter->rx_process_limit); + more_tx = ixgbe_txeof(txr); + } while (loop-- && (more_rx || more_tx)); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { #if __FreeBSD_version >= 800000 @@ -1332,7 +1262,7 @@ ixgbe_handle_tx(void *context, int pending) IXGBE_TX_UNLOCK(txr); /* Reenable this interrupt */ - ixgbe_enable_queue(adapter, txr->msix); + ixgbe_enable_queue(adapter, que->msix); } @@ -1345,33 +1275,32 @@ ixgbe_handle_tx(void *context, int pending) static void ixgbe_legacy_irq(void *arg) { - struct adapter *adapter = arg; + struct ix_queue *que = arg; + struct adapter *adapter = que->adapter; struct ixgbe_hw *hw = &adapter->hw; struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; - bool more; + bool more_tx, more_rx; u32 reg_eicr, loop = MAX_LOOP; reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR); + ++que->irqs; if (reg_eicr == 0) { ixgbe_enable_intr(adapter); return; } - if (ixgbe_rxeof(rxr, adapter->rx_process_limit)) - taskqueue_enqueue(rxr->tq, &rxr->rx_task); + more_rx = ixgbe_rxeof(que, adapter->rx_process_limit); IXGBE_TX_LOCK(txr); - ++txr->tx_irq; do { - more = ixgbe_txeof(txr); - } while (loop-- && more); + more_tx = ixgbe_txeof(txr); + } while (loop-- && more_tx); IXGBE_TX_UNLOCK(txr); - if (more) - taskqueue_enqueue(txr->tq, &txr->tx_task); + if (more_rx || more_tx) + taskqueue_enqueue(que->tq, &que->que_task); /* Check for fan 
failure */ if ((hw->phy.media_type == ixgbe_media_type_copper) && @@ -1382,15 +1311,8 @@ ixgbe_legacy_irq(void *arg) } /* Link status change */ - if (reg_eicr & IXGBE_EICR_LSC) { - ixgbe_check_link(&adapter->hw, - &adapter->link_speed, &adapter->link_up, 0); - ixgbe_update_link_status(adapter); - } - - /* Update interrupt rate */ - if (ixgbe_enable_aim == TRUE) - ixgbe_update_aim(rxr); + if (reg_eicr & IXGBE_EICR_LSC) + taskqueue_enqueue(adapter->tq, &adapter->link_task); ixgbe_enable_intr(adapter); return; @@ -1399,55 +1321,85 @@ ixgbe_legacy_irq(void *arg) /********************************************************************* * - * MSI TX Interrupt Service routine + * MSI Queue Interrupt Service routine * **********************************************************************/ void -ixgbe_msix_tx(void *arg) +ixgbe_msix_que(void *arg) { - struct tx_ring *txr = arg; - struct adapter *adapter = txr->adapter; - bool more; + struct ix_queue *que = arg; + struct adapter *adapter = que->adapter; + struct tx_ring *txr = que->txr; + struct rx_ring *rxr = que->rxr; + bool more_tx, more_rx; + u32 newitr = 0; - ixgbe_disable_queue(adapter, txr->msix); + ixgbe_disable_queue(adapter, que->msix); + ++que->irqs; + + more_rx = ixgbe_rxeof(que, adapter->rx_process_limit); IXGBE_TX_LOCK(txr); - ++txr->tx_irq; - more = ixgbe_txeof(txr); + more_tx = ixgbe_txeof(txr); IXGBE_TX_UNLOCK(txr); - if (more) - taskqueue_enqueue(txr->tq, &txr->tx_task); - else /* Reenable this interrupt */ - ixgbe_enable_queue(adapter, txr->msix); - return; -} + more_rx = ixgbe_rxeof(que, adapter->rx_process_limit); -/********************************************************************* - * - * MSIX RX Interrupt Service routine - * - **********************************************************************/ -static void -ixgbe_msix_rx(void *arg) -{ - struct rx_ring *rxr = arg; - struct adapter *adapter = rxr->adapter; - bool more; + /* Do AIM now? */ - ixgbe_disable_queue(adapter, rxr->msix); + if (ixgbe_enable_aim == FALSE) + goto no_calc; + /* + ** Do Adaptive Interrupt Moderation: + ** - Write out last calculated setting + ** - Calculate based on average size over + ** the last interval. 
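+ ** (a worked example with assumed numbers: 48000 bytes
+ ** over 60 packets averages 800; adding 24 gives 824,
+ ** which is mid range, so 824 / 3 = 274 becomes the
+ ** setting saved for the next EITR write)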
+ */ + if (que->eitr_setting) + IXGBE_WRITE_REG(&adapter->hw, + IXGBE_EITR(que->msix), que->eitr_setting); + + que->eitr_setting = 0; - ++rxr->rx_irq; - more = ixgbe_rxeof(rxr, adapter->rx_process_limit); + /* Idle, do nothing */ + if ((txr->bytes == 0) && (rxr->bytes == 0)) + goto no_calc; + + if ((txr->bytes) && (txr->packets)) + newitr = txr->bytes/txr->packets; + if ((rxr->bytes) && (rxr->packets)) + newitr = max(newitr, + (rxr->bytes / rxr->packets)); + newitr += 24; /* account for hardware frame, crc */ - /* Update interrupt rate */ - if (ixgbe_enable_aim == TRUE) - ixgbe_update_aim(rxr); + /* set an upper boundary */ + newitr = min(newitr, 3000); - if (more) - taskqueue_enqueue(rxr->tq, &rxr->rx_task); + /* Be nice to the mid range */ + if ((newitr > 300) && (newitr < 1200)) + newitr = (newitr / 3); else - ixgbe_enable_queue(adapter, rxr->msix); + newitr = (newitr / 2); + + if (adapter->hw.mac.type == ixgbe_mac_82598EB) + newitr |= newitr << 16; + else + newitr |= IXGBE_EITR_CNT_WDIS; + + /* save for next interrupt */ + que->eitr_setting = newitr; + + /* Reset state */ + txr->bytes = 0; + txr->packets = 0; + rxr->bytes = 0; + rxr->packets = 0; + +no_calc: + if (more_tx || more_rx) + taskqueue_enqueue(que->tq, &que->que_task); + else /* Reenable this interrupt */ + ixgbe_enable_queue(adapter, que->msix); return; } @@ -1512,84 +1464,6 @@ ixgbe_msix_link(void *arg) return; } -/* -** Routine to do adjust the RX EITR value based on traffic, -** its a simple three state model, but seems to help. -** -** Note that the three EITR values are tuneable using -** sysctl in real time. The feature can be effectively -** nullified by setting them equal. -*/ -#define BULK_THRESHOLD 10000 -#define AVE_THRESHOLD 1600 - -static void -ixgbe_update_aim(struct rx_ring *rxr) -{ - struct adapter *adapter = rxr->adapter; - u32 olditr, newitr; - - /* Update interrupt moderation based on traffic */ - olditr = rxr->eitr_setting; - newitr = olditr; - - /* Idle, don't change setting */ - if (rxr->bytes == 0) - return; - - if (olditr == ixgbe_low_latency) { - if (rxr->bytes > AVE_THRESHOLD) - newitr = ixgbe_ave_latency; - } else if (olditr == ixgbe_ave_latency) { - if (rxr->bytes < AVE_THRESHOLD) - newitr = ixgbe_low_latency; - else if (rxr->bytes > BULK_THRESHOLD) - newitr = ixgbe_bulk_latency; - } else if (olditr == ixgbe_bulk_latency) { - if (rxr->bytes < BULK_THRESHOLD) - newitr = ixgbe_ave_latency; - } - - if (olditr != newitr) { - /* Change interrupt rate */ - rxr->eitr_setting = newitr; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(rxr->me), - newitr | (newitr << 16)); - } - - rxr->bytes = 0; - return; -} - -static void -ixgbe_init_moderation(struct adapter *adapter) -{ - struct rx_ring *rxr = adapter->rx_rings; - struct tx_ring *txr = adapter->tx_rings; - - /* Single interrupt - MSI or Legacy? 
*/ - if (adapter->msix < 2) { - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(0), 100); - return; - } - - /* TX irq moderation rate is fixed */ - for (int i = 0; i < adapter->num_queues; i++, txr++) - IXGBE_WRITE_REG(&adapter->hw, - IXGBE_EITR(txr->msix), ixgbe_ave_latency); - - /* RX moderation will be adapted over time, set default */ - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - IXGBE_WRITE_REG(&adapter->hw, - IXGBE_EITR(rxr->msix), ixgbe_low_latency); - } - - /* Set Link moderation */ - IXGBE_WRITE_REG(&adapter->hw, - IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR); - -} - /********************************************************************* * * Media Ioctl callback @@ -1665,11 +1539,10 @@ ixgbe_media_change(struct ifnet * ifp) /********************************************************************* * - * This routine maps the mbufs to tx descriptors. - * WARNING: while this code is using an MQ style infrastructure, - * it would NOT work as is with more than 1 queue. + * This routine maps the mbufs to tx descriptors, allowing the + * TX engine to transmit the packets. + * - return 0 on success, positive on failure * - * return 0 on success, positive on failure **********************************************************************/ static int @@ -1695,14 +1568,6 @@ ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) if (m_head->m_flags & M_VLANTAG) cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE; - /* Do a clean if descriptors are low */ - if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) { - ixgbe_txeof(txr); - /* Now do we at least have a minimal? */ - if (txr->tx_avail <= IXGBE_TX_OP_THRESHOLD) - return (ENOBUFS); - } - /* * Important to capture the first descriptor * used because it will contain the index of @@ -1756,7 +1621,7 @@ ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) /* Make certain there are enough descriptors */ if (nsegs > txr->tx_avail - 2) { - txr->no_tx_desc_avail++; + txr->no_desc_avail++; error = ENOBUFS; goto xmit_fail; } @@ -1814,7 +1679,7 @@ ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) txd->read.cmd_type_len = htole32(txr->txd_cmd | cmd_type_len |seglen); txd->read.olinfo_status = htole32(olinfo_status); - last = i; /* Next descriptor that will get completed */ + last = i; /* descriptor that will get completion IRQ */ if (++i == adapter->num_tx_desc) i = 0; @@ -1843,7 +1708,13 @@ ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp) * hardware that this frame is available to transmit. 
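* Writing TDT hands the newly built descriptors over to the NIC.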
*/ ++txr->total_packets; + txr->watchdog_time = ticks; IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i); + + /* Do a clean if descriptors are low */ + if (txr->tx_avail <= IXGBE_TX_CLEANUP_THRESHOLD) + ixgbe_txeof(txr); + return (0); xmit_fail: @@ -1978,7 +1849,6 @@ ixgbe_local_timer(void *arg) struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; - bool tx_hung = FALSE; mtx_assert(&adapter->core_mtx, MA_OWNED); @@ -1989,21 +1859,32 @@ ixgbe_local_timer(void *arg) ixgbe_update_link_status(adapter); ixgbe_update_stats_counters(adapter); - if (ixgbe_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING) { + + /* Debug display */ + if (ixgbe_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING) ixgbe_print_hw_stats(adapter); - } + + /* + * If the interface has been paused + * then don't do the watchdog check + */ + if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF) + goto out; /* ** Check for time since any descriptor was cleaned */ for (int i = 0; i < adapter->num_queues; i++, txr++) { - if (txr->watchdog_check == FALSE) + IXGBE_TX_LOCK(txr); + if (txr->watchdog_check == FALSE) { + IXGBE_TX_UNLOCK(txr); continue; - if ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG) { - tx_hung = TRUE; - goto hung; } + if ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG) + goto hung; + IXGBE_TX_UNLOCK(txr); } out: + ixgbe_rearm_queues(adapter, adapter->que_mask); callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); return; @@ -2017,6 +1898,7 @@ ixgbe_local_timer(void *arg) txr->me, txr->tx_avail, txr->next_to_clean); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; + IXGBE_TX_UNLOCK(txr); ixgbe_init_locked(adapter); } @@ -2123,8 +2005,7 @@ static int ixgbe_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; + struct ix_queue *que = adapter->queues; int error, rid = 0; /* MSI RID at 1 */ @@ -2144,15 +2025,10 @@ ixgbe_allocate_legacy(struct adapter *adapter) * Try allocating a fast interrupt and the associated deferred * processing contexts. 
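* The que taskqueue started here picks up any RX/TX
* work the fast handler leaves behind.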
*/ - TASK_INIT(&txr->tx_task, 0, ixgbe_handle_tx, txr); - TASK_INIT(&rxr->rx_task, 0, ixgbe_handle_rx, rxr); - txr->tq = taskqueue_create_fast("ixgbe_txq", M_NOWAIT, - taskqueue_thread_enqueue, &txr->tq); - rxr->tq = taskqueue_create_fast("ixgbe_rxq", M_NOWAIT, - taskqueue_thread_enqueue, &rxr->tq); - taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", - device_get_nameunit(adapter->dev)); - taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq", + TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); + que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, + taskqueue_thread_enqueue, &que->tq); + taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq", device_get_nameunit(adapter->dev)); /* Tasklets for Link, SFP and Multispeed Fiber */ @@ -2169,15 +2045,17 @@ ixgbe_allocate_legacy(struct adapter *adapter) if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq, - adapter, &adapter->tag)) != 0) { + que, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); - taskqueue_free(txr->tq); - taskqueue_free(rxr->tq); - txr->tq = NULL; - rxr->tq = NULL; + taskqueue_free(que->tq); + taskqueue_free(adapter->tq); + que->tq = NULL; + adapter->tq = NULL; return (error); } + /* For simplicity in the handlers */ + adapter->que_mask = IXGBE_EIMS_ENABLE_MASK; return (0); } @@ -2192,83 +2070,44 @@ static int ixgbe_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; + struct ix_queue *que = adapter->queues; int error, rid, vector = 0; - /* TX setup: the code is here for multi tx, - there are other parts of the driver not ready for it */ - for (int i = 0; i < adapter->num_queues; i++, vector++, txr++) { + for (int i = 0; i < adapter->num_queues; i++, vector++, que++) { rid = vector + 1; - txr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, + que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (!txr->res) { + if (que->res == NULL) { device_printf(dev,"Unable to allocate" - " bus resource: tx interrupt [%d]\n", vector); + " bus resource: que interrupt [%d]\n", vector); return (ENXIO); } /* Set the handler function */ - error = bus_setup_intr(dev, txr->res, + error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, - ixgbe_msix_tx, txr, &txr->tag); + ixgbe_msix_que, que, &que->tag); if (error) { - txr->res = NULL; - device_printf(dev, "Failed to register TX handler"); + que->res = NULL; + device_printf(dev, "Failed to register QUE handler"); return (error); } - txr->msix = vector; + que->msix = vector; + adapter->que_mask |= (u64)(1 << que->msix); /* ** Bind the msix vector, and thus the ** ring to the corresponding cpu. 
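** (skipped when only a single queue is configured)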
*/ if (adapter->num_queues > 1) - bus_bind_intr(dev, txr->res, i); + bus_bind_intr(dev, que->res, i); - TASK_INIT(&txr->tx_task, 0, ixgbe_handle_tx, txr); - txr->tq = taskqueue_create_fast("ixgbe_txq", M_NOWAIT, - taskqueue_thread_enqueue, &txr->tq); - taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", + TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); + que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, + taskqueue_thread_enqueue, &que->tq); + taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); } - /* RX setup */ - for (int i = 0; i < adapter->num_queues; i++, vector++, rxr++) { - rid = vector + 1; - rxr->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); - if (!rxr->res) { - device_printf(dev,"Unable to allocate" - " bus resource: rx interrupt [%d]," - "rid = %d\n", i, rid); - return (ENXIO); - } - /* Set the handler function */ - error = bus_setup_intr(dev, rxr->res, - INTR_TYPE_NET | INTR_MPSAFE, NULL, - ixgbe_msix_rx, rxr, &rxr->tag); - if (error) { - rxr->res = NULL; - device_printf(dev, "Failed to register RX handler"); - return (error); - } - rxr->msix = vector; - /* used in local timer */ - adapter->rx_mask |= (u64)(1 << vector); - /* - ** Bind the msix vector, and thus the - ** ring to the corresponding cpu. - */ - if (adapter->num_queues > 1) - bus_bind_intr(dev, rxr->res, i); - - TASK_INIT(&rxr->rx_task, 0, ixgbe_handle_rx, rxr); - rxr->tq = taskqueue_create_fast("ixgbe_rxq", M_NOWAIT, - taskqueue_thread_enqueue, &rxr->tq); - taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq", - device_get_nameunit(adapter->dev)); - } - - /* Now for Link changes */ + /* and Link */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); @@ -2340,15 +2179,15 @@ ixgbe_setup_msix(struct adapter *adapter) } /* Figure out a reasonable auto config value */ - queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus; + queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus; if (ixgbe_num_queues == 0) ixgbe_num_queues = queues; /* - ** Want two vectors (RX/TX) per queue + ** Want one vector (RX/TX pair) per queue ** plus an additional for Link. 
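+ ** (e.g. 8 queues now want 9 vectors, where the old
+ ** two-vector scheme would have asked for 17)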
*/ - want = (ixgbe_num_queues * 2) + 1; + want = ixgbe_num_queues + 1; if (msgs >= want) msgs = want; else { @@ -2409,8 +2248,7 @@ ixgbe_allocate_pci_resources(struct adapter *adapter) static void ixgbe_free_pci_resources(struct adapter * adapter) { - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; + struct ix_queue *que = adapter->queues; device_t dev = adapter->dev; int rid, memrid; @@ -2431,29 +2269,18 @@ ixgbe_free_pci_resources(struct adapter * adapter) goto mem; /* - ** Release all the interrupt resources: - ** notice this is harmless for Legacy or - ** MSI since pointers will always be NULL + ** Release all msix queue resources: */ - for (int i = 0; i < adapter->num_queues; i++, txr++) { - rid = txr->msix + 1; - if (txr->tag != NULL) { - bus_teardown_intr(dev, txr->res, txr->tag); - txr->tag = NULL; + for (int i = 0; i < adapter->num_queues; i++, que++) { + rid = que->msix + 1; + if (que->tag != NULL) { + bus_teardown_intr(dev, que->res, que->tag); + que->tag = NULL; } - if (txr->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res); + if (que->res != NULL) + bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - rid = rxr->msix + 1; - if (rxr->tag != NULL) { - bus_teardown_intr(dev, rxr->res, rxr->tag); - rxr->tag = NULL; - } - if (rxr->res != NULL) - bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res); - } /* Clean the Legacy or Link interrupt last */ if (adapter->linkvec) /* we are doing MSIX */ @@ -2467,6 +2294,7 @@ ixgbe_free_pci_resources(struct adapter * adapter) } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); + mem: if (adapter->msix) pci_release_msi(dev); @@ -2524,9 +2352,7 @@ ixgbe_setup_interface(device_t dev, struct adapter *adapter) ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO4 | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; - ifp->if_capabilities |= IFCAP_JUMBO_MTU; - if (ixgbe_header_split) - ifp->if_capabilities |= IFCAP_LRO; + ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_LRO; ifp->if_capenable = ifp->if_capabilities; @@ -2546,6 +2372,7 @@ ixgbe_setup_interface(device_t dev, struct adapter *adapter) } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); + return; } @@ -2556,17 +2383,7 @@ ixgbe_config_link(struct adapter *adapter) u32 autoneg, err = 0; bool sfp, negotiate; - switch (hw->phy.type) { - case ixgbe_phy_sfp_avago: - case ixgbe_phy_sfp_ftl: - case ixgbe_phy_sfp_intel: - case ixgbe_phy_sfp_unknown: - case ixgbe_phy_tw_tyco: - case ixgbe_phy_tw_unknown: - sfp = TRUE; - default: - sfp = FALSE; - } + sfp = ixgbe_is_sfp(hw); if (sfp) { if (hw->phy.multispeed_fiber) { @@ -2613,8 +2430,8 @@ ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size, device_t dev = adapter->dev; int r; - r = bus_dma_tag_create(NULL, /* parent */ - 1, 0, /* alignment, bounds */ + r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ + DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ @@ -2679,21 +2496,30 @@ ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma) static int ixgbe_allocate_queues(struct adapter *adapter) { - device_t dev = adapter->dev; - struct tx_ring *txr; - struct rx_ring *rxr; + device_t dev = adapter->dev; + struct ix_queue *que; + struct tx_ring *txr; + struct rx_ring *rxr; int rsize, tsize, 
error = IXGBE_SUCCESS; int txconf = 0, rxconf = 0; + /* First allocate the top level queue structs */ + if (!(adapter->queues = + (struct ix_queue *) malloc(sizeof(struct ix_queue) * + adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { + device_printf(dev, "Unable to allocate queue memory\n"); + error = ENOMEM; + goto fail; + } + /* First allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; - goto fail; + goto tx_fail; } - txr = adapter->tx_rings; /* Next allocate the RX */ if (!(adapter->rx_rings = @@ -2703,11 +2529,10 @@ ixgbe_allocate_queues(struct adapter *adapter) error = ENOMEM; goto rx_fail; } - rxr = adapter->rx_rings; /* For the ring itself */ tsize = roundup2(adapter->num_tx_desc * - sizeof(union ixgbe_adv_tx_desc), 4096); + sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN); /* * Now set up the TX queues, txconf is needed to handle the @@ -2746,6 +2571,12 @@ ixgbe_allocate_queues(struct adapter *adapter) /* Allocate a buf ring */ txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF, M_WAITOK, &txr->tx_mtx); + if (txr->br == NULL) { + device_printf(dev, + "Critical Failure setting up buf ring\n"); + error = ENOMEM; + goto err_tx_desc; + } #endif } @@ -2753,7 +2584,7 @@ ixgbe_allocate_queues(struct adapter *adapter) * Next the RX queues... */ rsize = roundup2(adapter->num_rx_desc * - sizeof(union ixgbe_adv_rx_desc), 4096); + sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; /* Set up some basics */ @@ -2784,6 +2615,16 @@ ixgbe_allocate_queues(struct adapter *adapter) } } + /* + ** Finally set up the queue holding structs + */ + for (int i = 0; i < adapter->num_queues; i++) { + que = &adapter->queues[i]; + que->adapter = adapter; + que->txr = &adapter->tx_rings[i]; + que->rxr = &adapter->rx_rings[i]; + } + return (0); err_rx_desc: @@ -2795,6 +2636,8 @@ ixgbe_allocate_queues(struct adapter *adapter) free(adapter->rx_rings, M_DEVBUF); rx_fail: free(adapter->tx_rings, M_DEVBUF); +tx_fail: + free(adapter->queues, M_DEVBUF); fail: return (error); } @@ -2871,6 +2714,7 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr) int i; /* Clear the old ring contents */ + IXGBE_TX_LOCK(txr); bzero((void *)txr->tx_base, (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ @@ -2902,6 +2746,7 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr) bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + IXGBE_TX_UNLOCK(txr); } /********************************************************************* @@ -3281,6 +3126,7 @@ static void ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) { struct adapter *adapter = txr->adapter; + struct ix_queue *que; struct ixgbe_atr_input atr_input; struct ip *ip; struct tcphdr *th; @@ -3331,6 +3177,7 @@ ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) src_ipv4_addr = ip->ip_src.s_addr; dst_ipv4_addr = ip->ip_dst.s_addr; flex_bytes = etype; + que = &adapter->queues[txr->me]; ixgbe_atr_set_vlan_id_82599(&atr_input, vlan_id); ixgbe_atr_set_src_port_82599(&atr_input, dst_port); @@ -3343,7 +3190,7 @@ ixgbe_atr(struct tx_ring *txr, struct mbuf *mp) /* This assumes the Rx queue and Tx queue are bound to the same CPU */ ixgbe_fdir_add_signature_filter_82599(&adapter->hw, - &atr_input, txr->msix); + &atr_input, que->msix); } #endif @@ -3401,7 +3248,8 @@ 
ixgbe_txeof(struct tx_ring *txr) ++txr->tx_avail; if (tx_buffer->m_head) { - ifp->if_opackets++; + txr->bytes += + tx_buffer->m_head->m_pkthdr.len; bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); @@ -3421,6 +3269,8 @@ ixgbe_txeof(struct tx_ring *txr) tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first]; } + ++txr->packets; + ++ifp->if_opackets; /* See if there is more work now */ last = tx_buffer->eop_index; if (last != -1) { @@ -3456,27 +3306,32 @@ ixgbe_txeof(struct tx_ring *txr) /********************************************************************* * - * Refresh mbuf buffers for a range of descriptors + * Refresh mbuf buffers for RX descriptor rings + * - now keeps its own state so discards due to resource + * exhaustion are unnecessary; if an mbuf cannot be obtained + * it just returns, keeping its placeholder, so it can simply + * be recalled to try again. * **********************************************************************/ -static int -ixgbe_get_buf(struct rx_ring *rxr, int first, int limit) +static void +ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; bus_dma_segment_t seg[2]; struct ixgbe_rx_buf *rxbuf; struct mbuf *mh, *mp; bus_dmamap_t map; - int i, nsegs, error; + int i, nsegs, error, cleaned; - i = first; + i = rxr->next_to_refresh; + cleaned = -1; /* Signify no completions */ while (i != limit) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head == NULL) { mh = m_gethdr(M_DONTWAIT, MT_DATA); if (mh == NULL) - goto failure; + goto update; } else /* reuse */ mh = rxbuf->m_head; @@ -3487,13 +3342,14 @@ ixgbe_get_buf(struct rx_ring *rxr, int first, int limit) mp = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (mp == NULL) - goto failure; + goto update; mp->m_len = adapter->rx_mbuf_sz; mp->m_flags &= ~M_PKTHDR; } else { /* reusing */ mp = rxbuf->m_pack; mp->m_len = adapter->rx_mbuf_sz; mp->m_flags &= ~M_PKTHDR; + mp->m_next = NULL; } /* @@ -3509,7 +3365,7 @@ ixgbe_get_buf(struct rx_ring *rxr, int first, int limit) if (error != 0) { printf("GET BUF: dmamap load failure - %d\n", error); m_free(mh); - return (error); + goto update; } /* Unload old mapping and update buffer struct */ @@ -3527,52 +3383,21 @@ ixgbe_get_buf(struct rx_ring *rxr, int first, int limit) rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr); rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr); + cleaned = i; /* Calculate next index */ if (++i == adapter->num_rx_desc) i = 0; + /* This is the work marker for refresh */ + rxr->next_to_refresh = i; } +update: + if (cleaned != -1) /* If we refreshed some, bump tail */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), cleaned); - return (0); - -failure: - panic("GET BUF: ENOBUFS\n"); - -#if 0 - /* - ** If we get here, we have an mbuf resource - ** issue, so we discard the incoming packet - ** and attempt to reuse existing mbufs next - ** pass thru the ring, but to do so we must - ** fix up the descriptor which had the address - ** clobbered with writeback info. - */ -remap: - adapter->mbuf_header_failed++; - merr = ENOBUFS; - /* Is there a reusable buffer?
*/ - mh = rxr->rx_buffers[i].m_head; - if (mh == NULL) /* Nope, init error */ - return (merr); - mp = rxr->rx_buffers[i].m_pack; - if (mp == NULL) /* Nope, init error */ - return (merr); - /* Get our old mapping */ - rxbuf = &rxr->rx_buffers[i]; - error = bus_dmamap_load_mbuf_sg(rxr->rxtag, - rxbuf->map, mh, seg, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - /* We really have a problem */ - m_free(mh); - return (error); - } - /* Now fix the descriptor as needed */ - rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr); - rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr); - - return (merr); -#endif + return; } + /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one @@ -3603,7 +3428,7 @@ ixgbe_allocate_receive_buffers(struct rx_ring *rxr) ** with packet split (hence the two segments, even though ** it may not always use this. */ - if ((error = bus_dma_tag_create(NULL, /* parent */ + if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ @@ -3646,7 +3471,6 @@ ixgbe_allocate_receive_buffers(struct rx_ring *rxr) return (error); } - /* ** Used to detect a descriptor that has ** been merged by Hardware RSC. @@ -3670,11 +3494,12 @@ ixgbe_setup_hw_rsc(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct ixgbe_hw *hw = &adapter->hw; - u32 rscctrl, rdrxctl; + u32 rscctrl, rdrxctl; rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; + rdrxctl |= IXGBE_RDRXCTL_RSCACKC; IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me)); @@ -3723,6 +3548,7 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr) dev = adapter->dev; /* Clear the ring contents */ + IXGBE_RX_LOCK(rxr); rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); @@ -3781,14 +3607,12 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr) /* Setup our descriptor indices */ rxr->next_to_check = 0; - rxr->last_refreshed = 0; + rxr->next_to_refresh = 0; rxr->lro_enabled = FALSE; /* Use header split if configured */ if (ixgbe_header_split) rxr->hdr_split = TRUE; - else - ifp->if_capabilities &= ~IFCAP_LRO; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); @@ -3796,40 +3620,25 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr) /* ** Now set up the LRO interface: ** 82598 uses software LRO, the - ** 82599 on the other hand uses a - ** hardware assist to do the same. - ** We only do LRO when header split - ** is enabled, its simpler that way. + ** 82599 additionally uses a + ** hardware assist. + ** + ** Disable RSC when RXCSUM is off */ - if ((ifp->if_capenable & IFCAP_LRO) && (rxr->hdr_split)) { + if ((adapter->hw.mac.type == ixgbe_mac_82599EB) && + (ifp->if_capenable & IFCAP_RXCSUM)) + ixgbe_setup_hw_rsc(rxr); + else if (ifp->if_capenable & IFCAP_LRO) { int err = tcp_lro_init(lro); if (err) panic("LRO Initialization failed!\n"); INIT_DEBUGOUT("RX Soft LRO Initialized\n"); rxr->lro_enabled = TRUE; lro->ifp = adapter->ifp; - ixgbe_setup_hw_rsc(rxr); } + IXGBE_RX_UNLOCK(rxr); return (0); -#if 0 -fail: - /* - * We need to clean up any buffers allocated - * so far, 'j' is the failing index. 
- */ - for (int i = 0; i < j; i++) { - rxbuf = &rxr->rx_buffers[i]; - if (rxbuf->m_head != NULL) { - bus_dmamap_sync(rxr->rxtag, rxbuf->map, - BUS_DMASYNC_POSTREAD); - bus_dmamap_unload(rxr->rxtag, rxbuf->map); - m_freem(rxbuf->m_head); - rxbuf->m_head = NULL; - } - } - return (ENOBUFS); -#endif } /********************************************************************* @@ -4025,6 +3834,7 @@ ixgbe_free_receive_structures(struct adapter *adapter) free(adapter->rx_rings, M_DEVBUF); } + /********************************************************************* * * Free receive ring data structures @@ -4065,6 +3875,61 @@ ixgbe_free_receive_buffers(struct rx_ring *rxr) return; } +static __inline void +ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) +{ + + /* + * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet + * should be computed by hardware. Also it should not have VLAN tag in + * ethernet header. + */ + if (rxr->lro_enabled && + (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && + (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 && + (ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) == + (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) && + (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { + /* + * Send to the stack if: + ** - LRO not enabled, or + ** - no LRO resources, or + ** - lro enqueue fails + */ + if (rxr->lro.lro_cnt != 0) + if (tcp_lro_rx(&rxr->lro, m, 0) == 0) + return; + } + (*ifp->if_input)(ifp, m); +} + +static __inline void +ixgbe_rx_discard(struct rx_ring *rxr, int i) +{ + struct adapter *adapter = rxr->adapter; + struct ixgbe_rx_buf *rbuf; + struct mbuf *mh, *mp; + + rbuf = &rxr->rx_buffers[i]; + if (rbuf->fmp != NULL) /* Partial chain ? */ + m_freem(rbuf->fmp); + + mh = rbuf->m_head; + mp = rbuf->m_pack; + + /* Reuse loaded DMA map and just update mbuf chain */ + mh->m_len = MHLEN; + mh->m_flags |= M_PKTHDR; + mh->m_next = NULL; + + mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz; + mp->m_data = mp->m_ext.ext_buf; + mp->m_next = NULL; + return; +} + + /********************************************************************* * * This routine executes in interrupt context. It replenishes @@ -4077,154 +3942,176 @@ ixgbe_free_receive_buffers(struct rx_ring *rxr) * Return TRUE for more work, FALSE for all clean. *********************************************************************/ static bool -ixgbe_rxeof(struct rx_ring *rxr, int count) +ixgbe_rxeof(struct ix_queue *que, int count) { - struct adapter *adapter = rxr->adapter; - struct ifnet *ifp = adapter->ifp; + struct adapter *adapter = que->adapter; + struct rx_ring *rxr = que->rxr; + struct ifnet *ifp = adapter->ifp; struct lro_ctrl *lro = &rxr->lro; struct lro_entry *queued; - int i, processed = 0; - u32 staterr; + int i, nextp, processed = 0; + u32 staterr = 0; union ixgbe_adv_rx_desc *cur; - + struct ixgbe_rx_buf *rbuf, *nbuf; IXGBE_RX_LOCK(rxr); - i = rxr->next_to_check; - cur = &rxr->rx_base[i]; - staterr = cur->wb.upper.status_error; - if (!(staterr & IXGBE_RXD_STAT_DD)) { - IXGBE_RX_UNLOCK(rxr); - return FALSE; - } + for (i = rxr->next_to_check; count != 0;) { + struct mbuf *sendmp, *mh, *mp; + u32 rsc, ptype; + u16 hlen, plen, hdr, vtag; + bool eop; + + /* Sync the ring. 
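+ The POSTREAD sync makes the hardware's descriptor
+ writeback visible before status_error is read below.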
*/ + bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - /* Sync the ring */ - bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, - BUS_DMASYNC_POSTREAD); + cur = &rxr->rx_base[i]; + staterr = le32toh(cur->wb.upper.status_error); - while ((staterr & IXGBE_RXD_STAT_DD) && (count != 0) && - (ifp->if_drv_flags & IFF_DRV_RUNNING)) { - struct mbuf *sendmp, *mh, *mp, *nh, *np; - struct ixgbe_rx_buf *nxtbuf; - u32 rsc; - u16 hlen, plen, hdr, nextp, vtag; - bool accept_frame, eop; + if ((staterr & IXGBE_RXD_STAT_DD) == 0) + break; + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + break; + count--; + sendmp = NULL; + nbuf = NULL; + rsc = 0; + cur->wb.upper.status_error = 0; + rbuf = &rxr->rx_buffers[i]; + mh = rbuf->m_head; + mp = rbuf->m_pack; - accept_frame = TRUE; - hlen = plen = rsc = nextp = 0; - sendmp = mh = mp = nh = np = NULL; - - /* Sync the buffers */ - bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map, - BUS_DMASYNC_POSTREAD); - mh = rxr->rx_buffers[i].m_head; - mp = rxr->rx_buffers[i].m_pack; + plen = le16toh(cur->wb.upper.length); + ptype = le32toh(cur->wb.lower.lo_dword.data) & + IXGBE_RXDADV_PKTTYPE_MASK; + hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info); vtag = le16toh(cur->wb.upper.vlan); eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0); + /* Make sure all parts of a bad packet are discarded */ + if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) || + (rxr->discard)) { + ifp->if_ierrors++; + rxr->rx_discarded++; + if (!eop) + rxr->discard = TRUE; + else + rxr->discard = FALSE; + ixgbe_rx_discard(rxr, i); + goto next_desc; + } + + /* + ** On 82599 which supports a hardware + ** LRO (called HW RSC), packets need + ** not be fragmented across sequential + ** descriptors, rather the next descriptor + ** is indicated in bits of the descriptor. + ** This also means that we might process + ** more than one packet at a time, something + ** that has never been true before, it + ** required eliminating global chain pointers + ** in favor of what we are doing here. -jfv + */ if (!eop) { /* - ** On 82599 which supports a hardware - ** LRO (called HW RSC), packets need - ** not be fragmented across sequential - ** descriptors, rather the next descriptor - ** is indicated in bits of the current. - ** This also means that we might proceses - ** more than one packet at a time, something - ** that has never been true before, it - ** required eliminating global chain pointers - ** in favor of what we are doing here. -jfv + ** Figure out the next descriptor + ** of this frame. */ if (rxr->hw_rsc == TRUE) { rsc = ixgbe_rsc_count(cur); rxr->rsc_num += (rsc - 1); } - if (rsc) { + if (rsc) { /* Get hardware index */ nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >> IXGBE_RXDADV_NEXTP_SHIFT); - } else { + } else { /* Just sequential */ nextp = i + 1; if (nextp == adapter->num_rx_desc) nextp = 0; } - nxtbuf = &rxr->rx_buffers[nextp]; - prefetch(nxtbuf); + nbuf = &rxr->rx_buffers[nextp]; + prefetch(nbuf); } /* - ** The way the hardware is configured to - ** split, it will ONLY use the header buffer - ** when header split is enabled, otherwise we - ** get legacy behavior, ie, both header and - ** payload are DMA'd into JUST the payload buffer. + ** The header mbuf is ONLY used when header + ** split is enabled, otherwise we get normal + ** behavior, ie, both header and payload + ** are DMA'd into the payload buffer.
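+ ** (when split is on, the hardware DMAs the protocol
+ ** headers into the small m_head mbuf and the payload
+ ** into the larger m_pack cluster)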
** ** Rather than using the fmp/lmp global pointers ** we now keep the head of a packet chain in the - ** m_nextpkt pointer and pass this along from one + ** buffer struct and pass this along from one ** descriptor to the next, until we get EOP. - ** */ - if ((rxr->hdr_split) && (mh->m_nextpkt == NULL)) { - hdr = le16toh(cur-> - wb.lower.lo_dword.hs_rss.hdr_info); + if (rxr->hdr_split && (rbuf->fmp == NULL)) { + /* This must be an initial descriptor */ hlen = (hdr & IXGBE_RXDADV_HDRBUFLEN_MASK) >> IXGBE_RXDADV_HDRBUFLEN_SHIFT; if (hlen > IXGBE_RX_HDR) hlen = IXGBE_RX_HDR; - plen = le16toh(cur->wb.upper.length); mh->m_len = hlen; mh->m_flags |= M_PKTHDR; mh->m_next = NULL; mh->m_pkthdr.len = mh->m_len; - /* Null this so getbuf replenishes */ - rxr->rx_buffers[i].m_head = NULL; + /* Null buf pointer so it is refreshed */ + rbuf->m_head = NULL; /* - ** Get the payload length, this + ** Check the payload length, this ** could be zero if its a small ** packet. */ - if (plen) { + if (plen > 0) { mp->m_len = plen; mp->m_next = NULL; mp->m_flags &= ~M_PKTHDR; mh->m_next = mp; mh->m_pkthdr.len += mp->m_len; - /* Null this so getbuf replenishes */ - rxr->rx_buffers[i].m_pack = NULL; + /* Null buf pointer so it is refreshed */ + rbuf->m_pack = NULL; rxr->rx_split_packets++; } - /* Setup the forward chain */ - if (eop == 0) { - nh = rxr->rx_buffers[nextp].m_head; - np = rxr->rx_buffers[nextp].m_pack; - nh->m_nextpkt = mh; - if (plen) - mp->m_next = np; - else - mh->m_next = np; - } else { - sendmp = mh; - if (staterr & IXGBE_RXD_STAT_VP) { - sendmp->m_pkthdr.ether_vtag = vtag; - sendmp->m_flags |= M_VLANTAG; - } - } + /* + ** Now create the forward + ** chain so when complete + ** we won't have to. + */ + if (eop == 0) { + /* stash the chain head */ + nbuf->fmp = mh; + /* Make forward chain */ + if (plen) + mp->m_next = nbuf->m_pack; + else + mh->m_next = nbuf->m_pack; + } else { + /* Singlet, prepare to send */ + sendmp = mh; + if (staterr & IXGBE_RXD_STAT_VP) { + sendmp->m_pkthdr.ether_vtag = vtag; + sendmp->m_flags |= M_VLANTAG; + } + } } else { /* ** Either no header split, or a ** secondary piece of a fragmented ** split packet.
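** (a NULL stored head below means this buffer starts
** a new frame)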
*/ - mp->m_len = le16toh(cur->wb.upper.length); - rxr->rx_buffers[i].m_pack = NULL; - /* stored head pointer */ - sendmp = mh->m_nextpkt; - if (sendmp != NULL) { + mp->m_len = plen; + /* + ** See if there is a stored head + ** that determines what we are + */ + sendmp = rbuf->fmp; + rbuf->m_pack = rbuf->fmp = NULL; + + if (sendmp != NULL) /* secondary frag */ sendmp->m_pkthdr.len += mp->m_len; - sendmp->m_nextpkt = NULL; - } else { + else { /* first desc of a non-ps chain */ sendmp = mp; sendmp->m_flags |= M_PKTHDR; @@ -4233,106 +4120,53 @@ ixgbe_rxeof(struct rx_ring *rxr, int count) sendmp->m_pkthdr.ether_vtag = vtag; sendmp->m_flags |= M_VLANTAG; } - } - /* Carry head forward */ + } + /* Pass the head pointer on */ if (eop == 0) { - nh = rxr->rx_buffers[nextp].m_head; - np = rxr->rx_buffers[nextp].m_pack; - nh->m_nextpkt = sendmp; - mp->m_next = np; + nbuf->fmp = sendmp; sendmp = NULL; + mp->m_next = nbuf->m_pack; } - mh->m_nextpkt = NULL; } - - if (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) - accept_frame = FALSE; - - if (accept_frame) { - ++processed; - if (eop) { - --count; - sendmp->m_pkthdr.rcvif = ifp; - ifp->if_ipackets++; - rxr->rx_packets++; - /* capture data for AIM */ - rxr->bytes += sendmp->m_pkthdr.len; - rxr->rx_bytes += rxr->bytes; - if (ifp->if_capenable & IFCAP_RXCSUM) - ixgbe_rx_checksum(staterr, sendmp); - else - sendmp->m_pkthdr.csum_flags = 0; + ++processed; + /* Sending this frame? */ + if (eop) { + sendmp->m_pkthdr.rcvif = ifp; + ifp->if_ipackets++; + rxr->rx_packets++; + /* capture data for AIM */ + rxr->bytes += sendmp->m_pkthdr.len; + rxr->rx_bytes += sendmp->m_pkthdr.len; + if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) + ixgbe_rx_checksum(staterr, sendmp); #if __FreeBSD_version >= 800000 - /* Get the RSS Hash */ - sendmp->m_pkthdr.flowid = - le16toh(cur->wb.lower.hi_dword.rss); - sendmp->m_flags |= M_FLOWID; + sendmp->m_pkthdr.flowid = que->msix; + sendmp->m_flags |= M_FLOWID; #endif - } - } else { - ifp->if_ierrors++; - /* Reuse loaded DMA map and just update mbuf chain */ - mh->m_len = MHLEN; - mh->m_flags |= M_PKTHDR; - mh->m_next = NULL; - mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz; - mp->m_data = mp->m_ext.ext_buf; - if (mp->m_next) { /* Free chain */ - sendmp = mp->m_next; - m_free(sendmp); - } - mp->m_next = NULL; - if (adapter->max_frame_size <= - (MCLBYTES - ETHER_ALIGN)) - m_adj(mp, ETHER_ALIGN); - sendmp = NULL; } +next_desc: bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - rxr->last_refreshed = i; /* for updating tail */ + /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; - /* Prefetch next descriptor */ - cur = &rxr->rx_base[i]; - prefetch(cur); - /* - ** Now send up to the stack, - ** note that the RX lock is - ** held thru this call. 
- */ - if (sendmp != NULL) { - /* - ** Send to the stack if: - ** - Soft LRO not enabled, or - ** - no Soft LRO resources, or - ** - soft lro enqueue fails - */ - if ((!rxr->lro_enabled) || - ((!lro->lro_cnt) || (tcp_lro_rx(lro, sendmp, 0)))) - (*ifp->if_input)(ifp, sendmp); - } + /* Now send to the stack or do LRO */ + if (sendmp != NULL) + ixgbe_rx_input(rxr, ifp, sendmp, ptype); - /* Replenish every 8 max */ + /* Every 8 descriptors we refresh the mbufs */ if (processed == 8) { - ixgbe_get_buf(rxr, rxr->next_to_check, i); + ixgbe_refresh_mbufs(rxr, i); processed = 0; - IXGBE_WRITE_REG(&adapter->hw, - IXGBE_RDT(rxr->me), rxr->last_refreshed); - rxr->next_to_check = i; } - - /* Next iteration */ - staterr = cur->wb.upper.status_error; } - /* Replenish remaining work */ + /* Refresh any remaining buf structs */ if (processed != 0) { - ixgbe_get_buf(rxr, rxr->next_to_check, i); + ixgbe_refresh_mbufs(rxr, i); processed = 0; - IXGBE_WRITE_REG(&adapter->hw, - IXGBE_RDT(rxr->me), rxr->last_refreshed); } rxr->next_to_check = i; @@ -4340,8 +4174,7 @@ ixgbe_rxeof(struct rx_ring *rxr, int count) /* * Flush any outstanding LRO work */ - while (!SLIST_EMPTY(&lro->lro_active)) { - queued = SLIST_FIRST(&lro->lro_active); + while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } @@ -4349,17 +4182,18 @@ ixgbe_rxeof(struct rx_ring *rxr, int count) IXGBE_RX_UNLOCK(rxr); /* - ** Leaving with more to clean? - ** then schedule another interrupt. + ** We still have cleaning to do? + ** Schedule another interrupt if so. */ - if (staterr & IXGBE_RXD_STAT_DD) { - ixgbe_rearm_queues(adapter, (u64)(1 << rxr->msix)); - return TRUE; + if ((staterr & IXGBE_RXD_STAT_DD) != 0) { + ixgbe_rearm_queues(adapter, (u64)(1 << que->msix)); + return (TRUE); } - return FALSE; + return (FALSE); } + /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. @@ -4374,7 +4208,6 @@ ixgbe_rx_checksum(u32 staterr, struct mbuf * mp) u8 errors = (u8) (staterr >> 24); if (status & IXGBE_RXD_STAT_IPCS) { - /* Did it pass? */ if (!(errors & IXGBE_RXD_ERR_IPE)) { /* IP Checksum Good */ mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; @@ -4384,7 +4217,6 @@ ixgbe_rx_checksum(u32 staterr, struct mbuf * mp) mp->m_pkthdr.csum_flags = 0; } if (status & IXGBE_RXD_STAT_L4CS) { - /* Did it pass?
*/ if (!(errors & IXGBE_RXD_ERR_TCPE)) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); @@ -4493,8 +4325,7 @@ static void ixgbe_enable_intr(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; + struct ix_queue *que = adapter->queues; u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); @@ -4528,10 +4359,8 @@ ixgbe_enable_intr(struct adapter *adapter) ** allow for handling the extended (beyond 32) MSIX ** vectors that can be used by 82599 */ - for (int i = 0; i < adapter->num_queues; i++, rxr++) - ixgbe_enable_queue(adapter, rxr->msix); - for (int i = 0; i < adapter->num_queues; i++, txr++) - ixgbe_enable_queue(adapter, txr->msix); + for (int i = 0; i < adapter->num_queues; i++, que++) + ixgbe_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); @@ -4626,14 +4455,17 @@ ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) static void ixgbe_configure_ivars(struct adapter *adapter) { - struct tx_ring *txr = adapter->tx_rings; - struct rx_ring *rxr = adapter->rx_rings; + struct ix_queue *que = adapter->queues; - for (int i = 0; i < adapter->num_queues; i++, rxr++) - ixgbe_set_ivar(adapter, i, rxr->msix, 0); - - for (int i = 0; i < adapter->num_queues; i++, txr++) - ixgbe_set_ivar(adapter, i, txr->msix, 1); + for (int i = 0; i < adapter->num_queues; i++, que++) { + /* First the RX queue entry */ + ixgbe_set_ivar(adapter, i, que->msix, 0); + /* ... and the TX */ + ixgbe_set_ivar(adapter, i, que->msix, 1); + /* Set an Initial EITR value */ + IXGBE_WRITE_REG(&adapter->hw, + IXGBE_EITR(que->msix), IXGBE_LOW_LATENCY); + } /* For the Link interrupt */ ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1); @@ -4922,44 +4754,43 @@ static void ixgbe_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; - struct rx_ring *rxr = adapter->rx_rings; - struct tx_ring *txr = adapter->tx_rings; - struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_hw *hw = &adapter->hw; + struct ix_queue *que = adapter->queues; + struct rx_ring *rxr; + struct tx_ring *txr; + struct lro_ctrl *lro; device_printf(dev,"Error Byte Count = %u \n", IXGBE_READ_REG(hw, IXGBE_ERRBC)); - for (int i = 0; i < adapter->num_queues; i++, rxr++) { - struct lro_ctrl *lro = &rxr->lro; - device_printf(dev,"Queue[%d]: rdh = %d, hw rdt = %d\n", + for (int i = 0; i < adapter->num_queues; i++, que++) { + txr = que->txr; + rxr = que->rxr; + lro = &rxr->lro; + device_printf(dev,"QUE(%d) IRQs Handled: %lu\n", + que->msix, (long)que->irqs); + device_printf(dev,"RX[%d]: rdh = %d, hw rdt = %d\n", i, IXGBE_READ_REG(hw, IXGBE_RDH(i)), IXGBE_READ_REG(hw, IXGBE_RDT(i))); + device_printf(dev,"TX[%d] tdh = %d, hw tdt = %d\n", i, + IXGBE_READ_REG(hw, IXGBE_TDH(i)), + IXGBE_READ_REG(hw, IXGBE_TDT(i))); device_printf(dev,"RX(%d) Packets Received: %lld\n", rxr->me, (long long)rxr->rx_packets); device_printf(dev,"RX(%d) Split RX Packets: %lld\n", rxr->me, (long long)rxr->rx_split_packets); device_printf(dev,"RX(%d) Bytes Received: %lu\n", rxr->me, (long)rxr->rx_bytes); - device_printf(dev,"RX(%d) IRQ Handled: %lu\n", - rxr->me, (long)rxr->rx_irq); device_printf(dev,"RX(%d) LRO Queued= %d\n", rxr->me, lro->lro_queued); device_printf(dev,"RX(%d) LRO Flushed= %d\n", rxr->me, lro->lro_flushed); device_printf(dev,"RX(%d) HW LRO Merges= %lu\n", rxr->me, (long)rxr->rsc_num); - } - - for (int i = 0; i < adapter->num_queues; i++, txr++) { - device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", i, - IXGBE_READ_REG(hw, 
IXGBE_TDH(i)), - IXGBE_READ_REG(hw, IXGBE_TDT(i))); device_printf(dev,"TX(%d) Packets Sent: %lu\n", txr->me, (long)txr->total_packets); - device_printf(dev,"TX(%d) IRQ Handled: %lu\n", - txr->me, (long)txr->tx_irq); device_printf(dev,"TX(%d) NO Desc Avail: %lu\n", - txr->me, (long)txr->no_tx_desc_avail); + txr->me, (long)txr->no_desc_avail); } device_printf(dev,"Link IRQ Handled: %lu\n", @@ -5050,174 +4881,3 @@ ixgbe_add_rx_process_limit(struct adapter *adapter, const char *name, SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description); } - -#ifdef IXGBE_IEEE1588 - -/* -** ixgbe_hwtstamp_ioctl - control hardware time stamping -** -** Outgoing time stamping can be enabled and disabled. Play nice and -** disable it when requested, although it shouldn't case any overhead -** when no packet needs it. At most one packet in the queue may be -** marked for time stamping, otherwise it would be impossible to tell -** for sure to which packet the hardware time stamp belongs. -** -** Incoming time stamping has to be configured via the hardware -** filters. Not all combinations are supported, in particular event -** type has to be specified. Matching the kind of event packet is -** not supported, with the exception of "all V2 events regardless of -** level 2 or 4". -** -*/ -static int -ixgbe_hwtstamp_ioctl(struct adapter *adapter, struct ifreq *ifr) -{ - struct ixgbe_hw *hw = &adapter->hw; - struct hwtstamp_ctrl *config; - u32 tsync_tx_ctl_bit = IXGBE_TSYNCTXCTL_ENABLED; - u32 tsync_rx_ctl_bit = IXGBE_TSYNCRXCTL_ENABLED; - u32 tsync_rx_ctl_type = 0; - u32 tsync_rx_cfg = 0; - int is_l4 = 0; - int is_l2 = 0; - u16 port = 319; /* PTP */ - u32 regval; - - config = (struct hwtstamp_ctrl *) ifr->ifr_data; - - /* reserved for future extensions */ - if (config->flags) - return (EINVAL); - - switch (config->tx_type) { - case HWTSTAMP_TX_OFF: - tsync_tx_ctl_bit = 0; - break; - case HWTSTAMP_TX_ON: - tsync_tx_ctl_bit = IXGBE_TSYNCTXCTL_ENABLED; - break; - default: - return (ERANGE); - } - - switch (config->rx_filter) { - case HWTSTAMP_FILTER_NONE: - tsync_rx_ctl_bit = 0; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_ALL: - /* - * register TSYNCRXCFG must be set, therefore it is not - * possible to time stamp both Sync and Delay_Req messages - * => fall back to time stamping all packets - */ - tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_ALL; - config->rx_filter = HWTSTAMP_FILTER_ALL; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = IXGBE_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; - is_l4 = 1; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = IXGBE_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; - is_l4 = 1; - break; - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_cfg = IXGBE_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; - is_l2 = 1; - is_l4 = 1; - config->rx_filter = HWTSTAMP_FILTER_SOME; - break; - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_cfg = IXGBE_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; - is_l2 = 1; - is_l4 = 1; - config->rx_filter = HWTSTAMP_FILTER_SOME; - break; - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case 
HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - tsync_rx_ctl_type = IXGBE_TSYNCRXCTL_TYPE_EVENT_V2; - config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - is_l2 = 1; - break; - default: - return -ERANGE; - } - - /* enable/disable TX */ - regval = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL); - regval = (regval & ~IXGBE_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit; - IXGBE_WRITE_REG(hw, IXGBE_TSYNCTXCTL, regval); - - /* enable/disable RX, define which PTP packets are time stamped */ - regval = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); - regval = (regval & ~IXGBE_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit; - regval = (regval & ~0xE) | tsync_rx_ctl_type; - IXGBE_WRITE_REG(hw, IXGBE_TSYNCRXCTL, regval); - IXGBE_WRITE_REG(hw, IXGBE_TSYNCRXCFG, tsync_rx_cfg); - - /* - * Ethertype Filter Queue Filter[0][15:0] = 0x88F7 - * (Ethertype to filter on) - * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter) - * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping) - */ - IXGBE_WRITE_REG(hw, IXGBE_ETQF0, is_l2 ? 0x440088f7 : 0); - - /* L4 Queue Filter[0]: only filter by source and destination port */ - IXGBE_WRITE_REG(hw, IXGBE_SPQF0, htons(port)); - IXGBE_WRITE_REG(hw, IXGBE_IMIREXT(0), is_l4 ? - ((1<<12) | (1<<19) /* bypass size and control flags */) : 0); - IXGBE_WRITE_REG(hw, IXGBE_IMIR(0), is_l4 ? - (htons(port) - | (0<<16) /* immediate interrupt disabled */ - | 0 /* (1<<17) bit cleared: do not bypass - destination port check */) - : 0); - IXGBE_WRITE_REG(hw, IXGBE_FTQF0, is_l4 ? - (0x11 /* UDP */ - | (1<<15) /* VF not compared */ - | (1<<27) /* Enable Timestamping */ - | (7<<28) /* only source port filter enabled, - source/target address and protocol - masked */) - : ((1<<15) | (15<<28) /* all mask bits set = filter not - enabled */)); - - wrfl(); - - adapter->hwtstamp_ctrl = config; - - /* clear TX/RX time stamp registers, just to be sure */ - regval = IXGBE_READ_REG(hw, IXGBE_TXSTMPH); - regval = IXGBE_READ_REG(hw, IXGBE_RXSTMPH); - - return (error); -} - -/* -** ixgbe_read_clock - read raw cycle counter (to be used by time counter) -*/ -static cycle_t ixgbe_read_clock(const struct cyclecounter *tc) -{ - struct adapter *adapter = - container_of(tc, struct igb_adapter, cycles); - struct ixgbe_hw *hw = &adapter->hw; - u64 stamp; - - stamp = IXGBE_READ_REG(hw, IXGBE_SYSTIML); - stamp |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32ULL; - - return (stamp); -} - -#endif /* IXGBE_IEEE1588 */ diff --git a/sys/dev/ixgbe/ixgbe.h b/sys/dev/ixgbe/ixgbe.h index d52eed3a07e3..008d2c8cb017 100644 --- a/sys/dev/ixgbe/ixgbe.h +++ b/sys/dev/ixgbe/ixgbe.h @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2009, Intel Corporation + Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -176,7 +176,7 @@ #define MSIX_82599_BAR 4 #define IXGBE_TSO_SIZE 65535 #define IXGBE_TX_BUFFER_SIZE ((u32) 1514) -#define IXGBE_RX_HDR 256 +#define IXGBE_RX_HDR 128 #define IXGBE_VFTA_SIZE 128 #define IXGBE_BR_SIZE 4096 #define CSUM_OFFLOAD 7 /* Bits in csum flags */ @@ -231,6 +231,7 @@ struct ixgbe_tx_buf { struct ixgbe_rx_buf { struct mbuf *m_head; struct mbuf *m_pack; + struct mbuf *fmp; bus_dmamap_t map; }; @@ -248,20 +249,34 @@ struct ixgbe_dma_alloc { }; /* - * The transmit ring, one per tx queue +** Driver queue struct: this is the interrupt container +** for the associated tx and rx ring. 
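The rationale for this container can be shown with a toy model before the real definition; the toy_* names below are hypothetical and the driver's actual struct (ix_queue) follows in this header. One MSI-X vector dispatches to an RX/TX pair, so a port needs one vector per queue pair instead of two:

    struct toy_ring { int work; };
    struct toy_queue {                 /* one MSI-X vector serves both rings */
            struct toy_ring *txr;
            struct toy_ring *rxr;
            unsigned long irqs;
    };

    static void
    toy_queue_intr(struct toy_queue *q)
    {
            q->irqs++;
            /* clean TX first, then RX, from the same vector */
            if (q->txr->work)
                    q->txr->work--;
            if (q->rxr->work)
                    q->rxr->work--;
    }

Seeing both directions from one handler is also what lets the new AIM code weigh RX and TX traffic together when choosing an EITR value.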
+*/ +struct ix_queue { + struct adapter *adapter; + u32 msix; /* This queue's MSIX vector */ + u32 eims; /* This queue's EIMS bit */ + u32 eitr_setting; + struct resource *res; + void *tag; + struct tx_ring *txr; + struct rx_ring *rxr; + struct task que_task; + struct taskqueue *tq; + u64 irqs; +}; + +/* + * The transmit ring, one per queue */ struct tx_ring { struct adapter *adapter; struct mtx tx_mtx; u32 me; - u32 msix; bool watchdog_check; int watchdog_time; union ixgbe_adv_tx_desc *tx_base; - volatile u32 tx_hwb; struct ixgbe_dma_alloc txdma; - struct task tx_task; - struct taskqueue *tq; u32 next_avail_desc; u32 next_to_clean; struct ixgbe_tx_buf *tx_buffers; @@ -272,17 +287,14 @@ struct tx_ring { #if __FreeBSD_version >= 800000 struct buf_ring *br; #endif - /* Interrupt resources */ - void *tag; - struct resource *res; #ifdef IXGBE_FDIR u16 atr_sample; u16 atr_count; #endif + u32 bytes; /* used for AIM */ + u32 packets; /* Soft Stats */ - u32 no_tx_desc_avail; - u32 no_tx_desc_late; - u64 tx_irq; + u64 no_desc_avail; u64 total_packets; }; @@ -294,35 +306,29 @@ struct rx_ring { struct adapter *adapter; struct mtx rx_mtx; u32 me; - u32 msix; - u32 payload; - struct task rx_task; - struct taskqueue *tq; union ixgbe_adv_rx_desc *rx_base; struct ixgbe_dma_alloc rxdma; struct lro_ctrl lro; bool lro_enabled; bool hdr_split; bool hw_rsc; - unsigned int last_refreshed; - unsigned int next_to_check; + bool discard; + u32 next_to_refresh; + u32 next_to_check; + char mtx_name[16]; struct ixgbe_rx_buf *rx_buffers; bus_dma_tag_t rxtag; bus_dmamap_t spare_map; - char mtx_name[16]; u32 bytes; /* Used for AIM calc */ - u32 eitr_setting; - - /* Interrupt resources */ - void *tag; - struct resource *res; + u32 packets; /* Soft stats */ u64 rx_irq; u64 rx_split_packets; u64 rx_packets; u64 rx_bytes; + u64 rx_discarded; u64 rsc_num; #ifdef IXGBE_FDIR u64 flm; @@ -331,94 +337,94 @@ struct rx_ring { /* Our adapter structure */ struct adapter { - struct ifnet *ifp; - struct ixgbe_hw hw; + struct ifnet *ifp; + struct ixgbe_hw hw; struct ixgbe_osdep osdep; - struct device *dev; + struct device *dev; - struct resource *pci_mem; - struct resource *msix_mem; + struct resource *pci_mem; + struct resource *msix_mem; /* * Interrupt resources: this set is * either used for legacy, or for Link * when doing MSIX */ - void *tag; - struct resource *res; + void *tag; + struct resource *res; - struct ifmedia media; - struct callout timer; - int msix; - int if_flags; + struct ifmedia media; + struct callout timer; + int msix; + int if_flags; - struct mtx core_mtx; + struct mtx core_mtx; - eventhandler_tag vlan_attach; - eventhandler_tag vlan_detach; + eventhandler_tag vlan_attach; + eventhandler_tag vlan_detach; - u16 num_vlans; - u16 num_queues; + u16 num_vlans; + u16 num_queues; /* Info about the board itself */ - u32 optics; - bool link_active; - u16 max_frame_size; - u32 link_speed; - bool link_up; - u32 linkvec; + u32 optics; + bool link_active; + u16 max_frame_size; + u32 link_speed; + bool link_up; + u32 linkvec; /* Mbuf cluster size */ - u32 rx_mbuf_sz; + u32 rx_mbuf_sz; /* Support for pluggable optics */ - bool sfp_probe; - struct task link_task; /* Link tasklet */ - struct task mod_task; /* SFP tasklet */ - struct task msf_task; /* Multispeed Fiber tasklet */ + bool sfp_probe; + struct task link_task; /* Link tasklet */ + struct task mod_task; /* SFP tasklet */ + struct task msf_task; /* Multispeed Fiber */ #ifdef IXGBE_FDIR int fdir_reinit; struct task fdir_task; #endif struct taskqueue *tq; + /* + ** Queues: + 
** This is the irq holder, it has + ** an RX/TX pair of rings associated + ** with it. + */ + struct ix_queue *queues; + /* * Transmit rings: * Allocated at run time, an array of rings. */ - struct tx_ring *tx_rings; - int num_tx_desc; + struct tx_ring *tx_rings; + int num_tx_desc; /* * Receive rings: * Allocated at run time, an array of rings. */ - struct rx_ring *rx_rings; - int num_rx_desc; - u64 rx_mask; - u32 rx_process_limit; - -#ifdef IXGBE_IEEE1588 - /* IEEE 1588 precision time support */ - struct cyclecounter cycles; - struct nettimer clock; - struct nettime_compare compare; - struct hwtstamp_ctrl hwtstamp; -#endif + struct rx_ring *rx_rings; + int num_rx_desc; + u64 que_mask; + u32 rx_process_limit; /* Misc stats maintained by the driver */ - unsigned long dropped_pkts; - unsigned long mbuf_defrag_failed; - unsigned long mbuf_header_failed; - unsigned long mbuf_packet_failed; - unsigned long no_tx_map_avail; - unsigned long no_tx_dma_setup; - unsigned long watchdog_events; - unsigned long tso_tx; - unsigned long link_irq; + unsigned long dropped_pkts; + unsigned long mbuf_defrag_failed; + unsigned long mbuf_header_failed; + unsigned long mbuf_packet_failed; + unsigned long no_tx_map_avail; + unsigned long no_tx_dma_setup; + unsigned long watchdog_events; + unsigned long tso_tx; + unsigned long link_irq; - struct ixgbe_hw_stats stats; + struct ixgbe_hw_stats stats; }; /* Precision Time Sync (IEEE 1588) defines */ @@ -452,8 +458,8 @@ ixgbe_is_sfp(struct ixgbe_hw *hw) case ixgbe_phy_sfp_ftl: case ixgbe_phy_sfp_intel: case ixgbe_phy_sfp_unknown: - case ixgbe_phy_tw_tyco: - case ixgbe_phy_tw_unknown: + case ixgbe_phy_sfp_passive_tyco: + case ixgbe_phy_sfp_passive_unknown: return TRUE; default: return FALSE; diff --git a/sys/dev/ixgbe/ixgbe_82598.c b/sys/dev/ixgbe/ixgbe_82598.c index 12711b0a9b4d..0570aa5c5884 100644 --- a/sys/dev/ixgbe/ixgbe_82598.c +++ b/sys/dev/ixgbe/ixgbe_82598.c @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2009, Intel Corporation + Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -59,6 +59,7 @@ static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw, bool autoneg_wait_to_complete); static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw); s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw); +void ixgbe_enable_relaxed_ordering_82598(struct ixgbe_hw *hw); s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq); static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq); s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, @@ -164,6 +165,7 @@ s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw) /* MAC */ mac->ops.start_hw = &ixgbe_start_hw_82598; + mac->ops.enable_relaxed_ordering = &ixgbe_enable_relaxed_ordering_82598; mac->ops.reset_hw = &ixgbe_reset_hw_82598; mac->ops.get_media_type = &ixgbe_get_media_type_82598; mac->ops.get_supported_physical_layer = @@ -273,7 +275,8 @@ s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw) * @hw: pointer to hardware structure * * Starts the hardware using the generic start_hw function.
- * Then set pcie completion timeout + * Disables relaxed ordering, then sets the PCIe completion timeout + * **/ s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw) { @@ -287,17 +290,17 @@ s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw) /* Disable relaxed ordering */ for (i = 0; ((i < hw->mac.max_tx_queues) && - (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { + (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); regval &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN; IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval); } for (i = 0; ((i < hw->mac.max_rx_queues) && - (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { + (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); regval &= ~(IXGBE_DCA_RXCTRL_DESC_WRO_EN | - IXGBE_DCA_RXCTRL_DESC_HSRO_EN); + IXGBE_DCA_RXCTRL_DESC_HSRO_EN); IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); } @@ -439,15 +442,23 @@ s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw, s32 packetbuf_num) DEBUGFUNC("ixgbe_fc_enable_82598"); /* - * On 82598 backplane having FC on causes resets while doing - * KX, so turn off here. + * On 82598, having Rx FC on causes resets while doing 1G, + * so if it's on, turn it off once we know link_speed. For + * more details, see the 82598 Specification Update. */ hw->mac.ops.check_link(hw, &link_speed, &link_up, FALSE); - if (link_up && - link_speed == IXGBE_LINK_SPEED_1GB_FULL && - hw->mac.ops.get_media_type(hw) == ixgbe_media_type_backplane) { - hw->fc.disable_fc_autoneg = TRUE; - hw->fc.requested_mode = ixgbe_fc_none; + if (link_up && link_speed == IXGBE_LINK_SPEED_1GB_FULL) { + switch (hw->fc.requested_mode) { + case ixgbe_fc_full: + hw->fc.requested_mode = ixgbe_fc_tx_pause; + break; + case ixgbe_fc_rx_pause: + hw->fc.requested_mode = ixgbe_fc_none; + break; + default: + /* no change */ + break; + } } /* Negotiate the fc mode to use */ @@ -842,12 +853,9 @@ static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw) * Prevent the PCI-E bus from from hanging by disabling PCI-E master * access and verify no pending requests before reset */ - status = ixgbe_disable_pcie_master(hw); - if (status != IXGBE_SUCCESS) { - status = IXGBE_ERR_MASTER_REQUESTS_PENDING; - DEBUGOUT("PCI-E Master disable polling has failed.\n"); - } + ixgbe_disable_pcie_master(hw); +mac_reset_top: /* * Issue global reset to the MAC. This needs to be a SW reset. * If link reset is used, it might reset the MAC when mng is using it @@ -868,6 +876,19 @@ static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw) DEBUGOUT("Reset polling failed to complete.\n"); } + /* + * Double resets are required for recovery from certain error + * conditions. Between resets, it is necessary to stall to allow time + * for any pending HW events to complete. We use 1usec since that is + * what is needed for ixgbe_disable_pcie_master(). The second reset + * then clears out any effects of those events.
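A condensed sketch of the retry pattern both reset paths use may help here; issue_sw_reset and delay_usec below are assumed stub stand-ins for the CTRL.RST write and usec_delay(), and the flag name only mirrors IXGBE_FLAGS_DOUBLE_RESET_REQUIRED:

    /* Hypothetical condensed form of the double-reset idiom; not driver code. */
    #define FLAG_DOUBLE_RESET 0x01

    static void issue_sw_reset(void) { /* stub for the CTRL.RST write */ }
    static void delay_usec(int us) { (void)us; /* stub for usec_delay() */ }

    static void
    reset_mac(unsigned char *flags)
    {
    mac_reset_top:
            issue_sw_reset();
            if (*flags & FLAG_DOUBLE_RESET) {
                    *flags &= ~FLAG_DOUBLE_RESET;   /* run exactly twice */
                    delay_usec(1);                  /* let pending events drain */
                    goto mac_reset_top;
            }
    }

Clearing the flag before the goto is what bounds the loop: the flag is set elsewhere (by the PCIe master-disable failure path), consumed once here, and the second pass through the reset falls straight out.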
+ */ + if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { + hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + usec_delay(1); + goto mac_reset_top; + } + msec_delay(50); gheccr = IXGBE_READ_REG(hw, IXGBE_GHECCR); @@ -1299,3 +1320,32 @@ static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw) return IXGBE_SUCCESS; } +/** + * ixgbe_enable_relaxed_ordering_82598 - enable relaxed ordering + * @hw: pointer to hardware structure + * + **/ +void ixgbe_enable_relaxed_ordering_82598(struct ixgbe_hw *hw) +{ + u32 regval; + u32 i; + + DEBUGFUNC("ixgbe_enable_relaxed_ordering_82598"); + + /* Enable relaxed ordering */ + for (i = 0; ((i < hw->mac.max_tx_queues) && + (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); + regval |= IXGBE_DCA_TXCTRL_TX_WB_RO_EN; + IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval); + } + + for (i = 0; ((i < hw->mac.max_rx_queues) && + (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); + regval |= (IXGBE_DCA_RXCTRL_DESC_WRO_EN | + IXGBE_DCA_RXCTRL_DESC_HSRO_EN); + IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); + } + +} diff --git a/sys/dev/ixgbe/ixgbe_82599.c b/sys/dev/ixgbe/ixgbe_82599.c index 97b655a26ac7..8c5ff21200d1 100644 --- a/sys/dev/ixgbe/ixgbe_82599.c +++ b/sys/dev/ixgbe/ixgbe_82599.c @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2009, Intel Corporation + Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -64,6 +64,7 @@ s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw); s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val); s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val); s32 ixgbe_start_hw_rev_1_82599(struct ixgbe_hw *hw); +void ixgbe_enable_relaxed_ordering_82599(struct ixgbe_hw *hw); s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw); s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw); u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw); @@ -267,6 +268,8 @@ s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, DEBUGFUNC("ixgbe_get_link_capabilities_82599"); + + /* * Determine link capabilities based on the stored value of AUTOC, * which represents EEPROM defaults. If AUTOC value has not @@ -878,7 +881,7 @@ static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw, s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) { s32 status = IXGBE_SUCCESS; - u32 ctrl, ctrl_ext; + u32 ctrl; u32 i; u32 autoc; u32 autoc2; @@ -913,12 +916,9 @@ s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) * Prevent the PCI-E bus from from hanging by disabling PCI-E master * access and verify no pending requests before reset */ - status = ixgbe_disable_pcie_master(hw); - if (status != IXGBE_SUCCESS) { - status = IXGBE_ERR_MASTER_REQUESTS_PENDING; - DEBUGOUT("PCI-E Master disable polling has failed.\n"); - } + ixgbe_disable_pcie_master(hw); +mac_reset_top: /* * Issue global reset to the MAC. This needs to be a SW reset. 
* If link reset is used, it might reset the MAC when mng is using it @@ -938,10 +938,19 @@ s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) status = IXGBE_ERR_RESET_FAILED; DEBUGOUT("Reset polling failed to complete.\n"); } - /* Clear PF Reset Done bit so PF/VF Mail Ops can work */ - ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); - ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD; - IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); + + /* + * Double resets are required for recovery from certain error + * conditions. Between resets, it is necessary to stall to allow time + * for any pending HW events to complete. We use 1usec since that is + * what is needed for ixgbe_disable_pcie_master(). The second reset + * then clears out any effects of those events. + */ + if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { + hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + usec_delay(1); + goto mac_reset_top; + } msec_delay(50); @@ -981,8 +990,6 @@ s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) hw->mac.num_rar_entries = 128; hw->mac.ops.init_rx_addrs(hw); - - /* Store the permanent SAN mac address */ hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr); @@ -1207,6 +1214,9 @@ s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc) /* Send interrupt when 64 filters are left */ fdirctrl |= 4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT; + /* Initialize the drop queue to Rx queue 127 */ + fdirctrl |= (127 << IXGBE_FDIRCTRL_DROP_Q_SHIFT); + switch (pballoc) { case IXGBE_FDIR_PBALLOC_64K: /* 2k - 1 perfect filters */ @@ -1886,23 +1896,26 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, * ixgbe_fdir_add_perfect_filter_82599 - Adds a perfect filter * @hw: pointer to hardware structure * @input: input bitstream + * @input_masks: masks for the input bitstream + * @soft_id: software index for the filters * @queue: queue index to direct traffic to * * Note that the caller to this function must lock before calling, since the * hardware writes must be protected from one another. **/ s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, - struct ixgbe_atr_input *input, - u16 soft_id, - u8 queue) + struct ixgbe_atr_input *input, + struct ixgbe_atr_input_masks *input_masks, + u16 soft_id, u8 queue) { u32 fdircmd = 0; u32 fdirhash; - u32 src_ipv4, dst_ipv4; + u32 src_ipv4 = 0, dst_ipv4 = 0; u32 src_ipv6_1, src_ipv6_2, src_ipv6_3, src_ipv6_4; u16 src_port, dst_port, vlan_id, flex_bytes; u16 bucket_hash; u8 l4type; + u8 fdirm = 0; DEBUGFUNC("ixgbe_fdir_add_perfect_filter_82599"); @@ -1959,7 +1972,6 @@ s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, /* IPv4 */ ixgbe_atr_get_src_ipv4_82599(input, &src_ipv4); IXGBE_WRITE_REG(hw, IXGBE_FDIRIPSA, src_ipv4); - } ixgbe_atr_get_dst_ipv4_82599(input, &dst_ipv4); @@ -1968,7 +1980,78 @@ s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, (vlan_id | (flex_bytes << IXGBE_FDIRVLAN_FLEX_SHIFT))); IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, (src_port | - (dst_port << IXGBE_FDIRPORT_DESTINATION_SHIFT))); + (dst_port << IXGBE_FDIRPORT_DESTINATION_SHIFT))); + + /* + * Program the relevant mask registers. If src/dst_port or src/dst_addr + * are zero, then assume a full mask for that field. Also assume that + * a VLAN of 0 is unspecified, so mask that out as well. L4type + * cannot be masked out in this implementation. + * + * This also assumes IPv4 only. IPv6 masking isn't supported at this + * point in time. 
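The rule this comment states, a zero src/dst value means the field was left unspecified and gets a full match-anything mask, can be written as a tiny helper. The helper name is hypothetical, purely to illustrate the semantics:

    #include <stdint.h>

    /*
     * A zero value means the caller left that field unspecified, so
     * program a full mask (match anything); otherwise honor the
     * caller-supplied mask. Hypothetical helper, not driver code.
     */
    static uint32_t
    fdir_effective_mask(uint32_t value, uint32_t user_mask)
    {
            return ((value == 0) ? 0xffffffffU : user_mask);
    }

With a helper like this, each of the IPv4 writes below would collapse to a single IXGBE_WRITE_REG(hw, IXGBE_FDIRSIP4M, fdir_effective_mask(src_ipv4, input_masks->src_ip_mask)), and likewise for the destination address and the TCP/UDP port halves.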
+ */ + if (src_ipv4 == 0) + IXGBE_WRITE_REG(hw, IXGBE_FDIRSIP4M, 0xffffffff); + else + IXGBE_WRITE_REG(hw, IXGBE_FDIRSIP4M, input_masks->src_ip_mask); + + if (dst_ipv4 == 0) + IXGBE_WRITE_REG(hw, IXGBE_FDIRDIP4M, 0xffffffff); + else + IXGBE_WRITE_REG(hw, IXGBE_FDIRDIP4M, input_masks->dst_ip_mask); + + switch (l4type & IXGBE_ATR_L4TYPE_MASK) { + case IXGBE_ATR_L4TYPE_TCP: + if (src_port == 0) + IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, 0xffff); + else + IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, + input_masks->src_port_mask); + + if (dst_port == 0) + IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, + (IXGBE_READ_REG(hw, IXGBE_FDIRTCPM) | + (0xffff << 16))); + else + IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, + (IXGBE_READ_REG(hw, IXGBE_FDIRTCPM) | + (input_masks->dst_port_mask << 16))); + break; + case IXGBE_ATR_L4TYPE_UDP: + if (src_port == 0) + IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, 0xffff); + else + IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, + input_masks->src_port_mask); + + if (dst_port == 0) + IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, + (IXGBE_READ_REG(hw, IXGBE_FDIRUDPM) | + (0xffff << 16))); + else + IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, + (IXGBE_READ_REG(hw, IXGBE_FDIRUDPM) | + (input_masks->dst_port_mask << 16))); + break; + default: + /* this already would have failed above */ + break; + } + + /* Program the last mask register, FDIRM */ + if (input_masks->vlan_id_mask || !vlan_id) + /* Mask both VLAN and VLANP - bits 0 and 1 */ + fdirm |= (IXGBE_FDIRM_VLANID | IXGBE_FDIRM_VLANP); + + if (input_masks->data_mask || !flex_bytes) + /* Flex bytes need masking, so mask the whole thing - bit 4 */ + fdirm |= IXGBE_FDIRM_FLEX; + + /* Now mask VM pool and destination IPv6 - bits 5 and 2 */ + fdirm |= (IXGBE_FDIRM_POOL | IXGBE_FDIRM_DIPv6); + + IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm); fdircmd |= IXGBE_FDIRCMD_CMD_ADD_FLOW; fdircmd |= IXGBE_FDIRCMD_FILTER_UPDATE; @@ -2063,7 +2146,7 @@ s32 ixgbe_start_hw_rev_1_82599(struct ixgbe_hw *hw) for (i = 0; i < hw->mac.max_rx_queues; i++) { regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); regval &= ~(IXGBE_DCA_RXCTRL_DESC_WRO_EN | - IXGBE_DCA_RXCTRL_DESC_HSRO_EN); + IXGBE_DCA_RXCTRL_DESC_HSRO_EN); IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); } @@ -2192,10 +2275,14 @@ u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw) goto out; switch (hw->phy.type) { - case ixgbe_phy_tw_tyco: - case ixgbe_phy_tw_unknown: + case ixgbe_phy_sfp_passive_tyco: + case ixgbe_phy_sfp_passive_unknown: physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU; break; + case ixgbe_phy_sfp_ftl_active: + case ixgbe_phy_sfp_active_unknown: + physical_layer = IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA; + break; case ixgbe_phy_sfp_avago: case ixgbe_phy_sfp_ftl: case ixgbe_phy_sfp_intel: @@ -2328,3 +2415,30 @@ static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) fw_version_out: return status; } +/** + * ixgbe_enable_relaxed_ordering_82599 - Enable relaxed ordering + * @hw: pointer to hardware structure + * + **/ +void ixgbe_enable_relaxed_ordering_82599(struct ixgbe_hw *hw) +{ + u32 regval; + u32 i; + + DEBUGFUNC("ixgbe_enable_relaxed_ordering_82599"); + + /* Enable relaxed ordering */ + for (i = 0; i < hw->mac.max_tx_queues; i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i)); + regval |= IXGBE_DCA_TXCTRL_TX_WB_RO_EN; + IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval); + } + + for (i = 0; i < hw->mac.max_rx_queues; i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); + regval |= (IXGBE_DCA_RXCTRL_DESC_WRO_EN | + IXGBE_DCA_RXCTRL_DESC_HSRO_EN); + IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); +
} + +} diff --git a/sys/dev/ixgbe/ixgbe_api.c b/sys/dev/ixgbe/ixgbe_api.c index 44644574860c..a65686e29f21 100644 --- a/sys/dev/ixgbe/ixgbe_api.c +++ b/sys/dev/ixgbe/ixgbe_api.c @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2009, Intel Corporation + Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -111,6 +111,7 @@ s32 ixgbe_set_mac_type(struct ixgbe_hw *hw) case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: case IXGBE_DEV_ID_82599_SFP: case IXGBE_DEV_ID_82599_CX4: + case IXGBE_DEV_ID_82599_T3_LOM: hw->mac.type = ixgbe_mac_82599EB; break; default: @@ -167,6 +168,20 @@ s32 ixgbe_start_hw(struct ixgbe_hw *hw) IXGBE_NOT_IMPLEMENTED); } +/** + * ixgbe_enable_relaxed_ordering - Enables tx relaxed ordering, + * which is disabled by default in ixgbe_start_hw(). + * + * @hw: pointer to hardware structure + * + * Enable relaxed ordering. + **/ +void ixgbe_enable_relaxed_ordering(struct ixgbe_hw *hw) +{ + if (hw->mac.ops.enable_relaxed_ordering) + hw->mac.ops.enable_relaxed_ordering(hw); +} + /** * ixgbe_clear_hw_cntrs - Clear hardware counters * @hw: pointer to hardware structure diff --git a/sys/dev/ixgbe/ixgbe_api.h b/sys/dev/ixgbe/ixgbe_api.h index 8ab78ad8720c..48c523c18970 100644 --- a/sys/dev/ixgbe/ixgbe_api.h +++ b/sys/dev/ixgbe/ixgbe_api.h @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2009, Intel Corporation + Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ s32 ixgbe_set_mac_type(struct ixgbe_hw *hw); s32 ixgbe_init_hw(struct ixgbe_hw *hw); s32 ixgbe_reset_hw(struct ixgbe_hw *hw); s32 ixgbe_start_hw(struct ixgbe_hw *hw); +void ixgbe_enable_relaxed_ordering(struct ixgbe_hw *hw); s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw); enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw); s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr); @@ -122,6 +123,7 @@ s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, u8 queue); s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, struct ixgbe_atr_input *input, + struct ixgbe_atr_input_masks *masks, u16 soft_id, u8 queue); u16 ixgbe_atr_compute_hash_82599(struct ixgbe_atr_input *input, u32 key); diff --git a/sys/dev/ixgbe/ixgbe_common.c b/sys/dev/ixgbe/ixgbe_common.c index 89e57d83dabb..217c477a6c6f 100644 --- a/sys/dev/ixgbe/ixgbe_common.c +++ b/sys/dev/ixgbe/ixgbe_common.c @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2009, Intel Corporation + Copyright (c) 2001-2010, Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without @@ -474,8 +474,7 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw) * Prevent the PCI-E bus from from hanging by disabling PCI-E master * access and verify no pending requests */ - if (ixgbe_disable_pcie_master(hw) != IXGBE_SUCCESS) - DEBUGOUT("PCI-E Master disable polling has failed.\n"); + ixgbe_disable_pcie_master(hw); return IXGBE_SUCCESS; } @@ -2198,10 +2197,14 @@ s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) u32 i; u32 reg_val; u32 number_of_queues; - s32 status = IXGBE_ERR_MASTER_REQUESTS_PENDING; + s32 status = IXGBE_SUCCESS; DEBUGFUNC("ixgbe_disable_pcie_master"); + /* Just jump out if bus mastering is already disabled */ + if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO)) + goto out; + /* Disable the receive unit by stopping each queue */ number_of_queues = hw->mac.max_rx_queues; for (i = 0; i < number_of_queues; i++) { @@ -2217,13 +2220,42 @@ s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) IXGBE_WRITE_REG(hw, IXGBE_CTRL, reg_val); for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { - if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO)) { - status = IXGBE_SUCCESS; - break; - } + if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO)) + goto out; usec_delay(100); } + DEBUGOUT("GIO Master Disable bit didn't clear - requesting resets\n"); + status = IXGBE_ERR_MASTER_REQUESTS_PENDING; + + /* + * The GIO Master Disable bit didn't clear. There are multiple reasons + * for this listed in the datasheet 5.2.5.3.2 Master Disable, and they + * all require a double reset to recover from. Before proceeding, we + * first wait a little more to try to ensure that, at a minimum, the + * PCIe block has no transactions pending. + */ + for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + if (!(IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_STATUS) & + IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING)) + break; + usec_delay(100); + } + + if (i == IXGBE_PCI_MASTER_DISABLE_TIMEOUT) + DEBUGOUT("PCIe transaction pending bit also did not clear.\n"); + + /* + * Two consecutive resets are required via CTRL.RST per datasheet + * 5.2.5.3.2 Master Disable. We set a flag to inform the reset routine + * of this need. The first reset prevents new master requests from + * being issued by our device. We then must wait 1usec for any + * remaining completions from the PCIe bus to trickle in, and then reset + * again to clear out any effects they may have had on our device. + */ + hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + +out: return status; } @@ -2695,6 +2727,10 @@ s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan) u32 first_empty_slot = 0; s32 regindex; + /* short cut the special case */ + if (vlan == 0) + return 0; + /* * Search for the vlan id in the VLVF entries. Save off the first empty * slot found along the way @@ -2717,7 +2753,7 @@ s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan) regindex = first_empty_slot; else { DEBUGOUT("No space in VLVF.\n"); - regindex = -1; + regindex = IXGBE_ERR_NO_SPACE; } } @@ -2738,8 +2774,11 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, { s32 regindex; u32 bitindex; + u32 vfta; u32 bits; u32 vt; + u32 targetbit; + bool vfta_changed = FALSE; DEBUGFUNC("ixgbe_set_vfta_generic"); @@ -2749,6 +2788,7 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, /* * this is a 2 part operation - first the VFTA, then the * VLVF and VLVFB if VT Mode is set + * We don't write the VFTA until we know the VLVF part succeeded. 
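The compute-first, commit-late shape of that change can be sketched in isolation. The names below (vfta_stage, vfta_shadow) are hypothetical; the sketch only shows the staging of a read-modify-write so the caller can commit after the dependent VLVF work succeeds:

    #include <stdbool.h>
    #include <stdint.h>

    static uint32_t vfta_shadow[128];        /* stand-in for IXGBE_VFTA */

    /*
     * Compute the new VFTA word and report whether it changed; the
     * caller writes it back only once the VLVF update has succeeded.
     */
    static bool
    vfta_stage(uint32_t vlan, bool on, uint32_t *out)
    {
            uint32_t word = vfta_shadow[(vlan >> 5) & 0x7F];
            uint32_t bit = 1u << (vlan & 0x1F);
            uint32_t next = on ? (word | bit) : (word & ~bit);

            *out = next;
            return (next != word);           /* true => caller must commit */
    }

The change-detection also avoids redundant register writes, and, as the hunk below shows, the driver additionally cancels the commit when other pools/VFs still reference the VLAN.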
*/ /* Part 1 @@ -2759,13 +2799,20 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, */ regindex = (vlan >> 5) & 0x7F; bitindex = vlan & 0x1F; - bits = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex)); - if (vlan_on) - bits |= (1 << bitindex); - else - bits &= ~(1 << bitindex); - IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), bits); + targetbit = (1 << bitindex); + vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex)); + if (vlan_on) { + if (!(vfta & targetbit)) { + vfta |= targetbit; + vfta_changed = TRUE; + } + } else { + if ((vfta & targetbit)) { + vfta &= ~targetbit; + vfta_changed = TRUE; + } + } /* Part 2 * If VT Mode is set @@ -2777,61 +2824,84 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, */ vt = IXGBE_READ_REG(hw, IXGBE_VT_CTL); if (vt & IXGBE_VT_CTL_VT_ENABLE) { - if (vlan == 0) { - regindex = 0; - } else { - regindex = ixgbe_find_vlvf_slot(hw, vlan); - if (regindex < 0) - goto out; - } + s32 vlvf_index; + + vlvf_index = ixgbe_find_vlvf_slot(hw, vlan); + if (vlvf_index < 0) + return vlvf_index; if (vlan_on) { /* set the pool bit */ if (vind < 32) { bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB(regindex*2)); + IXGBE_VLVFB(vlvf_index*2)); bits |= (1 << vind); IXGBE_WRITE_REG(hw, - IXGBE_VLVFB(regindex*2), + IXGBE_VLVFB(vlvf_index*2), bits); } else { bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB((regindex*2)+1)); - bits |= (1 << vind); + IXGBE_VLVFB((vlvf_index*2)+1)); + bits |= (1 << (vind-32)); IXGBE_WRITE_REG(hw, - IXGBE_VLVFB((regindex*2)+1), + IXGBE_VLVFB((vlvf_index*2)+1), bits); } } else { /* clear the pool bit */ if (vind < 32) { bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB(regindex*2)); + IXGBE_VLVFB(vlvf_index*2)); bits &= ~(1 << vind); IXGBE_WRITE_REG(hw, - IXGBE_VLVFB(regindex*2), + IXGBE_VLVFB(vlvf_index*2), bits); bits |= IXGBE_READ_REG(hw, - IXGBE_VLVFB((regindex*2)+1)); + IXGBE_VLVFB((vlvf_index*2)+1)); } else { bits = IXGBE_READ_REG(hw, - IXGBE_VLVFB((regindex*2)+1)); - bits &= ~(1 << vind); + IXGBE_VLVFB((vlvf_index*2)+1)); + bits &= ~(1 << (vind-32)); IXGBE_WRITE_REG(hw, - IXGBE_VLVFB((regindex*2)+1), + IXGBE_VLVFB((vlvf_index*2)+1), bits); bits |= IXGBE_READ_REG(hw, - IXGBE_VLVFB(regindex*2)); + IXGBE_VLVFB(vlvf_index*2)); } } - if (bits) - IXGBE_WRITE_REG(hw, IXGBE_VLVF(regindex), + /* + * If there are still bits set in the VLVFB registers + * for the VLAN ID indicated we need to see if the + * caller is requesting that we clear the VFTA entry bit. + * If the caller has requested that we clear the VFTA + * entry bit but there are still pools/VFs using this VLAN + * ID entry then ignore the request. We're not worried + * about the case where we're turning the VFTA VLAN ID + * entry bit on, only when requested to turn it off as + * there may be multiple pools and/or VFs using the + * VLAN ID entry. In that case we cannot clear the + * VFTA bit until all pools/VFs using that VLAN ID have also + * been cleared. This will be indicated by "bits" being + * zero. + */ + if (bits) { + IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), (IXGBE_VLVF_VIEN | vlan)); + if (!vlan_on) { + /* someone wants to clear the vfta entry + * but some pools/VFs are still using it. + * Ignore it. 
*/ + vfta_changed = FALSE; + } + } else - IXGBE_WRITE_REG(hw, IXGBE_VLVF(regindex), 0); + IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0); } -out: + + if (vfta_changed) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), vfta); + return IXGBE_SUCCESS; } @@ -2869,14 +2939,23 @@ s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw) * Reads the links register to determine if link is up and the current speed **/ s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed, - bool *link_up, bool link_up_wait_to_complete) + bool *link_up, bool link_up_wait_to_complete) { - u32 links_reg; + u32 links_reg, links_orig; u32 i; DEBUGFUNC("ixgbe_check_mac_link_generic"); + /* clear the old state */ + links_orig = IXGBE_READ_REG(hw, IXGBE_LINKS); + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + + if (links_orig != links_reg) { + DEBUGOUT2("LINKS changed from %08X to %08X\n", + links_orig, links_reg); + } + if (link_up_wait_to_complete) { for (i = 0; i < IXGBE_LINK_UP_TIME; i++) { if (links_reg & IXGBE_LINKS_UP) { diff --git a/sys/dev/ixgbe/ixgbe_phy.c b/sys/dev/ixgbe/ixgbe_phy.c index 9bab98d29522..7ec2981d4c24 100644 --- a/sys/dev/ixgbe/ixgbe_phy.c +++ b/sys/dev/ixgbe/ixgbe_phy.c @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2009, Intel Corporation + Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -77,7 +77,8 @@ s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw) phy->ops.i2c_bus_clear = &ixgbe_i2c_bus_clear; phy->ops.identify_sfp = &ixgbe_identify_sfp_module_generic; phy->sfp_type = ixgbe_sfp_type_unknown; - + phy->ops.check_overtemp = &ixgbe_tn_check_overtemp; + phy->ops.set_low_power_state = &ixgbe_tn_set_low_power_state; return IXGBE_SUCCESS; } @@ -241,13 +242,19 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) IXGBE_MDIO_PHY_XS_DEV_TYPE, IXGBE_MDIO_PHY_XS_RESET); - /* Poll for reset bit to self-clear indicating reset is complete */ - for (i = 0; i < 500; i++) { - msec_delay(1); + /* + * Poll for reset bit to self-clear indicating reset is complete. + * Some PHYs could take up to 3 seconds to complete and need about + * 1.7 usec delay after the reset is complete. 
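The polling shape this comment describes, up to 30 iterations of 100 ms for a self-clearing reset bit plus a short settle delay, generalizes to a small sketch. The function-pointer parameters below are hypothetical stand-ins for the register read and the millisecond sleep:

    /*
     * Generic poll-for-self-clearing-bit loop: up to 30 x 100ms = 3s,
     * with a brief settle delay once the bit clears. Not driver code.
     */
    static int
    wait_reset_clear(int (*read_reset_bit)(void), void (*msleep_ms)(int))
    {
            int i;

            for (i = 0; i < 30; i++) {
                    msleep_ms(100);
                    if (!read_reset_bit()) {
                            /* allow ~2us settle time after completion */
                            return (0);
                    }
            }
            return (-1);            /* still in reset: report failure */
    }

Sleeping before the first read matches the driver's loop: the reset was just issued, so the bit cannot have cleared yet and an immediate read would be wasted.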
+ */ + for (i = 0; i < 30; i++) { + msec_delay(100); hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, IXGBE_MDIO_PHY_XS_DEV_TYPE, &ctrl); - if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) + if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) { + usec_delay(2); break; + } } if (ctrl & IXGBE_MDIO_PHY_XS_RESET) { @@ -922,6 +929,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) u8 comp_codes_10g = 0; u8 oui_bytes[3] = {0, 0, 0}; u8 cable_tech = 0; + u8 cable_spec = 0; u16 enforce_sfp = 0; DEBUGFUNC("ixgbe_identify_sfp_module_generic"); @@ -968,6 +976,8 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) * 4 SFP_DA_CORE1 - 82599-specific * 5 SFP_SR/LR_CORE0 - 82599-specific * 6 SFP_SR/LR_CORE1 - 82599-specific + * 7 SFP_act_lmt_DA_CORE0 - 82599-specific + * 8 SFP_act_lmt_DA_CORE1 - 82599-specific */ if (hw->mac.type == ixgbe_mac_82598EB) { if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) @@ -979,29 +989,40 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) else hw->phy.sfp_type = ixgbe_sfp_type_unknown; } else if (hw->mac.type == ixgbe_mac_82599EB) { - if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) { if (hw->bus.lan_id == 0) hw->phy.sfp_type = ixgbe_sfp_type_da_cu_core0; else hw->phy.sfp_type = ixgbe_sfp_type_da_cu_core1; - else if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE) + } else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) { + hw->phy.ops.read_i2c_eeprom( + hw, IXGBE_SFF_CABLE_SPEC_COMP, + &cable_spec); + if (cable_spec & + IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_da_act_lmt_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_da_act_lmt_core1; + } else + hw->phy.sfp_type = + ixgbe_sfp_type_unknown; + } else if (comp_codes_10g & + (IXGBE_SFF_10GBASESR_CAPABLE | + IXGBE_SFF_10GBASELR_CAPABLE)) { if (hw->bus.lan_id == 0) hw->phy.sfp_type = ixgbe_sfp_type_srlr_core0; else hw->phy.sfp_type = ixgbe_sfp_type_srlr_core1; - else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE) - if (hw->bus.lan_id == 0) - hw->phy.sfp_type = - ixgbe_sfp_type_srlr_core0; - else - hw->phy.sfp_type = - ixgbe_sfp_type_srlr_core1; - else + } else { hw->phy.sfp_type = ixgbe_sfp_type_unknown; + } } if (hw->phy.sfp_type != stored_sfp_type) @@ -1036,10 +1057,14 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) switch (vendor_oui) { case IXGBE_SFF_VENDOR_OUI_TYCO: if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) - hw->phy.type = ixgbe_phy_tw_tyco; + hw->phy.type = + ixgbe_phy_sfp_passive_tyco; break; case IXGBE_SFF_VENDOR_OUI_FTL: - hw->phy.type = ixgbe_phy_sfp_ftl; + if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) + hw->phy.type = ixgbe_phy_sfp_ftl_active; + else + hw->phy.type = ixgbe_phy_sfp_ftl; break; case IXGBE_SFF_VENDOR_OUI_AVAGO: hw->phy.type = ixgbe_phy_sfp_avago; @@ -1049,15 +1074,20 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) break; default: if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) - hw->phy.type = ixgbe_phy_tw_unknown; + hw->phy.type = + ixgbe_phy_sfp_passive_unknown; + else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) + hw->phy.type = + ixgbe_phy_sfp_active_unknown; else hw->phy.type = ixgbe_phy_sfp_unknown; break; } } - /* All passive DA cables are supported */ - if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) { + /* Allow any DA cable vendor */ + if (cable_tech & (IXGBE_SFF_DA_PASSIVE_CABLE | + IXGBE_SFF_DA_ACTIVE_CABLE)) { status = IXGBE_SUCCESS; goto out; } @@ -1108,6 +1138,7 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, u16 *data_offset) { u16 
sfp_id; + u16 sfp_type = hw->phy.sfp_type; DEBUGFUNC("ixgbe_get_sfp_init_sequence_offsets"); @@ -1121,6 +1152,12 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, (hw->phy.sfp_type == ixgbe_sfp_type_da_cu)) return IXGBE_ERR_SFP_NOT_SUPPORTED; + /* Limiting active cables must be initialized as SR modules */ + if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0) + sfp_type = ixgbe_sfp_type_srlr_core0; + else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1) + sfp_type = ixgbe_sfp_type_srlr_core1; + /* Read offset to PHY init contents */ hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset); @@ -1137,7 +1174,7 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, hw->eeprom.ops.read(hw, *list_offset, &sfp_id); while (sfp_id != IXGBE_PHY_INIT_END_NL) { - if (sfp_id == hw->phy.sfp_type) { + if (sfp_id == sfp_type) { (*list_offset)++; hw->eeprom.ops.read(hw, *list_offset, data_offset); if ((!*data_offset) || (*data_offset == 0xFFFF)) { @@ -1722,3 +1759,56 @@ void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw) /* Put the i2c bus back to default state */ ixgbe_i2c_stop(hw); } + +/** + * ixgbe_tn_check_overtemp - Checks if an overtemp occurred. + * @hw: pointer to hardware structure + * + * Checks if the LASI temp alarm status was triggered due to overtemp + **/ +s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_SUCCESS; + u16 phy_data = 0; + + DEBUGFUNC("ixgbe_tn_check_overtemp"); + + if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM) + goto out; + + /* Check that the LASI temp alarm status was triggered */ + hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_data); + + if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM)) + goto out; + + status = IXGBE_ERR_OVERTEMP; +out: + return status; +} + + +/** + * ixgbe_tn_set_low_power_state - Sets the Teranetics PHY into low power state + * @hw: pointer to hardware structure + * + * Sets the phy into low power mode when LASI temp alarm status is triggered + **/ +s32 ixgbe_tn_set_low_power_state(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_SUCCESS; + u16 phy_data = 0; + + DEBUGFUNC("ixgbe_tn_set_low_power_state"); + + /* Set the phy into low power mode */ + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_PMD_CONTROL_ADDR, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_data); + phy_data |= IXGBE_MDIO_PHY_SET_LOW_POWER_MODE; + hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_PMD_CONTROL_ADDR, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, phy_data); + + return status; +} + diff --git a/sys/dev/ixgbe/ixgbe_phy.h b/sys/dev/ixgbe/ixgbe_phy.h index 39f3bc83f0a4..8f49aa83e8f3 100644 --- a/sys/dev/ixgbe/ixgbe_phy.h +++ b/sys/dev/ixgbe/ixgbe_phy.h @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2009, Intel Corporation + Copyright (c) 2001-2010, Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without @@ -47,9 +47,12 @@ #define IXGBE_SFF_1GBE_COMP_CODES 0x6 #define IXGBE_SFF_10GBE_COMP_CODES 0x3 #define IXGBE_SFF_CABLE_TECHNOLOGY 0x8 +#define IXGBE_SFF_CABLE_SPEC_COMP 0x3C /* Bitmasks */ #define IXGBE_SFF_DA_PASSIVE_CABLE 0x4 +#define IXGBE_SFF_DA_ACTIVE_CABLE 0x8 +#define IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING 0x4 #define IXGBE_SFF_1GBASESX_CAPABLE 0x1 #define IXGBE_SFF_1GBASELX_CAPABLE 0x2 #define IXGBE_SFF_10GBASESR_CAPABLE 0x10 @@ -84,6 +87,9 @@ #define IXGBE_I2C_T_SU_STO 4 #define IXGBE_I2C_T_BUF 5 +#define IXGBE_TN_LASI_STATUS_REG 0x9005 +#define IXGBE_TN_LASI_STATUS_TEMP_ALARM 0x0008 + s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw); bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr); @@ -119,6 +125,8 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw); s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, u16 *list_offset, u16 *data_offset); +s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw); +s32 ixgbe_tn_set_low_power_state(struct ixgbe_hw *hw); s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, u8 *data); s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, diff --git a/sys/dev/ixgbe/ixgbe_type.h b/sys/dev/ixgbe/ixgbe_type.h index 0b1011977094..2e1f0623b42d 100644 --- a/sys/dev/ixgbe/ixgbe_type.h +++ b/sys/dev/ixgbe/ixgbe_type.h @@ -1,6 +1,6 @@ /****************************************************************************** - Copyright (c) 2001-2009, Intel Corporation + Copyright (c) 2001-2010, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without @@ -57,9 +57,11 @@ #define IXGBE_DEV_ID_82599_KX4 0x10F7 #define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514 #define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8 +#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C #define IXGBE_DEV_ID_82599_CX4 0x10F9 #define IXGBE_DEV_ID_82599_SFP 0x10FB #define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC +#define IXGBE_DEV_ID_82599_T3_LOM 0x151C /* General Registers */ #define IXGBE_CTRL 0x00000 @@ -89,7 +91,7 @@ /* General Receive Control */ #define IXGBE_GRC_MNG 0x00000001 /* Manageability Enable */ -#define IXGBE_GRC_APME 0x00000002 /* Advanced Power Management Enable */ +#define IXGBE_GRC_APME 0x00000002 /* APM enabled in EEPROM */ #define IXGBE_VPDDIAG0 0x10204 #define IXGBE_VPDDIAG1 0x10208 @@ -198,6 +200,7 @@ #define IXGBE_RFCTL 0x05008 #define IXGBE_DRECCCTL 0x02F08 #define IXGBE_DRECCCTL_DISABLE 0 + /* Multicast Table Array - 128 entries */ #define IXGBE_MTA(_i) (0x05200 + ((_i) * 4)) #define IXGBE_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ @@ -334,7 +337,7 @@ /* Wake Up Control */ #define IXGBE_WUC_PME_EN 0x00000002 /* PME Enable */ #define IXGBE_WUC_PME_STATUS 0x00000004 /* PME Status */ -#define IXGBE_WUC_ADVD3WUC 0x00000010 /* D3Cold wake up cap. 
enable*/ +#define IXGBE_WUC_WKEN 0x00000010 /* Enable PE_WAKE_N pin assertion */ /* Wake Up Filter Control */ #define IXGBE_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ @@ -736,6 +739,12 @@ #define IXGBE_GCR_CMPL_TMOUT_RESEND 0x00010000 #define IXGBE_GCR_CAP_VER2 0x00040000 +#define IXGBE_GCR_EXT_MSIX_EN 0x80000000 +#define IXGBE_GCR_EXT_VT_MODE_16 0x00000001 +#define IXGBE_GCR_EXT_VT_MODE_32 0x00000002 +#define IXGBE_GCR_EXT_VT_MODE_64 0x00000003 +#define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \ + IXGBE_GCR_EXT_VT_MODE_64) /* Time Sync Registers */ #define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */ #define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */ @@ -889,6 +898,8 @@ #define IXGBE_RDRXCTL_AGGDIS 0x00010000 /* Aggregation disable */ #define IXGBE_RDRXCTL_RSCFRSTSIZE 0x003E0000 /* RSC First packet size */ #define IXGBE_RDRXCTL_RSCLLIDIS 0x00800000 /* Disable RSC compl on LLI */ +#define IXGBE_RDRXCTL_RSCACKC 0x02000000 /* must set 1 when RSC enabled */ +#define IXGBE_RDRXCTL_FCOE_WRFIX 0x04000000 /* must set 1 when RSC enabled */ /* RQTC Bit Masks and Shifts */ #define IXGBE_RQTC_SHIFT_TC(_i) ((_i) * 4) @@ -1020,7 +1031,9 @@ #define IXGBE_MDIO_PHY_10GBASET_ABILITY 0x0004 /* 10GBaseT capable */ #define IXGBE_MDIO_PHY_1000BASET_ABILITY 0x0020 /* 1000BaseT capable */ #define IXGBE_MDIO_PHY_100BASETX_ABILITY 0x0080 /* 100BaseTX capable */ +#define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE 0x0800 /* Set low power mode */ +#define IXGBE_MDIO_PMA_PMD_CONTROL_ADDR 0x0000 /* PMA/PMD Control Reg */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT 0xC30C /* PHY_XS SDA/SCL Status Reg */ @@ -1369,10 +1382,12 @@ * EAPOL 802.1x (0x888e): Filter 0 * FCoE (0x8906): Filter 2 * 1588 (0x88f7): Filter 3 + * FIP (0x8914): Filter 4 */ #define IXGBE_ETQF_FILTER_EAPOL 0 #define IXGBE_ETQF_FILTER_FCOE 2 #define IXGBE_ETQF_FILTER_1588 3 +#define IXGBE_ETQF_FILTER_FIP 4 /* VLAN Control Bit Masks */ #define IXGBE_VLNCTRL_VET 0x0000FFFF /* bits 0-15 */ #define IXGBE_VLNCTRL_CFI 0x10000000 /* bit 28 */ @@ -1476,6 +1491,7 @@ #define IXGBE_AUTOC2_10G_XFI (0x1 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) #define IXGBE_AUTOC2_10G_SFI (0x2 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) + /* LINKS Bit Masks */ #define IXGBE_LINKS_KX_AN_COMP 0x80000000 #define IXGBE_LINKS_UP 0x40000000 @@ -1655,6 +1671,8 @@ #define IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN 0x1 /* Alt. 
WWN base exists */ /* PCI Bus Info */ +#define IXGBE_PCI_DEVICE_STATUS 0xAA +#define IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING 0x0020 #define IXGBE_PCI_LINK_STATUS 0xB2 #define IXGBE_PCI_DEVICE_CONTROL2 0xC8 #define IXGBE_PCI_LINK_WIDTH 0x3F0 @@ -1787,6 +1805,7 @@ #define IXGBE_MTQC_64Q_1PB 0x0 /* 64 queues 1 pack buffer */ #define IXGBE_MTQC_32VF 0x8 /* 4 TX Queues per pool w/32VF's */ #define IXGBE_MTQC_64VF 0x4 /* 2 TX Queues per pool w/64VF's */ +#define IXGBE_MTQC_4TC_4TQ 0x8 /* 4 TC if RT_ENA and VT_ENA */ #define IXGBE_MTQC_8TC_8TQ 0xC /* 8 TC if RT_ENA or 8 TQ if VT_ENA */ /* Receive Descriptor bit definitions */ @@ -2000,10 +2019,9 @@ enum ixgbe_fdir_pballoc_type { #define IXGBE_FDIRM_VLANID 0x00000001 #define IXGBE_FDIRM_VLANP 0x00000002 #define IXGBE_FDIRM_POOL 0x00000004 -#define IXGBE_FDIRM_L3P 0x00000008 -#define IXGBE_FDIRM_L4P 0x00000010 -#define IXGBE_FDIRM_FLEX 0x00000020 -#define IXGBE_FDIRM_DIPv6 0x00000040 +#define IXGBE_FDIRM_L4P 0x00000008 +#define IXGBE_FDIRM_FLEX 0x00000010 +#define IXGBE_FDIRM_DIPv6 0x00000020 #define IXGBE_FDIRFREE_FREE_MASK 0xFFFF #define IXGBE_FDIRFREE_FREE_SHIFT 0 @@ -2218,6 +2236,8 @@ typedef u32 ixgbe_physical_layer; #define IXGBE_PHYSICAL_LAYER_1000BASE_BX 0x0400 #define IXGBE_PHYSICAL_LAYER_10GBASE_KR 0x0800 #define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000 +#define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000 + /* Software ATR hash keys */ #define IXGBE_ATR_BUCKET_HASH_KEY 0xE214AD3D @@ -2258,6 +2278,15 @@ struct ixgbe_atr_input { u8 byte_stream[42]; }; +struct ixgbe_atr_input_masks { + u32 src_ip_mask; + u32 dst_ip_mask; + u16 src_port_mask; + u16 dst_port_mask; + u16 vlan_id_mask; + u16 data_mask; +}; + enum ixgbe_eeprom_type { ixgbe_eeprom_uninitialized = 0, ixgbe_eeprom_spi, @@ -2281,10 +2310,12 @@ enum ixgbe_phy_type { ixgbe_phy_qt, ixgbe_phy_xaui, ixgbe_phy_nl, - ixgbe_phy_tw_tyco, - ixgbe_phy_tw_unknown, + ixgbe_phy_sfp_passive_tyco, + ixgbe_phy_sfp_passive_unknown, + ixgbe_phy_sfp_active_unknown, ixgbe_phy_sfp_avago, ixgbe_phy_sfp_ftl, + ixgbe_phy_sfp_ftl_active, ixgbe_phy_sfp_unknown, ixgbe_phy_sfp_intel, ixgbe_phy_sfp_unsupported, /*Enforce bit set with unsupported module*/ @@ -2312,6 +2343,8 @@ enum ixgbe_sfp_type { ixgbe_sfp_type_da_cu_core1 = 4, ixgbe_sfp_type_srlr_core0 = 5, ixgbe_sfp_type_srlr_core1 = 6, + ixgbe_sfp_type_da_act_lmt_core0 = 7, + ixgbe_sfp_type_da_act_lmt_core1 = 8, ixgbe_sfp_type_not_present = 0xFFFE, ixgbe_sfp_type_unknown = 0xFFFF }; @@ -2354,25 +2387,25 @@ enum ixgbe_bus_type { /* PCI bus speeds */ enum ixgbe_bus_speed { ixgbe_bus_speed_unknown = 0, - ixgbe_bus_speed_33, - ixgbe_bus_speed_66, - ixgbe_bus_speed_100, - ixgbe_bus_speed_120, - ixgbe_bus_speed_133, - ixgbe_bus_speed_2500, - ixgbe_bus_speed_5000, + ixgbe_bus_speed_33 = 33, + ixgbe_bus_speed_66 = 66, + ixgbe_bus_speed_100 = 100, + ixgbe_bus_speed_120 = 120, + ixgbe_bus_speed_133 = 133, + ixgbe_bus_speed_2500 = 2500, + ixgbe_bus_speed_5000 = 5000, ixgbe_bus_speed_reserved }; /* PCI bus widths */ enum ixgbe_bus_width { ixgbe_bus_width_unknown = 0, - ixgbe_bus_width_pcie_x1, - ixgbe_bus_width_pcie_x2, + ixgbe_bus_width_pcie_x1 = 1, + ixgbe_bus_width_pcie_x2 = 2, ixgbe_bus_width_pcie_x4 = 4, ixgbe_bus_width_pcie_x8 = 8, - ixgbe_bus_width_32, - ixgbe_bus_width_64, + ixgbe_bus_width_32 = 32, + ixgbe_bus_width_64 = 64, ixgbe_bus_width_reserved }; @@ -2503,6 +2536,7 @@ struct ixgbe_mac_operations { s32 (*reset_hw)(struct ixgbe_hw *); s32 (*start_hw)(struct ixgbe_hw *); s32 (*clear_hw_cntrs)(struct ixgbe_hw *); + void (*enable_relaxed_ordering)(struct 
ixgbe_hw *); enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *); u32 (*get_supported_physical_layer)(struct ixgbe_hw *); s32 (*get_mac_addr)(struct ixgbe_hw *, u8 *); @@ -2570,6 +2604,8 @@ struct ixgbe_phy_operations { s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *); s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8); void (*i2c_bus_clear)(struct ixgbe_hw *); + s32 (*check_overtemp)(struct ixgbe_hw *); + s32 (*set_low_power_state)(struct ixgbe_hw *); }; struct ixgbe_eeprom_info { @@ -2580,6 +2616,7 @@ struct ixgbe_eeprom_info { u16 address_bits; }; +#define IXGBE_FLAGS_DOUBLE_RESET_REQUIRED 0x01 struct ixgbe_mac_info { struct ixgbe_mac_operations ops; enum ixgbe_mac_type type; @@ -2603,6 +2640,7 @@ struct ixgbe_mac_info { u32 orig_autoc2; bool orig_link_settings_stored; bool autotry_restart; + u8 flags; }; struct ixgbe_phy_info { @@ -2668,6 +2706,8 @@ struct ixgbe_hw { #define IXGBE_ERR_NO_SAN_ADDR_PTR -22 #define IXGBE_ERR_FDIR_REINIT_FAILED -23 #define IXGBE_ERR_EEPROM_VERSION -24 +#define IXGBE_ERR_NO_SPACE -25 +#define IXGBE_ERR_OVERTEMP -26 #define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF diff --git a/sys/modules/ixgbe/Makefile b/sys/modules/ixgbe/Makefile index 844ac7c5e55d..2de7549a581b 100644 --- a/sys/modules/ixgbe/Makefile +++ b/sys/modules/ixgbe/Makefile @@ -6,7 +6,7 @@ SRCS += ixgbe.c # Shared source SRCS += ixgbe_common.c ixgbe_api.c ixgbe_phy.c SRCS += ixgbe_82599.c ixgbe_82598.c -CFLAGS+= -I${.CURDIR}/../../dev/ixgbe -DSMP +CFLAGS+= -I${.CURDIR}/../../dev/ixgbe -DSMP -DIXGBE_FDIR clean: rm -f device_if.h bus_if.h pci_if.h setdef* *_StripErr
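One closing note on the Makefile change: adding -DIXGBE_FDIR to CFLAGS compiles in the Flow Director paths that the driver guards with the preprocessor. A minimal illustration of that gating style (hypothetical setup_fdir name, not the driver's function):

    /* Illustrative only: how a -DIXGBE_FDIR build flag gates code paths. */
    #ifdef IXGBE_FDIR
    static void setup_fdir(void) { /* FDIR init would run here */ }
    #else
    static void setup_fdir(void) { /* compiled out without -DIXGBE_FDIR */ }
    #endif

Builders who do not want Flow Director support can drop the define from CFLAGS and the corresponding #ifdef IXGBE_FDIR blocks throughout the driver simply compile away.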