Performance enhancements to reduce CPU utilization for a large number of
TCP connections (on the order of tens of thousands) with predominantly
transmit traffic.

Add the choice to perform receive operations either in the interrupt
thread (ithread) or in the taskqueue thread.

Submitted by:	Vaishali.Kulkarni@cavium.com
MFC after:	5 days
David C Somayajulu 2017-08-11 17:43:25 +00:00
parent 9de5f67de2
commit 45f1312387
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=322408
4 changed files with 488 additions and 126 deletions


@ -50,9 +50,10 @@ struct qlnx_ivec {
typedef struct qlnx_ivec qlnx_ivec_t;
//#define QLNX_MAX_RSS 30
#define QLNX_MAX_RSS 16
#define QLNX_MAX_TC 1
//#define QLNX_MAX_RSS 30
#define QLNX_MAX_RSS 36
#define QLNX_DEFAULT_RSS 16
#define QLNX_MAX_TC 1
enum QLNX_STATE {
QLNX_STATE_CLOSED,
@ -201,6 +202,17 @@ struct qlnx_fastpath {
uint64_t tx_pkts_freed;
uint64_t tx_pkts_transmitted;
uint64_t tx_pkts_completed;
uint64_t tx_tso_pkts;
uint64_t tx_non_tso_pkts;
#ifdef QLNX_TRACE_PERF_DATA
uint64_t tx_pkts_trans_ctx;
uint64_t tx_pkts_compl_ctx;
uint64_t tx_pkts_trans_fp;
uint64_t tx_pkts_compl_fp;
uint64_t tx_pkts_compl_intr;
#endif
uint64_t tx_lso_wnd_min_len;
uint64_t tx_defrag;
uint64_t tx_nsegs_gt_elem_left;
@ -209,6 +221,13 @@ struct qlnx_fastpath {
uint32_t tx_tso_max_pkt_len;
uint32_t tx_tso_min_pkt_len;
uint64_t tx_pkts[QLNX_FP_MAX_SEGS];
#ifdef QLNX_TRACE_PERF_DATA
uint64_t tx_pkts_hist[QLNX_FP_MAX_SEGS];
uint64_t tx_comInt[QLNX_FP_MAX_SEGS];
uint64_t tx_pkts_q[QLNX_FP_MAX_SEGS];
#endif
uint64_t err_tx_nsegs_gt_elem_left;
uint64_t err_tx_dmamap_create;
uint64_t err_tx_defrag_dmamap_load;
@ -301,7 +320,12 @@ typedef struct qlnx_link_output qlnx_link_output_t;
#define QLNX_MFW_VERSION_LENGTH 32
#define QLNX_STORMFW_VERSION_LENGTH 32
#define QLNX_TX_ELEM_RESERVE 2
#define QLNX_TX_ELEM_RESERVE 2
#define QLNX_TX_ELEM_THRESH 128
#define QLNX_TX_ELEM_MAX_THRESH 512
#define QLNX_TX_ELEM_MIN_THRESH 32
#define QLNX_TX_COMPL_THRESH 32
#define QLNX_TPA_MAX_AGG_BUFFERS (20)
@ -454,6 +478,7 @@ struct qlnx_host {
qlnx_storm_stats_t storm_stats[QLNX_STORM_STATS_TOTAL];
uint32_t storm_stats_index;
uint32_t storm_stats_enable;
uint32_t storm_stats_gather;
uint32_t personality;
};
@ -470,7 +495,10 @@ typedef struct qlnx_host qlnx_host_t;
#define QLNX_MAX_MTU 9000
#define QLNX_MAX_SEGMENTS_NON_TSO (ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1)
#define QLNX_MAX_TSO_FRAME_SIZE ((64 * 1024 - 1) + 22)
//#define QLNX_MAX_TSO_FRAME_SIZE ((64 * 1024 - 1) + 22)
#define QLNX_MAX_TSO_FRAME_SIZE 65536
#define QLNX_MAX_TX_MBUF_SIZE 65536 /* bytes - bd_len = 16bits */
#define QL_MAC_CMP(mac1, mac2) \
((((*(uint32_t *) mac1) == (*(uint32_t *) mac2) && \
@ -703,5 +731,17 @@ extern void qlnx_fill_link(struct ecore_hwfn *hwfn,
((flags) & (PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK \
<< PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT))
#if defined(__i386__) || defined(__amd64__)
static __inline
void prefetch(void *x)
{
__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
}
#else
#define prefetch(x)
#endif
#endif /* #ifndef _QLNX_DEF_H_ */


@ -94,6 +94,8 @@ static int qlnx_get_ifq_snd_maxlen(qlnx_host_t *ha);
static uint32_t qlnx_get_optics(qlnx_host_t *ha,
struct qlnx_link_output *if_link);
static int qlnx_transmit(struct ifnet *ifp, struct mbuf *mp);
static int qlnx_transmit_locked(struct ifnet *ifp, struct qlnx_fastpath *fp,
struct mbuf *mp);
static void qlnx_qflush(struct ifnet *ifp);
static int qlnx_alloc_parent_dma_tag(qlnx_host_t *ha);
@ -133,6 +135,8 @@ static void qlnx_timer(void *arg);
static int qlnx_alloc_tx_br(qlnx_host_t *ha, struct qlnx_fastpath *fp);
static void qlnx_free_tx_br(qlnx_host_t *ha, struct qlnx_fastpath *fp);
static void qlnx_trigger_dump(qlnx_host_t *ha);
static uint16_t qlnx_num_tx_compl(qlnx_host_t *ha, struct qlnx_fastpath *fp,
struct qlnx_tx_queue *txq);
static void qlnx_tx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp,
struct qlnx_tx_queue *txq);
static int qlnx_rx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp, int budget,
@ -215,6 +219,12 @@ char qlnx_name_str[NAME_SIZE];
#define QLOGIC_PCI_DEVICE_ID_8070 0x8070
#endif
SYSCTL_NODE(_hw, OID_AUTO, qlnxe, CTLFLAG_RD, 0, "qlnxe driver parameters");
/* Number of Queues: 0 (Auto) or 1 to 32 (fixed queue number) */
static int qlnxe_queue_count = QLNX_DEFAULT_RSS;
SYSCTL_INT(_hw_qlnxe, OID_AUTO, queue_count, CTLFLAG_RDTUN,
&qlnxe_queue_count, 0, "Multi-Queue queue count");
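Because queue_count is registered with CTLFLAG_RDTUN it is read-only at run time and is fetched as a loader tunable of the same name at boot. A minimal usage sketch (the value is illustrative; 0 selects the auto/default count, and out-of-range values are reset to 0 by qlnx_get_params()):

    # /boot/loader.conf
    hw.qlnxe.queue_count="16"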
static int
qlnx_valid_device(device_t dev)
{
@ -302,6 +312,25 @@ qlnx_pci_probe(device_t dev)
return (BUS_PROBE_DEFAULT);
}
static uint16_t
qlnx_num_tx_compl(qlnx_host_t *ha, struct qlnx_fastpath *fp,
struct qlnx_tx_queue *txq)
{
u16 hw_bd_cons;
u16 ecore_cons_idx;
uint16_t diff;
hw_bd_cons = le16toh(*txq->hw_cons_ptr);
ecore_cons_idx = ecore_chain_get_cons_idx(&txq->tx_pbl);
if (hw_bd_cons < ecore_cons_idx) {
diff = (1 << 16) - (ecore_cons_idx - hw_bd_cons);
} else {
diff = hw_bd_cons - ecore_cons_idx;
}
return diff;
}
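Both hw_bd_cons and the ecore chain consumer index are free-running 16-bit counters, so the helper computes their distance modulo 2^16: with ecore_cons_idx = 0xFFF0 and hw_bd_cons = 0x0010 the hardware index has wrapped, and diff = 0x10000 - (0xFFF0 - 0x0010) = 0x20 outstanding completions. A standalone sketch of the same arithmetic (illustration only, not driver code; plain unsigned 16-bit subtraction wraps to the identical result as the explicit branch above):

    #include <stdint.h>
    #include <assert.h>

    /* distance between two free-running 16-bit ring indices */
    static uint16_t
    ring_distance(uint16_t hw_cons, uint16_t sw_cons)
    {
            return ((uint16_t)(hw_cons - sw_cons)); /* wraps modulo 65536 */
    }

    int
    main(void)
    {
            assert(ring_distance(0x0010, 0xFFF0) == 0x20);  /* wrapped case */
            assert(ring_distance(0x0100, 0x00F0) == 0x10);  /* normal case */
            return (0);
    }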
static void
qlnx_sp_intr(void *arg)
@ -395,14 +424,11 @@ qlnx_fp_taskqueue(void *context, int pending)
struct qlnx_fastpath *fp;
qlnx_host_t *ha;
struct ifnet *ifp;
struct mbuf *mp;
int ret = -1;
struct thread *cthread;
#ifdef QLNX_RCV_IN_TASKQ
int lro_enable;
int rx_int = 0, total_rx_count = 0;
struct thread *cthread;
#endif /* #ifdef QLNX_RCV_IN_TASKQ */
fp = context;
@ -410,6 +436,12 @@ qlnx_fp_taskqueue(void *context, int pending)
if (fp == NULL)
return;
ha = (qlnx_host_t *)fp->edev;
ifp = ha->ifp;
#ifdef QLNX_RCV_IN_TASKQ
cthread = curthread;
thread_lock(cthread);
@ -419,112 +451,81 @@ qlnx_fp_taskqueue(void *context, int pending)
thread_unlock(cthread);
ha = (qlnx_host_t *)fp->edev;
lro_enable = ifp->if_capenable & IFCAP_LRO;
ifp = ha->ifp;
rx_int = qlnx_rx_int(ha, fp, ha->rx_pkt_threshold, lro_enable);
#ifdef QLNX_RCV_IN_TASKQ
{
lro_enable = ifp->if_capenable & IFCAP_LRO;
rx_int = qlnx_rx_int(ha, fp, ha->rx_pkt_threshold, lro_enable);
if (rx_int) {
fp->rx_pkts += rx_int;
total_rx_count += rx_int;
}
if (rx_int) {
fp->rx_pkts += rx_int;
total_rx_count += rx_int;
}
#ifdef QLNX_SOFT_LRO
{
struct lro_ctrl *lro;
lro = &fp->rxq->lro;
{
struct lro_ctrl *lro;
if (lro_enable && total_rx_count) {
lro = &fp->rxq->lro;
if (lro_enable && total_rx_count) {
#if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO)
if (ha->dbg_trace_lro_cnt) {
if (lro->lro_mbuf_count & ~1023)
fp->lro_cnt_1024++;
else if (lro->lro_mbuf_count & ~511)
fp->lro_cnt_512++;
else if (lro->lro_mbuf_count & ~255)
fp->lro_cnt_256++;
else if (lro->lro_mbuf_count & ~127)
fp->lro_cnt_128++;
else if (lro->lro_mbuf_count & ~63)
fp->lro_cnt_64++;
}
tcp_lro_flush_all(lro);
if (ha->dbg_trace_lro_cnt) {
if (lro->lro_mbuf_count & ~1023)
fp->lro_cnt_1024++;
else if (lro->lro_mbuf_count & ~511)
fp->lro_cnt_512++;
else if (lro->lro_mbuf_count & ~255)
fp->lro_cnt_256++;
else if (lro->lro_mbuf_count & ~127)
fp->lro_cnt_128++;
else if (lro->lro_mbuf_count & ~63)
fp->lro_cnt_64++;
}
tcp_lro_flush_all(lro);
#else
struct lro_entry *queued;
struct lro_entry *queued;
while ((!SLIST_EMPTY(&lro->lro_active))) {
queued = SLIST_FIRST(&lro->lro_active);
SLIST_REMOVE_HEAD(&lro->lro_active, next);
tcp_lro_flush(lro, queued);
}
#endif /* #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */
while ((!SLIST_EMPTY(&lro->lro_active))) {
queued = SLIST_FIRST(&lro->lro_active);
SLIST_REMOVE_HEAD(&lro->lro_active, next);
tcp_lro_flush(lro, queued);
}
#endif /* #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */
}
}
#endif /* #ifdef QLNX_SOFT_LRO */
ecore_sb_update_sb_idx(fp->sb_info);
rmb();
}
ecore_sb_update_sb_idx(fp->sb_info);
rmb();
#endif /* #ifdef QLNX_RCV_IN_TASKQ */
mtx_lock(&fp->tx_mtx);
if(ifp->if_drv_flags & IFF_DRV_RUNNING) {
if (((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING) || (!ha->link_up)) {
if (!drbr_empty(ifp, fp->tx_br)) {
mtx_unlock(&fp->tx_mtx);
goto qlnx_fp_taskqueue_exit;
}
if(mtx_trylock(&fp->tx_mtx)) {
mp = drbr_peek(ifp, fp->tx_br);
#ifdef QLNX_TRACE_PERF_DATA
tx_pkts = fp->tx_pkts_transmitted;
tx_compl = fp->tx_pkts_completed;
#endif
while (mp != NULL) {
qlnx_transmit_locked(ifp, fp, NULL);
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
ret = qlnx_send(ha, fp, &mp);
} else {
ret = -1;
}
if (ret) {
if (mp != NULL) {
drbr_putback(ifp, fp->tx_br, mp);
} else {
fp->tx_pkts_processed++;
drbr_advance(ifp, fp->tx_br);
#ifdef QLNX_TRACE_PERF_DATA
fp->tx_pkts_trans_fp +=
(fp->tx_pkts_transmitted - tx_pkts);
fp->tx_pkts_compl_fp +=
(fp->tx_pkts_completed - tx_compl);
#endif
mtx_unlock(&fp->tx_mtx);
}
mtx_unlock(&fp->tx_mtx);
goto qlnx_fp_taskqueue_exit;
} else {
drbr_advance(ifp, fp->tx_br);
fp->tx_pkts_transmitted++;
fp->tx_pkts_processed++;
}
if (fp->tx_ring_full)
break;
mp = drbr_peek(ifp, fp->tx_br);
}
mtx_unlock(&fp->tx_mtx);
qlnx_fp_taskqueue_exit:
#ifdef QLNX_RCV_IN_TASKQ
if (rx_int) {
if (fp->fp_taskqueue != NULL)
@ -537,7 +538,7 @@ qlnx_fp_taskqueue(void *context, int pending)
}
#endif /* #ifdef QLNX_RCV_IN_TASKQ */
QL_DPRINT2(ha, "exit ret = %d\n", ret);
QL_DPRINT2(ha, "exit \n");
return;
}
@ -611,6 +612,17 @@ qlnx_drain_fp_taskqueues(qlnx_host_t *ha)
return;
}
static void
qlnx_get_params(qlnx_host_t *ha)
{
if ((qlnxe_queue_count < 0) || (qlnxe_queue_count > QLNX_MAX_RSS)) {
device_printf(ha->pci_dev, "invalid queue_count value (%d)\n",
qlnxe_queue_count);
qlnxe_queue_count = 0;
}
return;
}
/*
* Name: qlnx_pci_attach
* Function: attaches the device to the operating system
@ -706,10 +718,21 @@ qlnx_pci_attach(device_t dev)
if (qlnx_init_hw(ha) != 0)
goto qlnx_pci_attach_err;
qlnx_get_params(ha);
if((pci_get_device(dev) == QLOGIC_PCI_DEVICE_ID_1644) &&
(qlnxe_queue_count == QLNX_DEFAULT_RSS)) {
qlnxe_queue_count = QLNX_MAX_RSS;
}
/*
* Allocate MSI-x vectors
*/
ha->num_rss = QLNX_MAX_RSS;
if(qlnxe_queue_count == 0)
ha->num_rss = QLNX_DEFAULT_RSS;
else
ha->num_rss = qlnxe_queue_count;
ha->num_tc = QLNX_MAX_TC;
ha->msix_count = pci_msix_count(dev);
@ -1236,6 +1259,44 @@ qlnx_add_fp_stats_sysctls(qlnx_host_t *ha)
CTLFLAG_RD, &ha->fp_array[i].tx_pkts_completed,
"No. of transmit completions");
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, "tx_non_tso_pkts",
CTLFLAG_RD, &ha->fp_array[i].tx_non_tso_pkts,
"No. of non LSO transmited packets");
#ifdef QLNX_TRACE_PERF_DATA
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, "tx_pkts_trans_ctx",
CTLFLAG_RD, &ha->fp_array[i].tx_pkts_trans_ctx,
"No. of transmitted packets in transmit context");
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, "tx_pkts_compl_ctx",
CTLFLAG_RD, &ha->fp_array[i].tx_pkts_compl_ctx,
"No. of transmit completions in transmit context");
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, "tx_pkts_trans_fp",
CTLFLAG_RD, &ha->fp_array[i].tx_pkts_trans_fp,
"No. of transmitted packets in taskqueue");
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, "tx_pkts_compl_fp",
CTLFLAG_RD, &ha->fp_array[i].tx_pkts_compl_fp,
"No. of transmit completions in taskqueue");
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, "tx_pkts_compl_intr",
CTLFLAG_RD, &ha->fp_array[i].tx_pkts_compl_intr,
"No. of transmit completions in interrupt ctx");
#endif
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, "tx_tso_pkts",
CTLFLAG_RD, &ha->fp_array[i].tx_tso_pkts,
"No. of LSO transmited packets");
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, "tx_lso_wnd_min_len",
CTLFLAG_RD, &ha->fp_array[i].tx_lso_wnd_min_len,
@ -1284,6 +1345,39 @@ qlnx_add_fp_stats_sysctls(qlnx_host_t *ha)
&ha->fp_array[i].tx_pkts[j], name_str);
}
#ifdef QLNX_TRACE_PERF_DATA
for (j = 0; j < 18; j++) {
bzero(name_str, (sizeof(uint8_t) * sizeof(name_str)));
snprintf(name_str, sizeof(name_str),
"tx_pkts_hist_%02d", (j+1));
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, name_str, CTLFLAG_RD,
&ha->fp_array[i].tx_pkts_hist[j], name_str);
}
for (j = 0; j < 5; j++) {
bzero(name_str, (sizeof(uint8_t) * sizeof(name_str)));
snprintf(name_str, sizeof(name_str),
"tx_comInt_%02d", (j+1));
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, name_str, CTLFLAG_RD,
&ha->fp_array[i].tx_comInt[j], name_str);
}
for (j = 0; j < 18; j++) {
bzero(name_str, (sizeof(uint8_t) * sizeof(name_str)));
snprintf(name_str, sizeof(name_str),
"tx_pkts_q_%02d", (j+1));
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, name_str, CTLFLAG_RD,
&ha->fp_array[i].tx_pkts_q[j], name_str);
}
#endif
SYSCTL_ADD_QUAD(ctx, node_children,
OID_AUTO, "err_tx_nsegs_gt_elem_left",
CTLFLAG_RD, &ha->fp_array[i].err_tx_nsegs_gt_elem_left,
@ -1979,6 +2073,12 @@ qlnx_init_ifnet(device_t dev, qlnx_host_t *ha)
ifp->if_capabilities |= IFCAP_TSO6;
ifp->if_capabilities |= IFCAP_LRO;
ifp->if_hw_tsomax = QLNX_MAX_TSO_FRAME_SIZE -
(ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
ifp->if_hw_tsomaxsegcount = QLNX_MAX_SEGMENTS - 1 /* hdr */;
ifp->if_hw_tsomaxsegsize = QLNX_MAX_TX_MBUF_SIZE;
ifp->if_capenable = ifp->if_capabilities;
ifp->if_hwassist = CSUM_IP;
@ -2543,6 +2643,7 @@ qlnx_tx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp,
u16 hw_bd_cons;
u16 ecore_cons_idx;
uint16_t diff;
uint16_t idx, idx2;
hw_bd_cons = le16toh(*txq->hw_cons_ptr);
@ -2580,6 +2681,11 @@ qlnx_tx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp,
qlnx_trigger_dump(ha);
}
idx = (txq->sw_tx_cons + 1) & (TX_RING_SIZE - 1);
idx2 = (txq->sw_tx_cons + 2) & (TX_RING_SIZE - 1);
prefetch(txq->sw_tx_ring[idx].mp);
prefetch(txq->sw_tx_ring[idx2].mp);
qlnx_free_tx_pkt(ha, fp, txq);
txq->sw_tx_cons = (txq->sw_tx_cons + 1) & (TX_RING_SIZE - 1);
@ -2587,6 +2693,61 @@ qlnx_tx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp,
return;
}
static int
qlnx_transmit_locked(struct ifnet *ifp,struct qlnx_fastpath *fp, struct mbuf *mp)
{
int ret = 0;
struct qlnx_tx_queue *txq;
qlnx_host_t * ha;
uint16_t elem_left;
txq = fp->txq[0];
ha = (qlnx_host_t *)fp->edev;
if ((!(ifp->if_drv_flags & IFF_DRV_RUNNING)) || (!ha->link_up)) {
if(mp != NULL)
ret = drbr_enqueue(ifp, fp->tx_br, mp);
return (ret);
}
if(mp != NULL)
ret = drbr_enqueue(ifp, fp->tx_br, mp);
mp = drbr_peek(ifp, fp->tx_br);
while (mp != NULL) {
if (qlnx_send(ha, fp, &mp)) {
if (mp != NULL) {
drbr_putback(ifp, fp->tx_br, mp);
} else {
fp->tx_pkts_processed++;
drbr_advance(ifp, fp->tx_br);
}
goto qlnx_transmit_locked_exit;
} else {
drbr_advance(ifp, fp->tx_br);
fp->tx_pkts_transmitted++;
fp->tx_pkts_processed++;
}
mp = drbr_peek(ifp, fp->tx_br);
}
qlnx_transmit_locked_exit:
if((qlnx_num_tx_compl(ha,fp, fp->txq[0]) > QLNX_TX_COMPL_THRESH) ||
((int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl))
< QLNX_TX_ELEM_MAX_THRESH))
(void)qlnx_tx_int(ha, fp, fp->txq[0]);
QL_DPRINT2(ha, "%s: exit ret = %d\n", __func__, ret);
return ret;
}
static int
qlnx_transmit(struct ifnet *ifp, struct mbuf *mp)
{
@ -2594,6 +2755,10 @@ qlnx_transmit(struct ifnet *ifp, struct mbuf *mp)
struct qlnx_fastpath *fp;
int rss_id = 0, ret = 0;
#ifdef QLNX_TRACE_PERF_DATA
uint64_t tx_pkts = 0, tx_compl = 0;
#endif
QL_DPRINT2(ha, "enter\n");
#if __FreeBSD_version >= 1100000
@ -2611,15 +2776,27 @@ qlnx_transmit(struct ifnet *ifp, struct mbuf *mp)
goto qlnx_transmit_exit;
}
if (mp != NULL) {
ret = drbr_enqueue(ifp, fp->tx_br, mp);
if (mtx_trylock(&fp->tx_mtx)) {
#ifdef QLNX_TRACE_PERF_DATA
tx_pkts = fp->tx_pkts_transmitted;
tx_compl = fp->tx_pkts_completed;
#endif
ret = qlnx_transmit_locked(ifp, fp, mp);
#ifdef QLNX_TRACE_PERF_DATA
fp->tx_pkts_trans_ctx += (fp->tx_pkts_transmitted - tx_pkts);
fp->tx_pkts_compl_ctx += (fp->tx_pkts_completed - tx_compl);
#endif
mtx_unlock(&fp->tx_mtx);
} else {
if (mp != NULL && (fp->fp_taskqueue != NULL)) {
ret = drbr_enqueue(ifp, fp->tx_br, mp);
taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task);
}
}
if (fp->fp_taskqueue != NULL)
taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task);
ret = 0;
qlnx_transmit_exit:
QL_DPRINT2(ha, "exit ret = %d\n", ret);
@ -2799,6 +2976,10 @@ qlnx_send(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf **m_headp)
uint32_t nbds_in_hdr = 0;
uint32_t offset = 0;
#ifdef QLNX_TRACE_PERF_DATA
uint16_t bd_used;
#endif
QL_DPRINT8(ha, "enter\n");
if (!ha->link_up)
@ -2811,14 +2992,14 @@ qlnx_send(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf **m_headp)
txq = fp->txq[0];
if (fp->tx_ring_full) {
elem_left = ecore_chain_get_elem_left(&txq->tx_pbl);
if ((int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl)) <
QLNX_TX_ELEM_MIN_THRESH) {
if (elem_left < (TX_RING_SIZE >> 4))
return (-1);
else
fp->tx_ring_full = 0;
}
fp->tx_nsegs_gt_elem_left++;
fp->err_tx_nsegs_gt_elem_left++;
return (ENOBUFS);
}
idx = txq->sw_tx_prod;
@ -2829,14 +3010,18 @@ qlnx_send(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf **m_headp)
BUS_DMA_NOWAIT);
if (ha->dbg_trace_tso_pkt_len) {
if (!fp->tx_tso_min_pkt_len) {
fp->tx_tso_min_pkt_len = m_head->m_pkthdr.len;
fp->tx_tso_min_pkt_len = m_head->m_pkthdr.len;
} else {
if (fp->tx_tso_min_pkt_len > m_head->m_pkthdr.len)
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
if (!fp->tx_tso_min_pkt_len) {
fp->tx_tso_min_pkt_len = m_head->m_pkthdr.len;
if (fp->tx_tso_max_pkt_len < m_head->m_pkthdr.len)
fp->tx_tso_max_pkt_len = m_head->m_pkthdr.len;
fp->tx_tso_min_pkt_len = m_head->m_pkthdr.len;
} else {
if (fp->tx_tso_min_pkt_len > m_head->m_pkthdr.len)
fp->tx_tso_min_pkt_len =
m_head->m_pkthdr.len;
if (fp->tx_tso_max_pkt_len < m_head->m_pkthdr.len)
fp->tx_tso_max_pkt_len =
m_head->m_pkthdr.len;
}
}
}
@ -2923,6 +3108,105 @@ qlnx_send(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf **m_headp)
fp->tx_pkts[(QLNX_FP_MAX_SEGS - 1)]++;
}
#ifdef QLNX_TRACE_PERF_DATA
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
if(m_head->m_pkthdr.len <= 2048)
fp->tx_pkts_hist[0]++;
else if((m_head->m_pkthdr.len > 2048) &&
(m_head->m_pkthdr.len <= 4096))
fp->tx_pkts_hist[1]++;
else if((m_head->m_pkthdr.len > 4096) &&
(m_head->m_pkthdr.len <= 8192))
fp->tx_pkts_hist[2]++;
else if((m_head->m_pkthdr.len > 8192) &&
(m_head->m_pkthdr.len <= 12288 ))
fp->tx_pkts_hist[3]++;
else if((m_head->m_pkthdr.len > 11288) &&
(m_head->m_pkthdr.len <= 16394))
fp->tx_pkts_hist[4]++;
else if((m_head->m_pkthdr.len > 16384) &&
(m_head->m_pkthdr.len <= 20480))
fp->tx_pkts_hist[5]++;
else if((m_head->m_pkthdr.len > 20480) &&
(m_head->m_pkthdr.len <= 24576))
fp->tx_pkts_hist[6]++;
else if((m_head->m_pkthdr.len > 24576) &&
(m_head->m_pkthdr.len <= 28672))
fp->tx_pkts_hist[7]++;
else if((m_head->m_pkthdr.len > 28762) &&
(m_head->m_pkthdr.len <= 32768))
fp->tx_pkts_hist[8]++;
else if((m_head->m_pkthdr.len > 32768) &&
(m_head->m_pkthdr.len <= 36864))
fp->tx_pkts_hist[9]++;
else if((m_head->m_pkthdr.len > 36864) &&
(m_head->m_pkthdr.len <= 40960))
fp->tx_pkts_hist[10]++;
else if((m_head->m_pkthdr.len > 40960) &&
(m_head->m_pkthdr.len <= 45056))
fp->tx_pkts_hist[11]++;
else if((m_head->m_pkthdr.len > 45056) &&
(m_head->m_pkthdr.len <= 49152))
fp->tx_pkts_hist[12]++;
else if((m_head->m_pkthdr.len > 49512) &&
(m_head->m_pkthdr.len <= 53248))
fp->tx_pkts_hist[13]++;
else if((m_head->m_pkthdr.len > 53248) &&
(m_head->m_pkthdr.len <= 57344))
fp->tx_pkts_hist[14]++;
else if((m_head->m_pkthdr.len > 53248) &&
(m_head->m_pkthdr.len <= 57344))
fp->tx_pkts_hist[15]++;
else if((m_head->m_pkthdr.len > 57344) &&
(m_head->m_pkthdr.len <= 61440))
fp->tx_pkts_hist[16]++;
else
fp->tx_pkts_hist[17]++;
}
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
elem_left = ecore_chain_get_elem_left(&txq->tx_pbl);
bd_used = TX_RING_SIZE - elem_left;
if(bd_used <= 100)
fp->tx_pkts_q[0]++;
else if((bd_used > 100) && (bd_used <= 500))
fp->tx_pkts_q[1]++;
else if((bd_used > 500) && (bd_used <= 1000))
fp->tx_pkts_q[2]++;
else if((bd_used > 1000) && (bd_used <= 2000))
fp->tx_pkts_q[3]++;
else if((bd_used > 3000) && (bd_used <= 4000))
fp->tx_pkts_q[4]++;
else if((bd_used > 4000) && (bd_used <= 5000))
fp->tx_pkts_q[5]++;
else if((bd_used > 6000) && (bd_used <= 7000))
fp->tx_pkts_q[6]++;
else if((bd_used > 7000) && (bd_used <= 8000))
fp->tx_pkts_q[7]++;
else if((bd_used > 8000) && (bd_used <= 9000))
fp->tx_pkts_q[8]++;
else if((bd_used > 9000) && (bd_used <= 10000))
fp->tx_pkts_q[9]++;
else if((bd_used > 10000) && (bd_used <= 11000))
fp->tx_pkts_q[10]++;
else if((bd_used > 11000) && (bd_used <= 12000))
fp->tx_pkts_q[11]++;
else if((bd_used > 12000) && (bd_used <= 13000))
fp->tx_pkts_q[12]++;
else if((bd_used > 13000) && (bd_used <= 14000))
fp->tx_pkts_q[13]++;
else if((bd_used > 14000) && (bd_used <= 15000))
fp->tx_pkts_q[14]++;
else if((bd_used > 15000) && (bd_used <= 16000))
fp->tx_pkts_q[15]++;
else
fp->tx_pkts_q[16]++;
}
#endif /* end of QLNX_TRACE_PERF_DATA */
if ((nsegs + QLNX_TX_ELEM_RESERVE) >
(int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl))) {
@ -2943,7 +3227,8 @@ qlnx_send(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf **m_headp)
fp->err_tx_nsegs_gt_elem_left++;
fp->tx_ring_full = 1;
ha->storm_stats_enable = 1;
if (ha->storm_stats_enable)
ha->storm_stats_gather = 1;
return (ENOBUFS);
}
}
@ -3131,6 +3416,7 @@ qlnx_send(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf **m_headp)
third_bd->data.bitfields |=
(nbds_in_hdr<<ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT);
}
fp->tx_tso_pkts++;
} else {
segs++;
for (seg_idx = 1; seg_idx < nsegs; seg_idx++) {
@ -3147,6 +3433,7 @@ qlnx_send(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf **m_headp)
<< ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
first_bd->data.bitfields =
htole16(first_bd->data.bitfields);
fp->tx_non_tso_pkts++;
}
@ -4303,8 +4590,10 @@ qlnx_fp_isr(void *arg)
if (fp->fp_taskqueue != NULL)
taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task);
#else
int rx_int = 0, total_rx_count = 0;
int lro_enable, tc;
int rx_int = 0, total_rx_count = 0;
int lro_enable, tc;
struct qlnx_tx_queue *txq;
uint16_t elem_left;
lro_enable = ha->ifp->if_capenable & IFCAP_LRO;
@ -4312,10 +4601,36 @@ qlnx_fp_isr(void *arg)
do {
for (tc = 0; tc < ha->num_tc; tc++) {
if (mtx_trylock(&fp->tx_mtx)) {
qlnx_tx_int(ha, fp, fp->txq[tc]);
mtx_unlock(&fp->tx_mtx);
}
txq = fp->txq[tc];
if((int)(elem_left =
ecore_chain_get_elem_left(&txq->tx_pbl)) <
QLNX_TX_ELEM_THRESH) {
if (mtx_trylock(&fp->tx_mtx)) {
#ifdef QLNX_TRACE_PERF_DATA
tx_compl = fp->tx_pkts_completed;
#endif
qlnx_tx_int(ha, fp, fp->txq[tc]);
#ifdef QLNX_TRACE_PERF_DATA
fp->tx_pkts_compl_intr +=
(fp->tx_pkts_completed - tx_compl);
if ((fp->tx_pkts_completed - tx_compl) <= 32)
fp->tx_comInt[0]++;
else if (((fp->tx_pkts_completed - tx_compl) > 32) &&
((fp->tx_pkts_completed - tx_compl) <= 64))
fp->tx_comInt[1]++;
else if(((fp->tx_pkts_completed - tx_compl) > 64) &&
((fp->tx_pkts_completed - tx_compl) <= 128))
fp->tx_comInt[2]++;
else if(((fp->tx_pkts_completed - tx_compl) > 128))
fp->tx_comInt[3]++;
#endif
mtx_unlock(&fp->tx_mtx);
}
}
}
rx_int = qlnx_rx_int(ha, fp, ha->rx_pkt_threshold,
@ -4328,7 +4643,6 @@ qlnx_fp_isr(void *arg)
} while (rx_int);
#ifdef QLNX_SOFT_LRO
{
struct lro_ctrl *lro;
@ -4608,8 +4922,8 @@ qlnx_alloc_tx_dma_tag(qlnx_host_t *ha)
NULL, NULL, /* filter, filterarg */
QLNX_MAX_TSO_FRAME_SIZE, /* maxsize */
QLNX_MAX_SEGMENTS, /* nsegments */
(PAGE_SIZE * 4), /* maxsegsize */
BUS_DMA_ALLOCNOW, /* flags */
QLNX_MAX_TX_MBUF_SIZE, /* maxsegsize */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&ha->tx_tag)) {
@ -4642,7 +4956,7 @@ qlnx_alloc_rx_dma_tag(qlnx_host_t *ha)
MJUM9BYTES, /* maxsize */
1, /* nsegments */
MJUM9BYTES, /* maxsegsize */
BUS_DMA_ALLOCNOW, /* flags */
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&ha->rx_tag)) {
@ -5255,6 +5569,14 @@ qlnx_init_fp(qlnx_host_t *ha)
fp->tx_pkts_freed = 0;
fp->tx_pkts_transmitted = 0;
fp->tx_pkts_completed = 0;
#ifdef QLNX_TRACE_PERF_DATA
fp->tx_pkts_trans_ctx = 0;
fp->tx_pkts_compl_ctx = 0;
fp->tx_pkts_trans_fp = 0;
fp->tx_pkts_compl_fp = 0;
fp->tx_pkts_compl_intr = 0;
#endif
fp->tx_lso_wnd_min_len = 0;
fp->tx_defrag = 0;
fp->tx_nsegs_gt_elem_left = 0;
@ -6606,7 +6928,7 @@ qlnx_timer(void *arg)
ecore_get_vport_stats(&ha->cdev, &ha->hw_stats);
if (ha->storm_stats_enable)
if (ha->storm_stats_gather)
qlnx_sample_storm_stats(ha);
callout_reset(&ha->qlnx_callout, hz, qlnx_timer, ha);
@ -6855,7 +7177,7 @@ qlnx_sample_storm_stats(qlnx_host_t *ha)
struct ecore_hwfn *hwfn;
if (ha->storm_stats_index >= QLNX_STORM_STATS_SAMPLES_PER_HWFN) {
ha->storm_stats_enable = 0;
ha->storm_stats_gather = 0;
return;
}


@ -39,5 +39,5 @@
#define QLNX_VERSION_MAJOR 1
#define QLNX_VERSION_MINOR 4
#define QLNX_VERSION_BUILD 6
#define QLNX_VERSION_BUILD 7


@ -52,7 +52,7 @@ SRCS+= pci_if.h
.include <bsd.kmod.mk>
CFLAGS += -DQLNX_DEBUG
#CFLAGS += -DQLNX_DEBUG
CFLAGS += -DECORE_PACKAGE
CFLAGS += -DCONFIG_ECORE_L2
CFLAGS += -DECORE_CONFIG_DIRECT_HWFN
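The behavioral switches added by this commit are compile-time defines: QLNX_RCV_IN_TASKQ moves receive processing into the per-fastpath taskqueue (otherwise it remains in the interrupt thread), and QLNX_TRACE_PERF_DATA compiles in the transmit histograms and per-context counters shown above. Neither define appears in this Makefile hunk, so where they are meant to be set is not shown here; purely as an illustration, supplying them the same way as the other flags in this Makefile would look like:

    #CFLAGS += -DQLNX_RCV_IN_TASKQ        # receive in the taskqueue instead of the interrupt thread
    #CFLAGS += -DQLNX_TRACE_PERF_DATA     # build the transmit-path performance trace counters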