From edd04c619685bdb47af787ffd48565111f3a4ab9 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Sat, 27 May 2017 09:16:25 +0530 Subject: [PATCH] net/cxgbe: update Rx path for Chelsio T6 Update RX path to reflect Chelsio T6 register value changes. Update ingress pack boundary value based on maximum payload size that can be accommodated by underlying PCI. Update ingress pad boundary value based on smallest memory controller bus width possible. Enforce alignment for free list pointer start address. Signed-off-by: Rahul Lakkireddy Signed-off-by: Kumar Sanghvi --- drivers/net/cxgbe/base/adapter.h | 4 +- drivers/net/cxgbe/base/common.h | 1 + drivers/net/cxgbe/base/t4_hw.c | 104 ++++++++++++++++++++++-- drivers/net/cxgbe/base/t4_regs_values.h | 7 +- drivers/net/cxgbe/sge.c | 48 +++-------- 5 files changed, 117 insertions(+), 47 deletions(-) diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h index 26807900d7..cc89e49228 100644 --- a/drivers/net/cxgbe/base/adapter.h +++ b/drivers/net/cxgbe/base/adapter.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2014-2016 Chelsio Communications. + * Copyright(c) 2014-2017 Chelsio Communications. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -459,7 +459,9 @@ static inline void t4_write_reg64(struct adapter *adapter, u32 reg_addr, #define PCI_CAP_ID_EXP 0x10 /* PCI Express */ #define PCI_CAP_LIST_ID 0 /* Capability ID */ #define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ +#define PCI_EXP_DEVCTL 0x0008 /* Device control */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ +#define PCI_EXP_DEVCTL_PAYLOAD 0x00E0 /* Max payload */ #define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */ #define PCI_VPD_ADDR 2 /* Address to access (15 bits!) */ #define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */ diff --git a/drivers/net/cxgbe/base/common.h b/drivers/net/cxgbe/base/common.h index 938362832b..5765bf3aa3 100644 --- a/drivers/net/cxgbe/base/common.h +++ b/drivers/net/cxgbe/base/common.h @@ -282,6 +282,7 @@ int t4_fw_bye(struct adapter *adap, unsigned int mbox); int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset); int t4_fw_halt(struct adapter *adap, unsigned int mbox, int reset); int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset); +int t4_fl_pkt_align(struct adapter *adap); int t4_fixup_host_params_compat(struct adapter *adap, unsigned int page_size, unsigned int cache_line_size, enum chip_type chip_compat); diff --git a/drivers/net/cxgbe/base/t4_hw.c b/drivers/net/cxgbe/base/t4_hw.c index 4e1545ab35..96d4bfd0b6 100644 --- a/drivers/net/cxgbe/base/t4_hw.c +++ b/drivers/net/cxgbe/base/t4_hw.c @@ -3357,6 +3357,49 @@ int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset) return 0; } +/** + * t4_fl_pkt_align - return the fl packet alignment + * @adap: the adapter + * + * T4 has a single field to specify the packing and padding boundary. + * T5 onwards has separate fields for this and hence the alignment for + * next packet offset is maximum of these two. + */ +int t4_fl_pkt_align(struct adapter *adap) +{ + u32 sge_control, sge_control2; + unsigned int ingpadboundary, ingpackboundary, fl_align, ingpad_shift; + + sge_control = t4_read_reg(adap, A_SGE_CONTROL); + + /* T4 uses a single control field to specify both the PCIe Padding and + * Packing Boundary. T5 introduced the ability to specify these + * separately. The actual Ingress Packet Data alignment boundary + * within Packed Buffer Mode is the maximum of these two + * specifications. + */ + if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) + ingpad_shift = X_INGPADBOUNDARY_SHIFT; + else + ingpad_shift = X_T6_INGPADBOUNDARY_SHIFT; + + ingpadboundary = 1 << (G_INGPADBOUNDARY(sge_control) + ingpad_shift); + + fl_align = ingpadboundary; + if (!is_t4(adap->params.chip)) { + sge_control2 = t4_read_reg(adap, A_SGE_CONTROL2); + ingpackboundary = G_INGPACKBOUNDARY(sge_control2); + if (ingpackboundary == X_INGPACKBOUNDARY_16B) + ingpackboundary = 16; + else + ingpackboundary = 1 << (ingpackboundary + + X_INGPACKBOUNDARY_SHIFT); + + fl_align = max(ingpadboundary, ingpackboundary); + } + return fl_align; +} + /** * t4_fixup_host_params_compat - fix up host-dependent parameters * @adap: the adapter @@ -3402,6 +3445,10 @@ int t4_fixup_host_params_compat(struct adapter *adap, X_INGPADBOUNDARY_SHIFT) | V_EGRSTATUSPAGESIZE(stat_len != 64)); else { + unsigned int pack_align; + unsigned int ingpad, ingpack; + unsigned int pcie_cap; + /* * T5 introduced the separation of the Free List Padding and * Packing Boundaries. Thus, we can select a smaller Padding @@ -3415,12 +3462,34 @@ int t4_fixup_host_params_compat(struct adapter *adap, * Size (the minimum unit of transfer to/from Memory). If we * have a Padding Boundary which is smaller than the Memory * Line Size, that'll involve a Read-Modify-Write cycle on the - * Memory Controller which is never good. For T5 the smallest - * Padding Boundary which we can select is 32 bytes which is - * larger than any known Memory Controller Line Size so we'll - * use that. + * Memory Controller which is never good. */ + /* We want the Packing Boundary to be based on the Cache Line + * Size in order to help avoid False Sharing performance + * issues between CPUs, etc. We also want the Packing + * Boundary to incorporate the PCI-E Maximum Payload Size. We + * get best performance when the Packing Boundary is a + * multiple of the Maximum Payload Size. + */ + pack_align = fl_align; + pcie_cap = t4_os_find_pci_capability(adap, PCI_CAP_ID_EXP); + if (pcie_cap) { + unsigned int mps, mps_log; + u16 devctl; + + /* The PCIe Device Control Maximum Payload Size field + * [bits 7:5] encodes sizes as powers of 2 starting at + * 128 bytes. + */ + t4_os_pci_read_cfg2(adap, pcie_cap + PCI_EXP_DEVCTL, + &devctl); + mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7; + mps = 1 << mps_log; + if (mps > pack_align) + pack_align = mps; + } + /* * N.B. T5 has a different interpretation of the "0" value for * the Packing Boundary. This corresponds to 16 bytes instead @@ -3429,19 +3498,36 @@ int t4_fixup_host_params_compat(struct adapter *adap, * on the other hand, if we wanted 32 bytes, the best we can * really do is 64 bytes ... */ - if (fl_align <= 32) { + if (pack_align <= 16) { + ingpack = X_INGPACKBOUNDARY_16B; + fl_align = 16; + } else if (pack_align == 32) { + ingpack = X_INGPACKBOUNDARY_64B; fl_align = 64; - fl_align_log = 6; + } else { + unsigned int pack_align_log = cxgbe_fls(pack_align) - 1; + + ingpack = pack_align_log - X_INGPACKBOUNDARY_SHIFT; + fl_align = pack_align; } + + /* Use the smallest Ingress Padding which isn't smaller than + * the Memory Controller Read/Write Size. We'll take that as + * being 8 bytes since we don't know of any system with a + * wider Memory Controller Bus Width. + */ + if (is_t5(adap->params.chip)) + ingpad = X_INGPADBOUNDARY_32B; + else + ingpad = X_T6_INGPADBOUNDARY_8B; t4_set_reg_field(adap, A_SGE_CONTROL, V_INGPADBOUNDARY(M_INGPADBOUNDARY) | F_EGRSTATUSPAGESIZE, - V_INGPADBOUNDARY(X_INGPCIEBOUNDARY_32B) | + V_INGPADBOUNDARY(ingpad) | V_EGRSTATUSPAGESIZE(stat_len != 64)); t4_set_reg_field(adap, A_SGE_CONTROL2, V_INGPACKBOUNDARY(M_INGPACKBOUNDARY), - V_INGPACKBOUNDARY(fl_align_log - - X_INGPACKBOUNDARY_SHIFT)); + V_INGPACKBOUNDARY(ingpack)); } /* diff --git a/drivers/net/cxgbe/base/t4_regs_values.h b/drivers/net/cxgbe/base/t4_regs_values.h index d7d3144c46..1326594767 100644 --- a/drivers/net/cxgbe/base/t4_regs_values.h +++ b/drivers/net/cxgbe/base/t4_regs_values.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2014-2015 Chelsio Communications. + * Copyright(c) 2014-2017 Chelsio Communications. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -55,10 +55,15 @@ #define X_RXPKTCPLMODE_SPLIT 1 #define X_INGPCIEBOUNDARY_32B 0 #define X_INGPADBOUNDARY_SHIFT 5 +#define X_INGPADBOUNDARY_32B 0 + +#define X_T6_INGPADBOUNDARY_SHIFT 3 +#define X_T6_INGPADBOUNDARY_8B 0 /* CONTROL2 register */ #define X_INGPACKBOUNDARY_SHIFT 5 #define X_INGPACKBOUNDARY_16B 0 +#define X_INGPACKBOUNDARY_64B 1 /* GTS register */ #define X_TIMERREG_RESTART_COUNTER 6 diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c index 699b577715..b16a0bf5f5 100644 --- a/drivers/net/cxgbe/sge.c +++ b/drivers/net/cxgbe/sge.c @@ -420,7 +420,9 @@ static unsigned int refill_fl_usembufs(struct adapter *adap, struct sge_fl *q, mbuf->nb_segs = 1; mbuf->port = rxq->rspq.port_id; - mapping = (dma_addr_t)(mbuf->buf_physaddr + mbuf->data_off); + mapping = (dma_addr_t)RTE_ALIGN(mbuf->buf_physaddr + + mbuf->data_off, + adap->sge.fl_align); mapping |= buf_size_idx; *d++ = cpu_to_be64(mapping); set_rx_sw_desc(sd, mbuf, mapping); @@ -1684,8 +1686,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, if (fl) { struct sge_eth_rxq *rxq = container_of(fl, struct sge_eth_rxq, fl); - enum chip_type chip = (enum chip_type)CHELSIO_CHIP_VERSION( - adap->params.chip); + unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip); /* * Allocate the ring for the hardware free list (with space @@ -1731,9 +1732,12 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, * Hence maximum allowed burst size will be 448 bytes. */ c.fl0dcaen_to_fl0cidxfthresh = - htons(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_128B) | - V_FW_IQ_CMD_FL0FBMAX((chip <= CHELSIO_T5) ? - X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B)); + htons(V_FW_IQ_CMD_FL0FBMIN(chip_ver <= CHELSIO_T5 ? + X_FETCHBURSTMIN_128B : + X_FETCHBURSTMIN_64B) | + V_FW_IQ_CMD_FL0FBMAX(chip_ver <= CHELSIO_T5 ? + X_FETCHBURSTMAX_512B : + X_FETCHBURSTMAX_256B)); c.fl0size = htons(flsz); c.fl0addr = cpu_to_be64(fl->addr); } @@ -2189,8 +2193,7 @@ static int t4_sge_init_soft(struct adapter *adap) int t4_sge_init(struct adapter *adap) { struct sge *s = &adap->sge; - u32 sge_control, sge_control2, sge_conm_ctrl; - unsigned int ingpadboundary, ingpackboundary; + u32 sge_control, sge_conm_ctrl; int ret, egress_threshold; /* @@ -2200,34 +2203,7 @@ int t4_sge_init(struct adapter *adap) sge_control = t4_read_reg(adap, A_SGE_CONTROL); s->pktshift = G_PKTSHIFT(sge_control); s->stat_len = (sge_control & F_EGRSTATUSPAGESIZE) ? 128 : 64; - - /* - * T4 uses a single control field to specify both the PCIe Padding and - * Packing Boundary. T5 introduced the ability to specify these - * separately. The actual Ingress Packet Data alignment boundary - * within Packed Buffer Mode is the maximum of these two - * specifications. - */ - ingpadboundary = 1 << (G_INGPADBOUNDARY(sge_control) + - X_INGPADBOUNDARY_SHIFT); - s->fl_align = ingpadboundary; - - if (!is_t4(adap->params.chip) && !adap->use_unpacked_mode) { - /* - * T5 has a weird interpretation of one of the PCIe Packing - * Boundary values. No idea why ... - */ - sge_control2 = t4_read_reg(adap, A_SGE_CONTROL2); - ingpackboundary = G_INGPACKBOUNDARY(sge_control2); - if (ingpackboundary == X_INGPACKBOUNDARY_16B) - ingpackboundary = 16; - else - ingpackboundary = 1 << (ingpackboundary + - X_INGPACKBOUNDARY_SHIFT); - - s->fl_align = max(ingpadboundary, ingpackboundary); - } - + s->fl_align = t4_fl_pkt_align(adap); ret = t4_sge_init_soft(adap); if (ret < 0) { dev_err(adap, "%s: t4_sge_init_soft failed, error %d\n",