diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 8cebdaa7d8ae..17b3d593d707 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -148,7 +148,7 @@ enum {
 #else
     SW_ZONE_SIZES = 3,    /* cluster, jumbo9k, jumbo16k */
 #endif
-    CL_METADATA_SIZE = CACHE_LINE_SIZE,
+    CL_METADATA_SIZE = 256,    /* same as MSIZE for now */
 
     SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */
     TX_SGL_SEGS = 36,
@@ -695,6 +695,7 @@ struct sge {
 
     struct sge_iq **iqmap;    /* iq->cntxt_id to iq mapping */
     struct sge_eq **eqmap;    /* eq->cntxt_id to eq mapping */
+    int pad_boundary;
     int pack_boundary;
     int8_t safe_hwidx1;    /* may not have room for metadata */
     int8_t safe_hwidx2;    /* with room for metadata and maybe more */
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index 58940fe4959c..bfdb1eaf9b92 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -120,19 +120,10 @@ TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing);
 /*
  * Start next frame in a packed buffer at this boundary.
  * -1: driver should figure out a good value.
- * T4:
- * ---
- * if fl_pad != 0
- *    value specified here will be overridden by fl_pad.
- * else
- *    power of 2 from 32 to 4096 (both inclusive) is a valid value here.
- * T5:
- * ---
- * 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
+ * T4: driver will ignore this and use the same value as fl_pad above.
+ * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
  */
 static int fl_pack = -1;
-static int t4_fl_pack;
-static int t5_fl_pack;
 TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack);
 
 /*
@@ -175,8 +166,7 @@ static int service_iq(struct sge_iq *, int);
 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t);
 static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int);
-static inline void init_fl(struct adapter *, struct sge_fl *, int, int, int,
-    char *);
+static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *);
 static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
     char *);
 static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
@@ -264,15 +254,6 @@ static counter_u64_t extfree_rels;
 void
 t4_sge_modload(void)
 {
-    int pad;
-
-    /* set pad to a reasonable powerof2 between 16 and 4096 (inclusive) */
-#if defined(__i386__) || defined(__amd64__)
-    pad = max(cpu_clflush_line_size, 16);
-#else
-    pad = max(CACHE_LINE_SIZE, 16);
-#endif
-    pad = min(pad, 4096);
 
     if (fl_pktshift < 0 || fl_pktshift > 7) {
         printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
@@ -280,35 +261,6 @@ t4_sge_modload(void)
         fl_pktshift = 2;
     }
 
-    if (fl_pad != 0 &&
-        (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad))) {
-
-        if (fl_pad != -1) {
-            printf("Invalid hw.cxgbe.fl_pad value (%d),"
-                " using %d instead.\n", fl_pad, max(pad, 32));
-        }
-        fl_pad = max(pad, 32);
-    }
-
-    /*
-     * T4 has the same pad and pack boundary.  If a pad boundary is set,
-     * pack boundary must be set to the same value.  Otherwise take the
-     * specified value or auto-calculate something reasonable.
-     */
-    if (fl_pad)
-        t4_fl_pack = fl_pad;
-    else if (fl_pack < 32 || fl_pack > 4096 || !powerof2(fl_pack))
-        t4_fl_pack = max(pad, 32);
-    else
-        t4_fl_pack = fl_pack;
-
-    /* T5's pack boundary is independent of the pad boundary. */
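With the chip-specific globals gone, tunable validation moves out of t4_sge_modload() and into setup_pad_and_pack_boundaries() below, where the adapter type and PCIe parameters are known. The check itself is a range-and-power-of-2 test; here is a stand-alone sketch (hypothetical helper name, not driver code; the driver uses the powerof2() macro from sys/param.h):

    #include <stdbool.h>
    #include <stdio.h>

    /* powerof2() in sys/param.h expands to ((((x)-1)&(x))==0). */
    static bool
    valid_pad_boundary(int v)
    {
            /* A usable pad boundary is a power of 2 between 32 and 4096. */
            return (v >= 32 && v <= 4096 && (((v - 1) & v) == 0));
    }

    int
    main(void)
    {
            printf("%d %d %d\n", valid_pad_boundary(64),    /* 1 */
                valid_pad_boundary(48),     /* 0: not a power of 2 */
                valid_pad_boundary(8192));  /* 0: out of range */
            return (0);
    }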
-    if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
-        !powerof2(fl_pack))
-        t5_fl_pack = max(pad, CACHE_LINE_SIZE);
-    else
-        t5_fl_pack = fl_pack;
-
     if (spg_len != 64 && spg_len != 128) {
         int len;
@@ -366,6 +318,71 @@ t4_init_sge_cpl_handlers(struct adapter *sc)
     t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
 }
 
+static inline void
+setup_pad_and_pack_boundaries(struct adapter *sc)
+{
+    uint32_t v, m;
+    int pad, pack;
+
+    pad = fl_pad;
+    if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) {
+        /*
+         * If there is any chance that we might use buffer packing and
+         * the chip is a T4, then pick 64 as the pad/pack boundary.  Set
+         * it to 32 in all other cases.
+         */
+        pad = is_t4(sc) && buffer_packing ? 64 : 32;
+
+        /*
+         * For fl_pad = 0 we'll still write a reasonable value to the
+         * register but all the freelists will opt out of padding.
+         * We'll complain here only if the user tried to set it to a
+         * value greater than 0 that was invalid.
+         */
+        if (fl_pad > 0) {
+            device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value"
+                " (%d), using %d instead.\n", fl_pad, pad);
+        }
+    }
+    m = V_INGPADBOUNDARY(M_INGPADBOUNDARY);
+    v = V_INGPADBOUNDARY(ilog2(pad) - 5);
+    t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
+
+    if (is_t4(sc)) {
+        if (fl_pack != -1 && fl_pack != pad) {
+            /* Complain but carry on. */
+            device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored,"
+                " using %d instead.\n", fl_pack, pad);
+        }
+        return;
+    }
+
+    pack = fl_pack;
+    if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
+        !powerof2(fl_pack)) {
+        pack = max(sc->params.pci.mps, CACHE_LINE_SIZE);
+        MPASS(powerof2(pack));
+        if (pack < 16)
+            pack = 16;
+        if (pack == 32)
+            pack = 64;
+        if (pack > 4096)
+            pack = 4096;
+        if (fl_pack != -1) {
+            device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value"
+                " (%d), using %d instead.\n", fl_pack, pack);
+        }
+    }
+    m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
+    if (pack == 16)
+        v = V_INGPACKBOUNDARY(0);
+    else
+        v = V_INGPACKBOUNDARY(ilog2(pack) - 5);
+
+    MPASS(!is_t4(sc));    /* T4 doesn't have SGE_CONTROL2 */
+    t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
+}
+
 /*
  * adap->params.vpd.cclk must be set up before this is called.
  */
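The register interface behind the new function stores each boundary as a power-of-2 exponent biased by 5 (so 32 encodes as 0, 64 as 1, and so on), and T5's pack-boundary field reuses encoding 0 to mean 16 bytes. A stand-alone round-trip sketch of that encoding (hypothetical helpers; the driver itself uses the V_/G_INGPADBOUNDARY and V_/G_INGPACKBOUNDARY macros):

    #include <assert.h>

    /* Encode a boundary in bytes for the SGE: ilog2(bytes) - 5. */
    static int
    encode_boundary(int bytes, int t5_pack)
    {
            int lg2 = 0;

            if (t5_pack && bytes == 16)
                    return (0);     /* T5 pack-boundary special case */
            while ((1 << lg2) < bytes)
                    lg2++;          /* lg2 == ilog2(bytes) */
            return (lg2 - 5);
    }

    /* Decode is the inverse used by t4_read_chip_settings(). */
    static int
    decode_boundary(int field, int t5_pack)
    {
            if (t5_pack && field == 0)
                    return (16);
            return (1 << (field + 5));
    }

    int
    main(void)
    {
            assert(encode_boundary(32, 0) == 0);
            assert(decode_boundary(encode_boundary(4096, 0), 0) == 4096);
            assert(decode_boundary(encode_boundary(16, 1), 1) == 16);
            return (0);
    }

Reading the boundaries back out of the registers, instead of trusting the module-level globals, is what lets t4_read_chip_settings() further down work no matter who programmed the values.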
@@ -398,24 +415,9 @@ t4_tweak_chip_settings(struct adapter *sc)
     m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
     v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
         V_EGRSTATUSPAGESIZE(spg_len == 128);
-    if (is_t4(sc) && (fl_pad || buffer_packing)) {
-        /* t4_fl_pack has the correct value even when fl_pad = 0 */
-        m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
-        v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
-    } else if (is_t5(sc) && fl_pad) {
-        m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
-        v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
-    }
     t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
 
-    if (is_t5(sc) && buffer_packing) {
-        m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
-        if (t5_fl_pack == 16)
-            v = V_INGPACKBOUNDARY(0);
-        else
-            v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
-        t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
-    }
+    setup_pad_and_pack_boundaries(sc);
 
     v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
         V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
@@ -486,15 +488,25 @@
 }
 
 /*
- * SGE wants the buffer to be at least 64B and then a multiple of the pad
- * boundary or 16, whichever is greater.
+ * SGE wants the buffer to be at least 64B and then a multiple of 16.  If
+ * padding and packing are enabled, the buffer's start and end need to be
+ * correctly aligned as well.  We'll just make sure that the size is a multiple
+ * of the alignment; it is up to other parts of the driver to align the start.
  */
 static inline int
-hwsz_ok(int hwsz)
+hwsz_ok(struct adapter *sc, int hwsz)
 {
-    int mask = max(fl_pad, 16) - 1;
+    int align = 16;
+
+    if (fl_pad) {
+        MPASS(sc->sge.pad_boundary > align);
+        align = sc->sge.pad_boundary;
+    }
+    if (buffer_packing && sc->sge.pack_boundary > align)
+        align = sc->sge.pack_boundary;
+    align--;    /* now a mask */
+    return (hwsz >= 64 && (hwsz & align) == 0);
 
-    return (hwsz >= 64 && (hwsz & mask) == 0);
 }
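hwsz_ok() now derives the required multiple from whichever boundary is strictest, instead of from fl_pad alone. A stand-alone mirror with the adapter state passed in as plain ints (the boundary values are assumed examples):

    #include <stdio.h>

    /* Simplified mirror of hwsz_ok() above; boundaries are powers of 2. */
    static int
    hwsz_ok(int pad_boundary, int pack_boundary, int padding, int packing,
        int hwsz)
    {
            int align = 16;

            if (padding && pad_boundary > align)
                    align = pad_boundary;
            if (packing && pack_boundary > align)
                    align = pack_boundary;
            return (hwsz >= 64 && (hwsz & (align - 1)) == 0);
    }

    int
    main(void)
    {
            /* 9216 (jumbo9k) is usable at a 128B pack boundary; 9000 is not. */
            printf("%d %d\n", hwsz_ok(64, 128, 1, 1, 9216),
                hwsz_ok(64, 128, 1, 1, 9000));
            return (0);
    }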
*/ n = 0; /* no usable buffer size to begin with */ swz = &s->sw_zone_info[0]; @@ -586,6 +596,15 @@ t4_read_chip_settings(struct adapter *sc) swz->zone = m_getzone(swz->size); swz->type = m_gettype(swz->size); + if (swz->size < PAGE_SIZE) { + MPASS(powerof2(swz->size)); + if (fl_pad && (swz->size % sc->sge.pad_boundary != 0)) + continue; + if (buffer_packing && + (swz->size % sc->sge.pack_boundary != 0)) + continue; + } + if (swz->size == safest_rx_cluster) safe_swz = swz; @@ -593,6 +612,12 @@ t4_read_chip_settings(struct adapter *sc) for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) { if (hwb->zidx != -1 || hwb->size > swz->size) continue; +#ifdef INVARIANTS + if (fl_pad) + MPASS(hwb->size % sc->sge.pad_boundary == 0); + if (buffer_packing) + MPASS(hwb->size % sc->sge.pack_boundary == 0); +#endif hwb->zidx = i; if (head == -1) head = tail = j; @@ -640,14 +665,17 @@ t4_read_chip_settings(struct adapter *sc) int spare; hwb = &s->hw_buf_info[i]; +#ifdef INVARIANTS + if (fl_pad) + MPASS(hwb->size % sc->sge.pad_boundary == 0); + if (buffer_packing) + MPASS(hwb->size % sc->sge.pack_boundary == 0); +#endif spare = safe_swz->size - hwb->size; - if (spare < CL_METADATA_SIZE) - continue; - if (s->safe_hwidx2 == -1 || - spare == CL_METADATA_SIZE + MSIZE) + if (spare >= CL_METADATA_SIZE) { s->safe_hwidx2 = i; - if (spare >= CL_METADATA_SIZE + MSIZE) break; + } } } @@ -745,17 +773,6 @@ t4_create_dma_tag(struct adapter *sc) return (rc); } -static inline int -enable_buffer_packing(struct adapter *sc) -{ - - if (sc->flags & BUF_PACKING_OK && - ((is_t5(sc) && buffer_packing) || /* 1 or -1 both ok for T5 */ - (is_t4(sc) && buffer_packing == 1))) - return (1); - return (0); -} - void t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *children) @@ -769,7 +786,7 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, NULL, fl_pktshift, "payload DMA offset in rx buffer (bytes)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD, - NULL, fl_pad, "payload pad boundary (bytes)"); + NULL, sc->sge.pad_boundary, "payload pad boundary (bytes)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD, NULL, spg_len, "status page size (bytes)"); @@ -777,10 +794,6 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD, NULL, cong_drop, "congestion drop setting"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, "buffer_packing", CTLFLAG_RD, - NULL, enable_buffer_packing(sc), - "pack multiple frames in one fl buffer"); - SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, NULL, sc->sge.pack_boundary, "payload pack boundary (bytes)"); } @@ -958,7 +971,6 @@ mtu_to_max_payload(struct adapter *sc, int mtu, const int toe) #ifdef TCP_OFFLOAD } #endif - payload = roundup2(payload, fl_pad); return (payload); } @@ -983,7 +995,7 @@ t4_setup_port_queues(struct port_info *pi) struct ifnet *ifp = pi->ifp; struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); - int maxp, pack, mtu = ifp->if_mtu; + int maxp, mtu = ifp->if_mtu; /* Interrupt vector to start from (when using multiple vectors) */ intr_idx = first_vector(pi); @@ -994,7 +1006,6 @@ t4_setup_port_queues(struct port_info *pi) * b) allocate queue iff it will take direct interrupts. 
@@ -745,17 +773,6 @@ t4_create_dma_tag(struct adapter *sc)
     return (rc);
 }
 
-static inline int
-enable_buffer_packing(struct adapter *sc)
-{
-
-    if (sc->flags & BUF_PACKING_OK &&
-        ((is_t5(sc) && buffer_packing) ||    /* 1 or -1 both ok for T5 */
-        (is_t4(sc) && buffer_packing == 1)))
-        return (1);
-    return (0);
-}
-
 void
 t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
     struct sysctl_oid_list *children)
@@ -769,7 +786,7 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
         NULL, fl_pktshift, "payload DMA offset in rx buffer (bytes)");
 
     SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
-        NULL, fl_pad, "payload pad boundary (bytes)");
+        NULL, sc->sge.pad_boundary, "payload pad boundary (bytes)");
 
     SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
         NULL, spg_len, "status page size (bytes)");
@@ -777,10 +794,6 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
     SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
         NULL, cong_drop, "congestion drop setting");
 
-    SYSCTL_ADD_INT(ctx, children, OID_AUTO, "buffer_packing", CTLFLAG_RD,
-        NULL, enable_buffer_packing(sc),
-        "pack multiple frames in one fl buffer");
-
     SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
         NULL, sc->sge.pack_boundary, "payload pack boundary (bytes)");
 }
@@ -958,7 +971,6 @@ mtu_to_max_payload(struct adapter *sc, int mtu, const int toe)
 #ifdef TCP_OFFLOAD
     }
 #endif
-    payload = roundup2(payload, fl_pad);
 
     return (payload);
 }
@@ -983,7 +995,7 @@ t4_setup_port_queues(struct port_info *pi)
     struct ifnet *ifp = pi->ifp;
     struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
     struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
-    int maxp, pack, mtu = ifp->if_mtu;
+    int maxp, mtu = ifp->if_mtu;
 
     /* Interrupt vector to start from (when using multiple vectors) */
     intr_idx = first_vector(pi);
@@ -994,7 +1006,6 @@ t4_setup_port_queues(struct port_info *pi)
      *    b) allocate queue iff it will take direct interrupts.
      */
     maxp = mtu_to_max_payload(sc, mtu, 0);
-    pack = enable_buffer_packing(sc);
     if (pi->flags & INTR_RXQ) {
         oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq",
             CTLFLAG_RD, NULL, "rx queues");
@@ -1005,7 +1016,7 @@ t4_setup_port_queues(struct port_info *pi)
 
         snprintf(name, sizeof(name), "%s rxq%d-fl",
             device_get_nameunit(pi->dev), i);
-        init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);
+        init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, name);
 
         if (pi->flags & INTR_RXQ) {
             rxq->iq.flags |= IQ_INTR;
@@ -1029,7 +1040,7 @@ t4_setup_port_queues(struct port_info *pi)
 
         snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
             device_get_nameunit(pi->dev), i);
-        init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);
+        init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, name);
 
         if (pi->flags & INTR_OFLD_RXQ) {
             ofld_rxq->iq.flags |= IQ_INTR;
@@ -1572,8 +1583,14 @@ get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int total, int flags)
     caddr_t payload;
 
     len = min(total, hwb->size - fl->rx_offset);
-    padded_len = roundup2(len, fl->buf_boundary);
     payload = sd->cl + cll->region1 + fl->rx_offset;
+    if (fl->flags & FL_BUF_PACKING) {
+        padded_len = roundup2(len, fl->buf_boundary);
+        MPASS(fl->rx_offset + padded_len <= hwb->size);
+    } else {
+        padded_len = hwb->size;
+        MPASS(fl->rx_offset == 0);    /* not packing */
+    }
 
     if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
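With packing, get_scatter_segment() walks one buffer frame by frame, advancing rx_offset by the frame length rounded up to buf_boundary; without packing, each buffer carries a single frame and is consumed whole, so rx_offset stays 0. A stand-alone sketch of the packed walk (frame lengths and sizes are assumed; roundup2() is the sys/param.h macro):

    #include <stdio.h>

    /* roundup2() from sys/param.h; y must be a power of 2. */
    #define roundup2(x, y)  (((x) + ((y) - 1)) & ~((y) - 1))

    int
    main(void)
    {
            int hwsize = 4096, buf_boundary = 64, rx_offset = 0;
            int frames[] = { 1514, 66, 1514 };      /* assumed frame lengths */

            for (int i = 0; i < 3; i++) {
                    int padded_len = roundup2(frames[i], buf_boundary);

                    printf("frame %4d: payload at %4d, next frame at %4d\n",
                        frames[i], rx_offset, rx_offset + padded_len);
                    rx_offset += padded_len;        /* stays within hwsize */
            }
            printf("used %d of %d bytes\n", rx_offset, hwsize);
            return (0);
    }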
@@ -2121,14 +2138,15 @@ init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
 }
 
 static inline void
-init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, int pack,
-    char *name)
+init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name)
 {
 
     fl->qsize = qsize;
     fl->sidx = qsize - spg_len / EQ_ESIZE;
     strlcpy(fl->lockname, name, sizeof(fl->lockname));
-    if (pack)
+    if (sc->flags & BUF_PACKING_OK &&
+        ((!is_t4(sc) && buffer_packing) ||    /* T5+: enabled unless 0 */
+        (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */
         fl->flags |= FL_BUF_PACKING;
     find_best_refill_source(sc, fl, maxp);
     find_safe_refill_source(sc, fl);
@@ -2277,11 +2295,13 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
 
         if (fl->flags & FL_BUF_PACKING) {
             fl->lowat = roundup2(sc->sge.fl_starve_threshold2, 8);
-            fl->buf_boundary = max(fl_pad, sc->sge.pack_boundary);
+            fl->buf_boundary = sc->sge.pack_boundary;
         } else {
             fl->lowat = roundup2(sc->sge.fl_starve_threshold, 8);
-            fl->buf_boundary = fl_pad;
+            fl->buf_boundary = 16;
         }
+        if (fl_pad && fl->buf_boundary < sc->sge.pad_boundary)
+            fl->buf_boundary = sc->sge.pad_boundary;
 
         c.iqns_to_fl0congen |=
             htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
@@ -2452,6 +2472,10 @@ add_fl_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
     SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
         CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I",
         "SGE context id of the freelist");
+    SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL,
+        fl_pad ? 1 : 0, "padding enabled");
+    SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL,
+        fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled");
     SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx,
         0, "consumer index");
     if (fl->flags & FL_BUF_PACKING) {
@@ -4367,6 +4391,17 @@ done:
         if (allow_mbufs_in_cluster == 0 || hwb->size < maxp)
             break;
+
+        /*
+         * Do not inline mbufs if doing so would violate the pad/pack
+         * boundary alignment requirement.
+         */
+        if (fl_pad && (MSIZE % sc->sge.pad_boundary) != 0)
+            continue;
+        if (fl->flags & FL_BUF_PACKING &&
+            (MSIZE % sc->sge.pack_boundary) != 0)
+            continue;
+
         if (spare < CL_METADATA_SIZE + MSIZE)
             continue;
         n = (spare - CL_METADATA_SIZE) / MSIZE;
@@ -4449,7 +4484,9 @@ find_safe_refill_source(struct adapter *sc, struct sge_fl *fl)
     spare = swz->size - hwb->size;
     fl->cll_alt.hwidx = hwidx;
     fl->cll_alt.zidx = hwb->zidx;
-    if (allow_mbufs_in_cluster)
+    if (allow_mbufs_in_cluster &&
+        (fl_pad == 0 || (MSIZE % sc->sge.pad_boundary) == 0) &&
+        (!(fl->flags & FL_BUF_PACKING) || (MSIZE % sc->sge.pack_boundary) == 0))
         fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE;
     else
         fl->cll_alt.region1 = 0;
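The last two hunks close a subtle hole: inlining mbufs ahead of the payload shifts the payload's start by some multiple of MSIZE, so it is only safe when MSIZE itself is a multiple of every boundary in effect. A stand-alone check under assumed boundary settings:

    #include <stdio.h>

    #define MSIZE 256       /* FreeBSD mbuf size, 64-bit platforms */

    int
    main(void)
    {
            int boundaries[] = { 32, 64, 256, 512 };        /* assumed */

            for (int i = 0; i < 4; i++) {
                    /*
                     * region1 may hold inlined mbufs only if the payload
                     * that follows them still starts on the boundary.
                     */
                    printf("boundary %3d: inlining %s\n", boundaries[i],
                        MSIZE % boundaries[i] == 0 ? "ok" : "not allowed");
            }
            return (0);
    }

This is why find_safe_refill_source() now zeroes cll_alt.region1 whenever a pad or pack boundary larger than MSIZE is in effect.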