From 79b93bf6a3af3a710f394be44c70c74f3e2d412e Mon Sep 17 00:00:00 2001
From: Navdeep Parhar <np@FreeBSD.org>
Date: Sat, 3 Jan 2015 00:09:21 +0000
Subject: [PATCH] cxgbe/tom: do not engage the TOE's payload chopper for
 payload < 2 MSS or for 10Gbps ports.

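Add a tx_align tunable to the TOE tunables (on by default, exported as a
read-write sysctl) and pass it to write_tx_wr() from t4_push_frames().
When it is set, work requests carrying less than 2 MSS of payload, or
sent on a 10Gbps port, have LSODISABLE set; all others request ALIGNPLD,
plus ALIGNPLDSHOVE unless TF_NODELAY is set on the connection.
t4_ulp_push_frames() always passes 0, so the ULP transmit path is
unchanged.

MFC after:	2 weeks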
---
 sys/dev/cxgbe/offload.h       |  1 +
 sys/dev/cxgbe/t4_main.c       |  4 ++++
 sys/dev/cxgbe/tom/t4_cpl_io.c | 25 ++++++++++++++++++++-----
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h
index e5f3ec2ea39e..ea681fee5284 100644
--- a/sys/dev/cxgbe/offload.h
+++ b/sys/dev/cxgbe/offload.h
@@ -147,6 +147,7 @@ struct tom_tunables {
 	int indsz;
 	int ddp_thres;
 	int rx_coalesce;
+	int tx_align;
 };
 
 int t4_register_uld(struct uld_info *);
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 4e968a957616..b29fa9461169 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -4801,6 +4801,10 @@ t4_sysctls(struct adapter *sc)
 		sc->tt.rx_coalesce = 1;
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
 		    CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
+
+		sc->tt.tx_align = 1;
+		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
+		    CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
 	}
 #endif
 
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 3cee10ae06a1..0c5eea70cac2 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -491,7 +491,7 @@ max_dsgl_nsegs(int tx_credits)
 
 static inline void
 write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen,
-    unsigned int plen, uint8_t credits, int shove, int ulp_mode)
+    unsigned int plen, uint8_t credits, int shove, int ulp_mode, int txalign)
 {
 	struct fw_ofld_tx_data_wr *txwr = dst;
 	unsigned int wr_ulp_mode;
@@ -513,6 +513,19 @@ write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen,
 		V_FW_OFLD_TX_DATA_WR_URGENT(0) |	/* XXX */
 		V_FW_OFLD_TX_DATA_WR_SHOVE(shove));
 	txwr->plen = htobe32(plen);
+
+	if (txalign > 0) {
+		struct tcpcb *tp = intotcpcb(toep->inp);
+
+		if (plen < 2 * tp->t_maxseg || is_10G_port(toep->port))
+			txwr->lsodisable_to_proxy |=
+			    htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE);
+		else
+			txwr->lsodisable_to_proxy |=
+			    htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD |
+				(tp->t_flags & TF_NODELAY ? 0 :
+				F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE));
+	}
 }
 
 /*
@@ -716,7 +729,8 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr->wr_len, 16);
-			write_tx_wr(txwr, toep, plen, plen, credits, shove, 0);
+			write_tx_wr(txwr, toep, plen, plen, credits, shove, 0,
+			    sc->tt.tx_align);
 			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
 			nsegs = 0;
 		} else {
@@ -734,7 +748,8 @@ t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr_len, 16);
-			write_tx_wr(txwr, toep, 0, plen, credits, shove, 0);
+			write_tx_wr(txwr, toep, 0, plen, credits, shove, 0,
+			    sc->tt.tx_align);
 			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
 			    max_nsegs_1mbuf);
 			if (wr_len & 0xf) {
@@ -890,7 +905,7 @@ t4_ulp_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 			txwr = wrtod(wr);
 			credits = howmany(wr->wr_len, 16);
 			write_tx_wr(txwr, toep, plen, ulp_len, credits, shove,
-								ulp_mode);
+								ulp_mode, 0);
 			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
 		} else {
 			int wr_len;
@@ -907,7 +922,7 @@ t4_ulp_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 			txwr = wrtod(wr);
 			credits = howmany(wr_len, 16);
 			write_tx_wr(txwr, toep, 0, ulp_len, credits, shove,
-								ulp_mode);
+								ulp_mode, 0);
 			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
 			    max_nsegs_1mbuf);
 			if (wr_len & 0xf) {