r269413, r269428, r269440, r269537, r269644, r269731, and the cxgbe portion of r270063. r266571: cxgbe(4): Remove stray if_up from the code that creates the tracing ifnet. r266757: cxgbe(4): netmap support for Terminator 5 (T5) based 10G/40G cards. Netmap gets its own hardware-assisted virtual interface and won't take over or disrupt the "normal" interface in any way. You can use both simultaneously. For kernels with DEV_NETMAP, cxgbe(4) carves out an ncxl<N> interface (note the 'n' prefix) in the hardware to accompany each cxl<N> interface. These two ifnet's per port share the same wire but really are separate interfaces in the hardware and software. Each gets its own L2 MAC addresses (unicast and multicast), MTU, checksum caps, etc. You should run netmap on the 'n' interfaces only, that's what they are for. With this, pkt-gen is able to transmit > 45Mpps out of a single 40G port of a T580 card. 2 port tx is at ~56Mpps total (28M + 28M) as of now. Single port receive is at 33Mpps but this is very much a work in progress. I expect it to be closer to 40Mpps once done. In any case the current effort can already saturate multiple 10G ports of a T5 card at the smallest legal packet size. T4 gear is totally untested. trantor:~# ./pkt-gen -i ncxl0 -f tx -D 00:07:43:ab:cd:ef 881.952141 main [1621] interface is ncxl0 881.952250 extract_ip_range [275] range is 10.0.0.1:0 to 10.0.0.1:0 881.952253 extract_ip_range [275] range is 10.1.0.1:0 to 10.1.0.1:0 881.962540 main [1804] mapped 334980KB at 0x801dff000 Sending on netmap:ncxl0: 4 queues, 1 threads and 1 cpus. 10.0.0.1 -> 10.1.0.1 (00:00:00:00:00:00 -> 00:07:43:ab:cd:ef) 881.962562 main [1882] Sending 512 packets every 0.000000000 s 881.962563 main [1884] Wait 2 secs for phy reset 884.088516 main [1886] Ready... 
884.088535 nm_open [457] overriding ifname ncxl0 ringid 0x0 flags 0x1 884.088607 sender_body [996] start 884.093246 sender_body [1064] drop copy 885.090435 main_thread [1418] 45206353 pps (45289533 pkts in 1001840 usec) 886.091600 main_thread [1418] 45322792 pps (45375593 pkts in 1001165 usec) 887.092435 main_thread [1418] 45313992 pps (45351784 pkts in 1000834 usec) 888.094434 main_thread [1418] 45315765 pps (45406397 pkts in 1002000 usec) 889.095434 main_thread [1418] 45333218 pps (45378551 pkts in 1001000 usec) 890.097434 main_thread [1418] 45315247 pps (45405877 pkts in 1002000 usec) 891.099434 main_thread [1418] 45326515 pps (45417168 pkts in 1002000 usec) 892.101434 main_thread [1418] 45333039 pps (45423705 pkts in 1002000 usec) 893.103434 main_thread [1418] 45324105 pps (45414708 pkts in 1001999 usec) 894.105434 main_thread [1418] 45318042 pps (45408723 pkts in 1002001 usec) 895.106434 main_thread [1418] 45332430 pps (45377762 pkts in 1001000 usec) 896.107434 main_thread [1418] 45338072 pps (45383410 pkts in 1001000 usec) ... r268536: cxgbe(4): Add an iSCSI softc to the adapter structure. r269076: Some hooks in cxgbe(4) for the offloaded iSCSI driver. r269364: Improve compliance with style.Makefile(5). r269366: List one file per line in the Makefiles. This makes it easier to read diffs when a file is added or removed. r269411: cxgbe(4): minor optimizations in ingress queue processing. Reorganize struct sge_iq. Make the iq entry size a compile time constant. While here, eliminate RX_FL_ESIZE and use EQ_ESIZE directly. r269413: cxgbe(4): Fix an off by one error when looking for the BAR2 doorbell address of an egress queue. r269428: cxgbe(4): some optimizations in freelist handling. r269440: cxgbe(4): Remove an unused version of t4_enable_vi. r269537: cxgbe(4): Do not run any sleepable code in the SIOCSIFFLAGS handler when IFF_PROMISC or IFF_ALLMULTI is being flipped. bpf(4) holds its global mutex around ifpromisc in at least the bpf_dtor path. 
r269644: cxgbe(4): Let caller specify whether it's ok to sleep in t4_sched_config and t4_sched_params. r269731: cxgbe(4): Do not poke T4-only registers on a T5 (and vice versa). Relnotes: Yes (native netmap support for Chelsio T4/T5 cards)
159 lines
4.4 KiB
C
159 lines
4.4 KiB
C
/*-
|
|
* Copyright (c) 2010 Chelsio Communications, Inc.
|
|
* All rights reserved.
|
|
* Written by: Navdeep Parhar <np@FreeBSD.org>
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* $FreeBSD$
|
|
*
|
|
*/
|
|
|
|
#ifndef __T4_OFFLOAD_H__
|
|
#define __T4_OFFLOAD_H__
|
|
|
|
/*
 * Fill in a ULPTX work request header in place.
 *
 * w      - pointer to the header; it is evaluated once per field written.
 * wrlen  - length of the work request in bytes; it is converted to 16-byte
 *          units, rounding up, before being stored.
 * atomic - value placed in the header's ATOMIC field.
 * tid    - value placed in the header's FLOWID field.
 *
 * wr_hi and wr_mid are stored via htonl(), wr_lo via cpu_to_be64().
 */
#define INIT_ULPTX_WRH(w, wrlen, atomic, tid) do { \
	(w)->wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | \
	    V_FW_WR_ATOMIC(atomic)); \
	(w)->wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
	    V_FW_WR_FLOWID(tid)); \
	(w)->wr_lo = cpu_to_be64(0); \
} while (0)
|
|
|
|
/*
 * Convenience form of INIT_ULPTX_WRH for a structure that embeds the work
 * request header as a member named 'wr': the address of that member is
 * passed through along with the remaining arguments.
 */
#define INIT_ULPTX_WR(w, wrlen, atomic, tid) \
	INIT_ULPTX_WRH(&((w)->wr), wrlen, atomic, tid)
|
|
|
|
/*
 * Fill in the work request header (member 'wr') of *w as an FW_TP_WR.
 *
 * w   - pointer to a structure that begins with, or at least contains, a
 *       'wr' header member.  It is evaluated several times, so it must not
 *       have side effects.
 * tid - value placed in the header's FLOWID field.
 *
 * The IMMDLEN field is set to the size of *w minus the size of its 'wr'
 * member, and the LEN16 field to the size of *w in 16-byte units, rounded
 * up.
 *
 * Every use of 'w' is parenthesized, including the ones inside sizeof, so
 * that arguments such as '&req' or '&reqs[i]' expand correctly.
 */
#define INIT_TP_WR(w, tid) do { \
	(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | \
	    V_FW_WR_IMMDLEN(sizeof(*(w)) - sizeof((w)->wr))); \
	(w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(sizeof(*(w)), 16)) | \
	    V_FW_WR_FLOWID(tid)); \
	(w)->wr.wr_lo = cpu_to_be64(0); \
} while (0)
|
|
|
|
/*
 * Initialize *w with INIT_TP_WR and then store the combined CPL opcode and
 * tid, as built by MK_OPCODE_TID(cpl, tid) and passed through htonl(), into
 * the location named by OPCODE_TID(w).
 */
#define INIT_TP_WR_MIT_CPL(w, cpl, tid) do { \
	INIT_TP_WR(w, tid); \
	OPCODE_TID(w) = htonl(MK_OPCODE_TID(cpl, tid)); \
} while (0)
|
|
|
|
/* Listener context; only pointers to it are used in this header. */
struct listen_ctx;

/* Head type for a tail queue of struct stid_region entries. */
TAILQ_HEAD(stid_head, stid_region);
|
|
|
|
struct stid_region {
|
|
TAILQ_ENTRY(stid_region) link;
|
|
u_int used; /* # of stids used by this region */
|
|
u_int free; /* # of contiguous stids free right after this region */
|
|
};
|
|
|
|
/*
 * Upper bound on the number of ATIDs the driver will use.  The hardware
 * itself allows up to 16K; the driver deliberately stays below that.
 */
#define MAX_ATIDS 8192U
|
|
|
|
/*
 * Slot in the active-open TID table.  The two members share storage, so a
 * slot holds either an opaque data pointer or a link to another slot.
 * NOTE(review): presumably 'next' chains unused slots into the free list
 * (see tid_info.afree) -- confirm against the allocator.
 */
union aopen_entry {
	void *data;			/* opaque pointer stored in the slot */
	union aopen_entry *next;	/* link to another slot */
};
|
|
|
|
/*
|
|
* Holds the size, base address, free list start, etc of the TID, server TID,
|
|
* and active-open TID tables. The tables themselves are allocated dynamically.
|
|
*/
|
|
struct tid_info {
|
|
void **tid_tab;
|
|
u_int ntids;
|
|
u_int tids_in_use;
|
|
|
|
struct mtx stid_lock __aligned(CACHE_LINE_SIZE);
|
|
struct listen_ctx **stid_tab;
|
|
u_int nstids;
|
|
u_int stid_base;
|
|
u_int stids_in_use;
|
|
u_int nstids_free_head; /* # of available stids at the begining */
|
|
struct stid_head stids;
|
|
|
|
struct mtx atid_lock __aligned(CACHE_LINE_SIZE);
|
|
union aopen_entry *atid_tab;
|
|
u_int natids;
|
|
union aopen_entry *afree;
|
|
u_int atids_in_use;
|
|
|
|
struct mtx ftid_lock __aligned(CACHE_LINE_SIZE);
|
|
struct filter_entry *ftid_tab;
|
|
u_int nftids;
|
|
u_int ftid_base;
|
|
u_int ftids_in_use;
|
|
|
|
struct mtx etid_lock __aligned(CACHE_LINE_SIZE);
|
|
struct etid_entry *etid_tab;
|
|
u_int netids;
|
|
u_int etid_base;
|
|
};
|
|
|
|
struct t4_range {
|
|
u_int start;
|
|
u_int size;
|
|
};
|
|
|
|
struct t4_virt_res { /* virtualized HW resources */
|
|
struct t4_range ddp;
|
|
struct t4_range iscsi;
|
|
struct t4_range stag;
|
|
struct t4_range rq;
|
|
struct t4_range pbl;
|
|
struct t4_range qp;
|
|
struct t4_range cq;
|
|
struct t4_range ocq;
|
|
struct t4_range l2t;
|
|
};
|
|
|
|
#ifdef TCP_OFFLOAD
|
|
/*
 * Upper layer driver (ULD) identifiers.
 * NOTE(review): presumably these are the values stored in uld_info.uld_id
 * and passed to t4_activate_uld()/t4_deactivate_uld() -- confirm.
 */
enum {
	ULD_TOM = 1,
	ULD_IWARP = 2,
};
|
|
|
|
/*
 * Forward declarations for types this header refers to only through
 * pointers.  struct ifnet is declared here as well because the
 * t4_iscsi_init() prototype takes a struct ifnet *; without a prior
 * declaration that tag would be scoped to the prototype alone and the
 * header would depend on the includer's include order.
 */
struct adapter;
struct port_info;
struct ifnet;
|
|
struct uld_info {
|
|
SLIST_ENTRY(uld_info) link;
|
|
int refcount;
|
|
int uld_id;
|
|
int (*activate)(struct adapter *);
|
|
int (*deactivate)(struct adapter *);
|
|
};
|
|
|
|
/*
 * Integer tunables for the TOM upper layer driver.
 * NOTE(review): the meaning and units of each knob are not visible in this
 * header -- see the code that reads them.
 */
struct tom_tunables {
	int sndbuf;
	int ddp;
	int indsz;
	int ddp_thres;
	int rx_coalesce;
};
|
|
|
|
/* ULD registration; both take the driver's struct uld_info descriptor. */
int t4_register_uld(struct uld_info *);
int t4_unregister_uld(struct uld_info *);

/*
 * Per-adapter ULD activation and deactivation.
 * NOTE(review): the int argument is presumably a ULD_* identifier -- confirm.
 */
int t4_activate_uld(struct adapter *, int);
int t4_deactivate_uld(struct adapter *, int);

/* iSCSI initialization hook; see the definition for the argument meanings. */
void t4_iscsi_init(struct ifnet *, unsigned int, const unsigned int *);
|
|
#endif
|
|
#endif
|