Greatly simplify cxgb by removing almost all of the custom mbuf management logic

- remove mbuf iovec - useful, but adds too much complexity when isolated to
   the driver

- remove driver private caching - insufficient benefit over UMA to justify
  the added complexity and maintenance overhead

- remove separate logic for managing multiple transmit queues; with the
  new drbr routines the control flow much more closely resembles that of
  legacy drivers
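
  For illustration, the resulting if_transmit path looks roughly like this
  (a sketch only; cxgb_start_locked stands in for whatever the drain
  routine is actually called):

  int
  cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
  {
          struct port_info *pi = ifp->if_softc;
          struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
          struct sge_txq *txq = &qs->txq[TXQ_ETH];
          int error;

          /* Queue to the per-txq ring, much as legacy drivers queue to if_snd. */
          error = drbr_enqueue(ifp, txq->txq_mr, m);
          if (error)
                  return (error);

          /* Drain inline when uncontended; otherwise the txq_timer callout
           * services the ring on the next tick (see the next item). */
          if (mtx_trylock(&txq->lock)) {
                  cxgb_start_locked(qs);
                  mtx_unlock(&txq->lock);
          }
          return (0);
  }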

- remove dedicated service threads; with per-cpu callouts one can get the
  same benefit much more simply by registering a callout 1 tick in the
  future whenever there are still buffered packets
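
  Sketch of the callout-based service path (the txq_timer callout is the
  field added to struct sge_txq; the drain routine name is again
  illustrative):

  static void
  cxgb_tx_timeout(void *arg)
  {
          struct sge_qset *qs = arg;
          struct sge_txq *txq = &qs->txq[TXQ_ETH];

          if (mtx_trylock(&txq->lock)) {
                  cxgb_start_locked(qs);
                  mtx_unlock(&txq->lock);
          }
          /* Equivalent to a thread polling every tick, minus the thread. */
          if (!buf_ring_empty(txq->txq_mr))
                  callout_reset(&txq->txq_timer, 1, cxgb_tx_timeout, qs);
  }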

- remove embedded mbuf usage - Jeffr's changes will (I hope) soon be
  integrated, greatly reducing the overhead of using the kernel APIs for
  reference counting clusters

- add hysteresis to descriptor coalescing logic
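
  Roughly (a sketch; the two thresholds are placeholders, exposed as
  sysctls in the next item, and qs->coalescing is the new qset field):

  static __inline void
  check_pkt_coalesce(struct sge_qset *qs)
  {
          struct sge_txq *txq = &qs->txq[TXQ_ETH];

          if (qs->coalescing == 0 && txq->in_use >= coalesce_start_thresh)
                  qs->coalescing = 1;     /* filling up: batch packets per WR */
          else if (qs->coalescing && txq->in_use <= coalesce_stop_thresh)
                  qs->coalescing = 0;     /* well drained: one packet per WR */
  }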

- add coalesce threshold sysctls to allow users to decide at run-time
  between optimizing for forwarding / UDP or optimizing for TCP
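
  For example, knobs of this shape (names and defaults here are
  illustrative, not necessarily the ones this commit adds):

  static int coalesce_start_thresh = 64;
  TUNABLE_INT("hw.cxgb.tx_coalesce_start", &coalesce_start_thresh);
  SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_start, CTLFLAG_RW,
      &coalesce_start_thresh, 0,
      "tx descriptors in use before coalescing kicks in");

  static int coalesce_stop_thresh = 16;
  TUNABLE_INT("hw.cxgb.tx_coalesce_stop", &coalesce_stop_thresh);
  SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_stop, CTLFLAG_RW,
      &coalesce_stop_thresh, 0,
      "tx descriptors in use below which coalescing stops");

  The idea being that packets-per-second-bound forwarding/UDP workloads
  benefit from aggressive coalescing, while TCP streams generally do not
  need it.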

- add a once-per-second watchdog to effectively close the very rare races
  arising from coalescing
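
  Sketch (txq_watchdog is the callout added to struct sge_txq and
  cxgb_tx_watchdog the handler declared in cxgb_adapter.h; the drain call
  is illustrative):

  static void
  cxgb_tx_watchdog(void *arg)
  {
          struct sge_qset *qs = arg;
          struct sge_txq *txq = &qs->txq[TXQ_ETH];

          /* Kick the queue unconditionally: if nobody raced, it's a no-op. */
          if (!buf_ring_empty(txq->txq_mr) && mtx_trylock(&txq->lock)) {
                  cxgb_start_locked(qs);
                  mtx_unlock(&txq->lock);
          }
          callout_reset(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs);
  }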

- incorporate Navdeep's changes to the initialization path required to
  convert port and adapter locks back to ordinary mutexes (silencing BPF
  LOR complaints)

- enable prefetches in get_packet and tx cleaning
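
  The tx-cleaning instance is visible in m_freem_list() in the mvec.h diff
  below; the idiom, as a sketch with a hypothetical wrapper name, is:

  static void
  free_tx_chain(struct mbuf *m)
  {
          struct mbuf *n;

          while (m != NULL) {
                  n = m->m_nextpkt;
                  if (n != NULL)
                          prefetch(n);    /* overlap the miss with the free */
                  m_freem(m);
                  m = n;
          }
  }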

Reviewed by:	navdeep@
MFC after:	2 weeks
Kip Macy 2009-06-19 23:34:32 +00:00
parent d49cd9a18e
commit 3f345a5d09
11 changed files with 1141 additions and 2474 deletions

sys/conf/files

@@ -756,8 +756,6 @@ dev/cxgb/cxgb_offload.c optional cxgb pci \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgb/cxgb_sge.c optional cxgb pci \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgb/cxgb_multiq.c optional cxgb pci \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgb/common/cxgb_mc5.c optional cxgb pci \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgb/common/cxgb_vsc7323.c optional cxgb pci \
@@ -776,8 +774,6 @@ dev/cxgb/common/cxgb_tn1010.c optional cxgb pci \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgb/sys/uipc_mvec.c optional cxgb pci \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgb/sys/cxgb_support.c optional cxgb pci \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgb/cxgb_t3fw.c optional cxgb cxgb_t3fw \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cy/cy.c optional cy

sys/dev/cxgb/common/cxgb_t3_cpl.h

@@ -237,10 +237,21 @@ struct rss_header {
#ifndef CHELSIO_FW
struct work_request_hdr {
__be32 wr_hi;
__be32 wr_lo;
union {
struct {
__be32 wr_hi;
__be32 wr_lo;
} ilp32;
struct {
__be64 wr_hilo;
} lp64;
} u;
};
#define wrh_hi u.ilp32.wr_hi
#define wrh_lo u.ilp32.wr_lo
#define wrh_hilo u.lp64.wr_hilo
/* wr_hi fields */
#define S_WR_SGE_CREDITS 0
#define M_WR_SGE_CREDITS 0xFF
@@ -817,8 +828,7 @@ struct cpl_peer_close {
};
struct tx_data_wr {
__be32 wr_hi;
__be32 wr_lo;
WR_HDR;
__be32 len;
__be32 flags;
__be32 sndseq;
@@ -936,8 +946,7 @@ struct cpl_rdma_ec_status {
};
struct mngt_pktsched_wr {
__be32 wr_hi;
__be32 wr_lo;
WR_HDR;
__u8 mngt_opcode;
__u8 rsvd[7];
__u8 sched;

sys/dev/cxgb/cxgb_adapter.h

@@ -35,7 +35,6 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/rman.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
@@ -63,8 +62,6 @@ POSSIBILITY OF SUCH DAMAGE.
#include <netinet/tcp_lro.h>
#endif
#define USE_SX
struct adapter;
struct sge_qset;
extern int cxgb_debug;
@@ -82,22 +79,9 @@ extern int cxgb_debug;
mtx_destroy((lock)); \
} while (0)
#define SX_INIT(lock, lockname) \
do { \
printf("initializing %s at %s:%d\n", lockname, __FILE__, __LINE__); \
sx_init((lock), lockname); \
} while (0)
#define SX_DESTROY(lock) \
do { \
printf("destroying %s at %s:%d\n", (lock)->lock_object.lo_name, __FILE__, __LINE__); \
sx_destroy((lock)); \
} while (0)
#else
#define MTX_INIT mtx_init
#define MTX_DESTROY mtx_destroy
#define SX_INIT sx_init
#define SX_DESTROY sx_destroy
#endif
enum {
@@ -110,20 +94,17 @@ struct port_info {
struct adapter *adapter;
struct ifnet *ifp;
int if_flags;
int flags;
const struct port_type_info *port_type;
struct cphy phy;
struct cmac mac;
struct link_config link_config;
struct ifmedia media;
#ifdef USE_SX
struct sx lock;
#else
struct mtx lock;
#endif
uint8_t port_id;
uint8_t tx_chan;
uint8_t txpkt_intf;
uint8_t first_qset;
uint32_t port_id;
uint32_t tx_chan;
uint32_t txpkt_intf;
uint32_t first_qset;
uint32_t nqsets;
int link_fault;
@@ -135,19 +116,30 @@ struct port_info {
#define PORT_NAME_LEN 32
char lockbuf[PORT_LOCK_NAME_LEN];
char namebuf[PORT_NAME_LEN];
};
} __aligned(L1_CACHE_BYTES);
enum { /* adapter flags */
enum {
/* adapter flags */
FULL_INIT_DONE = (1 << 0),
USING_MSI = (1 << 1),
USING_MSIX = (1 << 2),
QUEUES_BOUND = (1 << 3),
FW_UPTODATE = (1 << 4),
TPS_UPTODATE = (1 << 5),
FW_UPTODATE = (1 << 4),
TPS_UPTODATE = (1 << 5),
CXGB_SHUTDOWN = (1 << 6),
CXGB_OFLD_INIT = (1 << 7),
TP_PARITY_INIT = (1 << 8),
TP_PARITY_INIT = (1 << 8),
CXGB_BUSY = (1 << 9),
/* port flags */
DOOMED = (1 << 0),
};
#define IS_DOOMED(p) (p->flags & DOOMED)
#define SET_DOOMED(p) do {p->flags |= DOOMED;} while (0)
#define DOOMED(p) (p->flags & DOOMED)
#define IS_BUSY(sc) (sc->flags & CXGB_BUSY)
#define SET_BUSY(sc) do {sc->flags |= CXGB_BUSY;} while (0)
#define CLR_BUSY(sc) do {sc->flags &= ~CXGB_BUSY;} while (0)
#define FL_Q_SIZE 4096
#define JUMBO_Q_SIZE 1024
@@ -205,10 +197,6 @@ struct sge_rspq {
uint32_t rspq_dump_count;
};
#ifndef DISABLE_MBUF_IOVEC
#define rspq_mbuf rspq_mh.mh_head
#endif
struct rx_desc;
struct rx_sw_desc;
@@ -253,7 +241,6 @@ struct sge_txq {
bus_addr_t phys_addr;
struct task qresume_task;
struct task qreclaim_task;
struct port_info *port;
uint32_t cntxt_id;
uint64_t stops;
uint64_t restarts;
@@ -261,26 +248,21 @@ struct sge_txq {
bus_dmamap_t desc_map;
bus_dma_tag_t entry_tag;
struct mbuf_head sendq;
/*
* cleanq should really be an buf_ring to avoid extra
* mbuf touches
*/
struct mbuf_head cleanq;
struct buf_ring *txq_mr;
struct ifaltq *txq_ifq;
struct mbuf *immpkt;
struct callout txq_timer;
struct callout txq_watchdog;
uint64_t txq_coalesced;
uint32_t txq_drops;
uint32_t txq_skipped;
uint32_t txq_coalesced;
uint32_t txq_enqueued;
uint32_t txq_dump_start;
uint32_t txq_dump_count;
unsigned long txq_frees;
struct mtx lock;
uint64_t txq_direct_packets;
uint64_t txq_direct_bytes;
uint64_t txq_frees;
struct sg_ent txq_sgl[TX_MAX_SEGS / 2 + 1];
#define TXQ_NAME_LEN 32
char lockbuf[TXQ_NAME_LEN];
};
@@ -297,6 +279,8 @@ enum {
#define QS_EXITING 0x1
#define QS_RUNNING 0x2
#define QS_BOUND 0x4
#define QS_FLUSHING 0x8
#define QS_TIMEOUT 0x10
struct sge_qset {
struct sge_rspq rspq;
@@ -309,10 +293,10 @@ struct sge_qset {
uint64_t port_stats[SGE_PSTAT_MAX];
struct port_info *port;
int idx; /* qset # */
int qs_cpuid;
int qs_flags;
int coalescing;
struct cv qs_cv;
struct mtx qs_mtx;
struct mtx lock;
#define QS_NAME_LEN 32
char namebuf[QS_NAME_LEN];
};
@@ -328,7 +312,7 @@ struct adapter {
device_t dev;
int flags;
TAILQ_ENTRY(adapter) adapter_entry;
/* PCI register resources */
int regs_rid;
struct resource *regs_res;
@@ -401,11 +385,7 @@ struct adapter {
char port_types[MAX_NPORTS + 1];
uint32_t open_device_map;
uint32_t registered_device_map;
#ifdef USE_SX
struct sx lock;
#else
struct mtx lock;
#endif
driver_intr_t *cxgb_intr;
int msi_count;
@@ -422,31 +402,17 @@ struct t3_rx_mode {
struct port_info *port;
};
#define MDIO_LOCK(adapter) mtx_lock(&(adapter)->mdio_lock)
#define MDIO_UNLOCK(adapter) mtx_unlock(&(adapter)->mdio_lock)
#define ELMR_LOCK(adapter) mtx_lock(&(adapter)->elmer_lock)
#define ELMR_UNLOCK(adapter) mtx_unlock(&(adapter)->elmer_lock)
#ifdef USE_SX
#define PORT_LOCK(port) sx_xlock(&(port)->lock);
#define PORT_UNLOCK(port) sx_xunlock(&(port)->lock);
#define PORT_LOCK_INIT(port, name) SX_INIT(&(port)->lock, name)
#define PORT_LOCK_DEINIT(port) SX_DESTROY(&(port)->lock)
#define PORT_LOCK_ASSERT_OWNED(port) sx_assert(&(port)->lock, SA_LOCKED)
#define ADAPTER_LOCK(adap) sx_xlock(&(adap)->lock);
#define ADAPTER_UNLOCK(adap) sx_xunlock(&(adap)->lock);
#define ADAPTER_LOCK_INIT(adap, name) SX_INIT(&(adap)->lock, name)
#define ADAPTER_LOCK_DEINIT(adap) SX_DESTROY(&(adap)->lock)
#define ADAPTER_LOCK_ASSERT_NOTOWNED(adap) sx_assert(&(adap)->lock, SA_UNLOCKED)
#define ADAPTER_LOCK_ASSERT_OWNED(adap) sx_assert(&(adap)->lock, SA_LOCKED)
#else
#define PORT_LOCK(port) mtx_lock(&(port)->lock);
#define PORT_UNLOCK(port) mtx_unlock(&(port)->lock);
#define PORT_LOCK_INIT(port, name) mtx_init(&(port)->lock, name, 0, MTX_DEF)
#define PORT_LOCK_DEINIT(port) mtx_destroy(&(port)->lock)
#define PORT_LOCK_ASSERT_NOTOWNED(port) mtx_assert(&(port)->lock, MA_NOTOWNED)
#define PORT_LOCK_ASSERT_OWNED(port) mtx_assert(&(port)->lock, MA_OWNED)
#define ADAPTER_LOCK(adap) mtx_lock(&(adap)->lock);
@@ -455,7 +421,6 @@ struct t3_rx_mode {
#define ADAPTER_LOCK_DEINIT(adap) mtx_destroy(&(adap)->lock)
#define ADAPTER_LOCK_ASSERT_NOTOWNED(adap) mtx_assert(&(adap)->lock, MA_NOTOWNED)
#define ADAPTER_LOCK_ASSERT_OWNED(adap) mtx_assert(&(adap)->lock, MA_OWNED)
#endif
static __inline uint32_t
@@ -555,14 +520,11 @@ void t3_sge_stop(adapter_t *);
void t3b_intr(void *data);
void t3_intr_msi(void *data);
void t3_intr_msix(void *data);
int t3_encap(struct sge_qset *, struct mbuf **, int);
int t3_sge_init_adapter(adapter_t *);
int t3_sge_reset_adapter(adapter_t *);
int t3_sge_init_port(struct port_info *);
void t3_sge_deinit_sw(adapter_t *);
void t3_free_tx_desc(struct sge_txq *q, int n);
void t3_free_tx_desc_all(struct sge_txq *q);
void t3_free_tx_desc(struct sge_qset *qs, int n, int qid);
void t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad);
@@ -615,13 +577,8 @@ static inline int offload_running(adapter_t *adapter)
return isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT);
}
int cxgb_pcpu_enqueue_packet(struct ifnet *ifp, struct mbuf *m);
int cxgb_pcpu_transmit(struct ifnet *ifp, struct mbuf *m);
void cxgb_pcpu_shutdown_threads(struct adapter *sc);
void cxgb_pcpu_startup_threads(struct adapter *sc);
int process_responses(adapter_t *adap, struct sge_qset *qs, int budget);
void t3_free_qset(adapter_t *sc, struct sge_qset *q);
void cxgb_tx_watchdog(void *arg);
int cxgb_transmit(struct ifnet *ifp, struct mbuf *m);
void cxgb_qflush(struct ifnet *ifp);
void cxgb_start(struct ifnet *ifp);
void refill_fl_service(adapter_t *adap, struct sge_fl *fl);
#endif

sys/dev/cxgb/cxgb_main.c (diff suppressed because it is too large)

sys/dev/cxgb/cxgb_multiq.c (deleted)

@@ -1,594 +0,0 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/module.h>
#include <sys/pciio.h>
#include <sys/conf.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus_dma.h>
#include <sys/kthread.h>
#include <sys/rman.h>
#include <sys/ioccom.h>
#include <sys/mbuf.h>
#include <sys/linker.h>
#include <sys/firmware.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/unistd.h>
#include <sys/syslog.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/sctp_crc32.h>
#include <netinet/sctp.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_private.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <cxgb_include.h>
#include <sys/mvec.h>
extern int txq_fills;
int multiq_tx_enable = 1;
int coalesce_tx_enable = 1;
int wakeup_tx_thread = 0;
extern struct sysctl_oid_list sysctl__hw_cxgb_children;
static int sleep_ticks = 1;
TUNABLE_INT("hw.cxgb.sleep_ticks", &sleep_ticks);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, sleep_ticks, CTLFLAG_RDTUN, &sleep_ticks, 0,
"ticks to sleep between checking pcpu queues");
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
"size of per-queue mbuf ring");
static void cxgb_pcpu_start_proc(void *arg);
static int cxgb_tx(struct sge_qset *qs, uint32_t txmax);
#ifdef IFNET_MULTIQUEUE
static int cxgb_pcpu_cookie_to_qidx(struct port_info *pi, uint32_t cookie);
#endif
static inline int
cxgb_pcpu_enqueue_packet_(struct sge_qset *qs, struct mbuf *m)
{
struct sge_txq *txq;
int err = 0;
KASSERT(m != NULL, ("null mbuf"));
KASSERT(m->m_type == MT_DATA, ("bad mbuf type %d", m->m_type));
if (qs->qs_flags & QS_EXITING) {
m_freem(m);
return (ENETDOWN);
}
txq = &qs->txq[TXQ_ETH];
err = drbr_enqueue(qs->port->ifp, txq->txq_mr, m);
if (err) {
txq->txq_drops++;
m_freem(m);
}
if (wakeup_tx_thread && !err &&
((txq->flags & TXQ_TRANSMITTING) == 0))
wakeup(qs);
return (err);
}
static int
cxgb_dequeue_packet(struct sge_txq *txq, struct mbuf **m_vec)
{
struct mbuf *m, *m0;
struct sge_qset *qs;
int count, size, coalesced;
struct adapter *sc;
#ifdef ALTQ
if (ALTQ_IS_ENABLED(txq->txq_ifq)) {
IFQ_DRV_DEQUEUE(txq->txq_ifq, m);
if (m == NULL)
return (0);
m_vec[0] = m;
return (1);
}
#endif
mtx_assert(&txq->lock, MA_OWNED);
coalesced = count = size = 0;
qs = txq_to_qset(txq, TXQ_ETH);
if (qs->qs_flags & QS_EXITING)
return (0);
if (txq->immpkt != NULL) {
m_vec[0] = txq->immpkt;
txq->immpkt = NULL;
return (1);
}
sc = qs->port->adapter;
m = buf_ring_dequeue_sc(txq->txq_mr);
if (m == NULL)
return (0);
count = 1;
m_vec[0] = m;
if (m->m_pkthdr.tso_segsz > 0 ||
m->m_pkthdr.len > TX_WR_SIZE_MAX ||
m->m_next != NULL ||
(coalesce_tx_enable == 0)) {
return (count);
}
size = m->m_pkthdr.len;
for (m = buf_ring_peek(txq->txq_mr); m != NULL;
m = buf_ring_peek(txq->txq_mr)) {
if (m->m_pkthdr.tso_segsz > 0
|| size + m->m_pkthdr.len > TX_WR_SIZE_MAX
|| m->m_next != NULL)
break;
m0 = buf_ring_dequeue_sc(txq->txq_mr);
#ifdef DEBUG_BUFRING
if (m0 != m)
panic("peek and dequeue don't match");
#endif
size += m->m_pkthdr.len;
m_vec[count++] = m;
if (count == TX_WR_COUNT_MAX)
break;
coalesced++;
}
txq->txq_coalesced += coalesced;
return (count);
}
static void
cxgb_pcpu_free(struct sge_qset *qs)
{
struct mbuf *m;
struct sge_txq *txq = &qs->txq[TXQ_ETH];
mtx_lock(&txq->lock);
while ((m = mbufq_dequeue(&txq->sendq)) != NULL)
m_freem(m);
while ((m = buf_ring_dequeue_sc(txq->txq_mr)) != NULL)
m_freem(m);
t3_free_tx_desc_all(txq);
mtx_unlock(&txq->lock);
}
static int
cxgb_pcpu_reclaim_tx(struct sge_txq *txq)
{
int reclaimable;
struct sge_qset *qs = txq_to_qset(txq, TXQ_ETH);
mtx_assert(&txq->lock, MA_OWNED);
reclaimable = desc_reclaimable(txq);
if (reclaimable == 0)
return (0);
t3_free_tx_desc(txq, reclaimable);
txq->cleaned += reclaimable;
txq->in_use -= reclaimable;
if (isset(&qs->txq_stopped, TXQ_ETH)) {
qs->port->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
clrbit(&qs->txq_stopped, TXQ_ETH);
}
return (reclaimable);
}
static int
cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush)
{
int i, err, initerr, flush, reclaimed, stopped;
struct port_info *pi;
struct sge_txq *txq;
adapter_t *sc;
uint32_t max_desc;
pi = qs->port;
initerr = err = i = reclaimed = 0;
sc = pi->adapter;
txq = &qs->txq[TXQ_ETH];
mtx_assert(&txq->lock, MA_OWNED);
retry:
if (!pi->link_config.link_ok)
initerr = ENETDOWN;
else if (qs->qs_flags & QS_EXITING)
initerr = ENETDOWN;
else if ((pi->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
initerr = ENETDOWN;
else if ((pi->ifp->if_flags & IFF_UP) == 0)
initerr = ENETDOWN;
else if (immpkt) {
if (!buf_ring_empty(txq->txq_mr)
|| ALTQ_IS_ENABLED(&pi->ifp->if_snd))
initerr = cxgb_pcpu_enqueue_packet_(qs, immpkt);
else
txq->immpkt = immpkt;
immpkt = NULL;
}
if (initerr) {
if (immpkt)
m_freem(immpkt);
if (initerr == ENOBUFS && !tx_flush)
wakeup(qs);
return (initerr);
}
if ((tx_flush && (desc_reclaimable(txq) > 0)) ||
(desc_reclaimable(txq) > (TX_ETH_Q_SIZE>>3))) {
cxgb_pcpu_reclaim_tx(txq);
}
stopped = isset(&qs->txq_stopped, TXQ_ETH);
flush = ((!drbr_empty(pi->ifp, txq->txq_mr)
&& !stopped) || txq->immpkt);
max_desc = tx_flush ? TX_ETH_Q_SIZE : TX_START_MAX_DESC;
err = flush ? cxgb_tx(qs, max_desc) : 0;
if ((tx_flush && flush && err == 0) &&
!drbr_empty(pi->ifp, txq->txq_mr)) {
struct thread *td = curthread;
if (++i > 1) {
thread_lock(td);
sched_prio(td, PRI_MIN_TIMESHARE);
thread_unlock(td);
}
if (i > 200) {
device_printf(qs->port->adapter->dev,
"exceeded max enqueue tries\n");
return (EBUSY);
}
goto retry;
}
return (err);
}
int
cxgb_pcpu_transmit(struct ifnet *ifp, struct mbuf *immpkt)
{
uint32_t cookie;
int err, qidx, locked, resid;
struct port_info *pi;
struct sge_qset *qs;
struct sge_txq *txq = NULL /* gcc is dumb */;
struct adapter *sc;
pi = ifp->if_softc;
sc = pi->adapter;
qs = NULL;
qidx = resid = err = cookie = locked = 0;
#ifdef IFNET_MULTIQUEUE
if (immpkt && (immpkt->m_pkthdr.flowid != 0)) {
cookie = immpkt->m_pkthdr.flowid;
qidx = cxgb_pcpu_cookie_to_qidx(pi, cookie);
qs = &pi->adapter->sge.qs[qidx];
} else
#endif
qs = &pi->adapter->sge.qs[pi->first_qset];
txq = &qs->txq[TXQ_ETH];
if (((sc->tunq_coalesce == 0) ||
(buf_ring_count(txq->txq_mr) >= TX_WR_COUNT_MAX) ||
(coalesce_tx_enable == 0)) && mtx_trylock(&txq->lock)) {
txq->flags |= TXQ_TRANSMITTING;
err = cxgb_pcpu_start_(qs, immpkt, FALSE);
txq->flags &= ~TXQ_TRANSMITTING;
mtx_unlock(&txq->lock);
} else if (immpkt)
return (cxgb_pcpu_enqueue_packet_(qs, immpkt));
return ((err == EBUSY) ? 0 : err);
}
void
cxgb_start(struct ifnet *ifp)
{
struct port_info *p = ifp->if_softc;
if (!p->link_config.link_ok)
return;
cxgb_pcpu_transmit(ifp, NULL);
}
static void
cxgb_pcpu_start_proc(void *arg)
{
struct sge_qset *qs = arg;
struct thread *td;
struct sge_txq *txq = &qs->txq[TXQ_ETH];
int idleticks, err = 0;
td = curthread;
sleep_ticks = max(hz/1000, 1);
qs->qs_flags |= QS_RUNNING;
thread_lock(td);
sched_bind(td, qs->qs_cpuid);
thread_unlock(td);
DELAY(qs->qs_cpuid*100000);
if (bootverbose)
printf("bound to %d running on %d\n", qs->qs_cpuid, curcpu);
for (;;) {
if (qs->qs_flags & QS_EXITING)
break;
if ((qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
idleticks = hz;
if (!buf_ring_empty(txq->txq_mr) ||
!mbufq_empty(&txq->sendq))
cxgb_pcpu_free(qs);
goto done;
} else
idleticks = sleep_ticks;
if (mtx_trylock(&txq->lock)) {
txq->flags |= TXQ_TRANSMITTING;
err = cxgb_pcpu_start_(qs, NULL, TRUE);
txq->flags &= ~TXQ_TRANSMITTING;
mtx_unlock(&txq->lock);
} else
err = EINPROGRESS;
#ifdef notyet
if (mtx_trylock(&qs->rspq.lock)) {
process_responses(sc, qs, -1);
refill_fl_service(sc, &qs->fl[0]);
refill_fl_service(sc, &qs->fl[1]);
t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
V_NEWTIMER(qs->rspq.next_holdoff) | V_NEWINDEX(qs->rspq.cidx));
mtx_unlock(&qs->rspq.lock);
}
#endif
if ((!buf_ring_empty(txq->txq_mr)) && err == 0) {
#if 0
if (cxgb_debug)
printf("head=%p cons=%d prod=%d\n",
txq->sendq.head, txq->txq_mr.br_cons,
txq->txq_mr.br_prod);
#endif
continue;
}
done:
tsleep(qs, 1, "cxgbidle", idleticks);
}
if (bootverbose)
device_printf(qs->port->adapter->dev, "exiting thread for cpu%d\n", qs->qs_cpuid);
cxgb_pcpu_free(qs);
t3_free_qset(qs->port->adapter, qs);
qs->qs_flags &= ~QS_RUNNING;
#if __FreeBSD_version >= 800002
kproc_exit(0);
#else
kthread_exit(0);
#endif
}
#ifdef IFNET_MULTIQUEUE
static int
cxgb_pcpu_cookie_to_qidx(struct port_info *pi, uint32_t cookie)
{
int qidx;
uint32_t tmp;
if (multiq_tx_enable == 0)
return (pi->first_qset);
/*
* Will probably need to be changed for 4-port XXX
*/
tmp = pi->tx_chan ? cookie : cookie & ((RSS_TABLE_SIZE>>1)-1);
DPRINTF(" tmp=%d ", tmp);
qidx = (tmp & (pi->nqsets -1)) + pi->first_qset;
return (qidx);
}
#endif
void
cxgb_pcpu_startup_threads(struct adapter *sc)
{
int i, j, nqsets;
struct proc *p;
for (i = 0; i < (sc)->params.nports; ++i) {
struct port_info *pi = adap2pinfo(sc, i);
#ifdef IFNET_MULTIQUEUE
nqsets = pi->nqsets;
#else
nqsets = 1;
#endif
for (j = 0; j < nqsets; ++j) {
struct sge_qset *qs;
qs = &sc->sge.qs[pi->first_qset + j];
qs->port = pi;
qs->qs_cpuid = ((pi->first_qset + j) % mp_ncpus);
device_printf(sc->dev, "starting thread for %d\n",
qs->qs_cpuid);
#if __FreeBSD_version >= 800002
kproc_create(cxgb_pcpu_start_proc, qs, &p,
RFNOWAIT, 0, "cxgbsp");
#else
kthread_create(cxgb_pcpu_start_proc, qs, &p,
RFNOWAIT, 0, "cxgbsp");
#endif
DELAY(200);
}
}
}
void
cxgb_pcpu_shutdown_threads(struct adapter *sc)
{
int i, j;
int nqsets;
for (i = 0; i < sc->params.nports; i++) {
struct port_info *pi = &sc->port[i];
int first = pi->first_qset;
#ifdef IFNET_MULTIQUEUE
nqsets = pi->nqsets;
#else
nqsets = 1;
#endif
for (j = 0; j < nqsets; j++) {
struct sge_qset *qs = &sc->sge.qs[first + j];
qs->qs_flags |= QS_EXITING;
wakeup(qs);
tsleep(&sc, PRI_MIN_TIMESHARE, "cxgb unload 0", hz>>2);
while (qs->qs_flags & QS_RUNNING) {
qs->qs_flags |= QS_EXITING;
device_printf(sc->dev, "qset thread %d still running - sleeping\n", first + j);
tsleep(&sc, PRI_MIN_TIMESHARE, "cxgb unload 1", 2*hz);
}
}
}
}
static __inline void
check_pkt_coalesce(struct sge_qset *qs)
{
struct adapter *sc;
struct sge_txq *txq;
txq = &qs->txq[TXQ_ETH];
sc = qs->port->adapter;
if (sc->tunq_fill[qs->idx] && (txq->in_use < (txq->size - (txq->size>>2))))
sc->tunq_fill[qs->idx] = 0;
else if (!sc->tunq_fill[qs->idx] && (txq->in_use > (txq->size - (txq->size>>2))))
sc->tunq_fill[qs->idx] = 1;
}
static int
cxgb_tx(struct sge_qset *qs, uint32_t txmax)
{
struct sge_txq *txq;
struct ifnet *ifp = qs->port->ifp;
int i, err, in_use_init, count;
struct mbuf *m_vec[TX_WR_COUNT_MAX];
txq = &qs->txq[TXQ_ETH];
ifp = qs->port->ifp;
in_use_init = txq->in_use;
count = err = 0;
mtx_assert(&txq->lock, MA_OWNED);
while ((txq->in_use - in_use_init < txmax) &&
(txq->size > txq->in_use + TX_MAX_DESC)) {
check_pkt_coalesce(qs);
count = cxgb_dequeue_packet(txq, m_vec);
if (count == 0)
break;
for (i = 0; i < count; i++)
ETHER_BPF_MTAP(ifp, m_vec[i]);
if ((err = t3_encap(qs, m_vec, count)) != 0)
break;
txq->txq_enqueued += count;
}
if (txq->size <= txq->in_use + TX_MAX_DESC) {
txq_fills++;
setbit(&qs->txq_stopped, TXQ_ETH);
}
if (err == ENOMEM) {
int i;
/*
* Sub-optimal :-/
*/
printf("ENOMEM!!!");
for (i = 0; i < count; i++)
m_freem(m_vec[i]);
}
return (err);
}

sys/dev/cxgb/cxgb_osdep.h

@@ -41,8 +41,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include <dev/mii/mii.h>
#define CONFIG_CHELSIO_T3_CORE
#include <common/cxgb_version.h>
#include <cxgb_config.h>
#ifndef _CXGB_OSDEP_H_
#define _CXGB_OSDEP_H_
@@ -113,9 +113,9 @@ struct t3_mbuf_hdr {
#include "opt_inet.h"
#ifdef INET
#define LRO_SUPPORTED
#endif
#define TOE_SUPPORTED
#endif
#endif
#if __FreeBSD_version < 800054
#if defined (__GNUC__)
@@ -165,8 +165,7 @@ struct t3_mbuf_hdr {
#define TX_MAX_DESC 4 /* max descriptors per packet */
#define TX_START_MIN_DESC (TX_MAX_DESC << 2)
#define TX_START_MAX_DESC (TX_MAX_DESC << 3) /* maximum number of descriptors
#define TX_START_MAX_DESC (TX_MAX_DESC << 2) /* maximum number of descriptors
* call to start used per */
#define TX_CLEAN_MAX_DESC (TX_MAX_DESC << 4) /* maximum tx descriptors
@@ -177,18 +176,17 @@ struct t3_mbuf_hdr {
#define TX_WR_COUNT_MAX 7 /* the maximum total number of packets that can be
* aggregated into a single TX WR
*/
#if defined(__i386__) || defined(__amd64__)
#if defined(__i386__) || defined(__amd64__)
#define smp_mb() mb()
#define L1_CACHE_BYTES 128
static __inline
void prefetch(void *x)
{
__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
}
}
#define smp_mb() mb()
#define L1_CACHE_BYTES 128
extern void kdb_backtrace(void);
#define WARN_ON(condition) do { \
@@ -198,8 +196,7 @@ extern void kdb_backtrace(void);
} \
} while (0)
#else /* !i386 && !amd64 */
#else
#define smp_mb()
#define prefetch(x)
#define L1_CACHE_BYTES 32

sys/dev/cxgb/cxgb_sge.c (diff suppressed because it is too large)

sys/dev/cxgb/sys/cxgb_support.c (deleted)

@@ -1,305 +0,0 @@
/**************************************************************************
Copyright (c) 2007, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <cxgb_include.h>
#include <sys/mvec.h>
extern int cxgb_use_16k_clusters;
int cxgb_pcpu_cache_enable = 1;
struct buf_stack {
caddr_t *bs_stack;
volatile int bs_head;
int bs_size;
};
static __inline int
buf_stack_push(struct buf_stack *bs, caddr_t buf)
{
if (bs->bs_head + 1 >= bs->bs_size)
return (ENOSPC);
bs->bs_stack[++(bs->bs_head)] = buf;
return (0);
}
static __inline caddr_t
buf_stack_pop(struct buf_stack *bs)
{
if (bs->bs_head < 0)
return (NULL);
return (bs->bs_stack[(bs->bs_head)--]);
}
/*
* Stack is full
*
*/
static __inline int
buf_stack_avail(struct buf_stack *bs)
{
return (bs->bs_size - bs->bs_head - 1);
}
struct cxgb_cache_pcpu {
struct buf_stack ccp_jumbo_free;
struct buf_stack ccp_cluster_free;
uma_zone_t ccp_jumbo_zone;
};
struct cxgb_cache_system {
struct cxgb_cache_pcpu ccs_array[0];
} *cxgb_caches;
static int
buf_stack_init(struct buf_stack *bs, int size)
{
bs->bs_size = size;
bs->bs_head = -1;
if((bs->bs_stack = malloc(sizeof(caddr_t)*size, M_DEVBUF, M_NOWAIT)) == NULL)
return (ENOMEM);
return (0);
}
static void
buf_stack_deinit(struct buf_stack *bs)
{
if (bs->bs_stack != NULL)
free(bs->bs_stack, M_DEVBUF);
}
static int
cxgb_cache_pcpu_init(struct cxgb_cache_pcpu *ccp)
{
int err;
if ((err = buf_stack_init(&ccp->ccp_jumbo_free, (JUMBO_Q_SIZE >> 2))))
return (err);
if ((err = buf_stack_init(&ccp->ccp_cluster_free, (FL_Q_SIZE >> 2))))
return (err);
#if __FreeBSD_version > 800000
if (cxgb_use_16k_clusters)
ccp->ccp_jumbo_zone = zone_jumbo16;
else
ccp->ccp_jumbo_zone = zone_jumbo9;
#else
ccp->ccp_jumbo_zone = zone_jumbop;
#endif
return (0);
}
static void
cxgb_cache_pcpu_deinit(struct cxgb_cache_pcpu *ccp)
{
void *cl;
while ((cl = buf_stack_pop(&ccp->ccp_jumbo_free)) != NULL)
uma_zfree(ccp->ccp_jumbo_zone, cl);
while ((cl = buf_stack_pop(&ccp->ccp_cluster_free)) != NULL)
uma_zfree(zone_clust, cl);
buf_stack_deinit(&ccp->ccp_jumbo_free);
buf_stack_deinit(&ccp->ccp_cluster_free);
}
static int inited = 0;
int
cxgb_cache_init(void)
{
int i, err;
if (inited++ > 0)
return (0);
if ((cxgb_caches = malloc(sizeof(struct cxgb_cache_pcpu)*mp_ncpus, M_DEVBUF, M_WAITOK|M_ZERO)) == NULL)
return (ENOMEM);
for (i = 0; i < mp_ncpus; i++)
if ((err = cxgb_cache_pcpu_init(&cxgb_caches->ccs_array[i])))
goto err;
return (0);
err:
cxgb_cache_flush();
return (err);
}
void
cxgb_cache_flush(void)
{
int i;
if (--inited > 0)
return;
if (cxgb_caches == NULL)
return;
for (i = 0; i < mp_ncpus; i++)
cxgb_cache_pcpu_deinit(&cxgb_caches->ccs_array[i]);
free(cxgb_caches, M_DEVBUF);
cxgb_caches = NULL;
}
caddr_t
cxgb_cache_get(uma_zone_t zone)
{
caddr_t cl = NULL;
struct cxgb_cache_pcpu *ccp;
if (cxgb_pcpu_cache_enable) {
critical_enter();
ccp = &cxgb_caches->ccs_array[curcpu];
if (zone == zone_clust) {
cl = buf_stack_pop(&ccp->ccp_cluster_free);
} else if (zone == ccp->ccp_jumbo_zone) {
cl = buf_stack_pop(&ccp->ccp_jumbo_free);
}
critical_exit();
}
if (cl == NULL)
cl = uma_zalloc(zone, M_NOWAIT);
else
cxgb_cached_allocations++;
return (cl);
}
void
cxgb_cache_put(uma_zone_t zone, void *cl)
{
struct cxgb_cache_pcpu *ccp;
int err = ENOSPC;
if (cxgb_pcpu_cache_enable) {
critical_enter();
ccp = &cxgb_caches->ccs_array[curcpu];
if (zone == zone_clust) {
err = buf_stack_push(&ccp->ccp_cluster_free, cl);
} else if (zone == ccp->ccp_jumbo_zone){
err = buf_stack_push(&ccp->ccp_jumbo_free, cl);
}
critical_exit();
}
if (err)
uma_zfree(zone, cl);
else
cxgb_cached++;
}
void
cxgb_cache_refill(void)
{
struct cxgb_cache_pcpu *ccp;
caddr_t vec[8];
uma_zone_t zone;
int i, count;
return;
restart:
critical_enter();
ccp = &cxgb_caches->ccs_array[curcpu];
zone = ccp->ccp_jumbo_zone;
if (!buf_stack_avail(&ccp->ccp_jumbo_free) &&
!buf_stack_avail(&ccp->ccp_cluster_free)) {
critical_exit();
return;
}
critical_exit();
for (i = 0; i < 8; i++)
if ((vec[i] = uma_zalloc(zone, M_NOWAIT)) == NULL)
goto free;
critical_enter();
ccp = &cxgb_caches->ccs_array[curcpu];
for (i = 0; i < 8 && buf_stack_avail(&ccp->ccp_jumbo_free); i++)
if (buf_stack_push(&ccp->ccp_jumbo_free, vec[i]))
break;
critical_exit();
for (; i < 8; i++)
uma_zfree(zone, vec[i]);
zone = zone_clust;
for (i = 0; i < 8; i++)
if ((vec[i] = uma_zalloc(zone, M_NOWAIT)) == NULL)
goto free;
critical_enter();
ccp = &cxgb_caches->ccs_array[curcpu];
for (i = 0; i < 8 && buf_stack_avail(&ccp->ccp_cluster_free); i++)
if (buf_stack_push(&ccp->ccp_cluster_free, vec[i]))
break;
critical_exit();
for (; i < 8; i++)
uma_zfree(zone, vec[i]);
goto restart;
free:
count = i;
for (; i < count; i++)
uma_zfree(zone, vec[i]);
}

sys/dev/cxgb/sys/mvec.h

@@ -1,6 +1,6 @@
/**************************************************************************
*
* Copyright (c) 2007, Kip Macy kmacy@freebsd.org
* Copyright (c) 2007,2009 Kip Macy kmacy@freebsd.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,156 +33,35 @@
#define _MVEC_H_
#include <machine/bus.h>
int cxgb_cache_init(void);
void cxgb_cache_flush(void);
caddr_t cxgb_cache_get(uma_zone_t zone);
void cxgb_cache_put(uma_zone_t zone, void *cl);
void cxgb_cache_refill(void);
extern int cxgb_cached_allocations;
extern int cxgb_cached;
extern int cxgb_ext_freed;
extern int cxgb_mbufs_outstanding;
extern int cxgb_pack_outstanding;
#define mtomv(m) ((struct mbuf_vec *)((m)->m_pktdat))
#define M_IOVEC 0x100000 /* mbuf immediate data area is used for cluster ptrs */
#define M_DDP 0x200000 /* direct data placement mbuf */
#define EXT_PHYS 10 /* physical/bus address */
/*
* duplication from mbuf.h - can't use directly because
* m_ext is a define
*/
struct m_ext_ {
caddr_t ext_buf; /* start of buffer */
void (*ext_free) /* free routine if not the usual */
(void *, void *);
#if __FreeBSD_version >= 800016
void *ext_arg1; /* optional argument pointer */
void *ext_arg2; /* optional argument pointer */
#else
void *ext_args; /* optional argument pointer */
#endif
u_int ext_size; /* size of buffer, for ext_free */
volatile u_int *ref_cnt; /* pointer to ref count info */
int ext_type; /* type of external storage */
};
#define MT_IOVEC 9
#define MT_CLIOVEC 10
#define EXT_IOVEC 8
#define EXT_CLIOVEC 9
#define EXT_JMPIOVEC 10
#define m_cur_offset m_ext.ext_size /* override to provide ddp offset */
#define m_seq m_pkthdr.csum_data /* stored sequence */
#define m_ddp_gl m_ext.ext_buf /* ddp list */
#define m_ddp_flags m_pkthdr.csum_flags /* ddp flags */
#define m_ulp_mode m_pkthdr.tso_segsz /* upper level protocol */
extern uma_zone_t zone_miovec;
struct mbuf_iovec {
struct m_ext_ mi_ext;
uint32_t mi_flags;
uint32_t mi_len;
caddr_t mi_data;
uint16_t mi_tso_segsz;
uint16_t mi_ether_vtag;
uint16_t mi_rss_hash; /* this can be shrunk down if something comes
* along that needs 1 byte
*/
uint16_t mi_pad;
struct mbuf *mi_mbuf; /* need to be able to handle the @#$@@#%$ing packet zone */
#define mi_size mi_ext.ext_size
#define mi_base mi_ext.ext_buf
#define mi_args mi_ext.ext_args
#define mi_size mi_ext.ext_size
#define mi_size mi_ext.ext_size
#define mi_refcnt mi_ext.ref_cnt
#define mi_ext_free mi_ext.ext_free
#define mi_ext_flags mi_ext.ext_flags
#define mi_type mi_ext.ext_type
};
#define MIOVBYTES 512
#define MAX_MBUF_IOV ((MHLEN-8)/sizeof(struct mbuf_iovec))
#define MAX_MIOVEC_IOV ((MIOVBYTES-sizeof(struct m_hdr)-sizeof(struct pkthdr)-8)/sizeof(struct mbuf_iovec))
#define MAX_CL_IOV ((MCLBYTES-sizeof(struct m_hdr)-sizeof(struct pkthdr)-8)/sizeof(struct mbuf_iovec))
#define MAX_PAGE_IOV ((MJUMPAGESIZE-sizeof(struct m_hdr)-sizeof(struct pkthdr)-8)/sizeof(struct mbuf_iovec))
struct mbuf_vec {
uint16_t mv_first; /* first valid cluster */
uint16_t mv_count; /* # of clusters */
uint32_t mv_flags; /* flags for iovec */
struct mbuf_iovec mv_vec[0]; /* depends on whether or not this is in a cluster or an mbuf */
};
void mi_init(void);
void mi_deinit(void);
int _m_explode(struct mbuf *);
void mb_free_vec(struct mbuf *m);
static __inline void
m_iovinit(struct mbuf *m)
{
struct mbuf_vec *mv = mtomv(m);
mv->mv_first = mv->mv_count = 0;
m->m_pkthdr.len = m->m_len = 0;
m->m_flags |= M_IOVEC;
}
static __inline void
m_iovappend(struct mbuf *m, uint8_t *cl, int size, int len, caddr_t data, volatile uint32_t *ref)
{
struct mbuf_vec *mv = mtomv(m);
struct mbuf_iovec *iov;
int idx = mv->mv_first + mv->mv_count;
KASSERT(idx <= MAX_MBUF_IOV, ("tried to append too many clusters to mbuf iovec"));
if ((m->m_flags & M_EXT) != 0)
panic("invalid flags in %s", __func__);
if (mv->mv_count == 0)
m->m_data = data;
iov = &mv->mv_vec[idx];
iov->mi_type = m_gettype(size);
iov->mi_base = cl;
iov->mi_len = len;
iov->mi_data = data;
iov->mi_refcnt = ref;
m->m_pkthdr.len += len;
m->m_len += len;
mv->mv_count++;
}
static __inline int
m_explode(struct mbuf *m)
{
if ((m->m_flags & M_IOVEC) == 0)
return (0);
return _m_explode(m);
}
static __inline void
busdma_map_mbuf_fast(struct mbuf *m, bus_dma_segment_t *seg)
busdma_map_mbuf_fast(struct sge_txq *txq, struct tx_sw_desc *txsd,
struct mbuf *m, bus_dma_segment_t *seg)
{
#if defined(__i386__) || defined(__amd64__)
seg->ds_addr = pmap_kextract(mtod(m, vm_offset_t));
seg->ds_len = m->m_len;
#else
int nsegstmp;
bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m, seg,
&nsegstmp, 0);
#endif
}
int busdma_map_sg_collapse(struct mbuf **m, bus_dma_segment_t *segs, int *nsegs);
int busdma_map_sg_vec(struct mbuf **m, struct mbuf **mp, bus_dma_segment_t *segs, int count);
static __inline int busdma_map_sgl(bus_dma_segment_t *vsegs, bus_dma_segment_t *segs, int count)
int busdma_map_sg_collapse(struct sge_txq *txq, struct tx_sw_desc *txsd,
struct mbuf **m, bus_dma_segment_t *segs, int *nsegs);
void busdma_map_sg_vec(struct sge_txq *txq, struct tx_sw_desc *txsd, struct mbuf *m, bus_dma_segment_t *segs, int *nsegs);
static __inline int
busdma_map_sgl(bus_dma_segment_t *vsegs, bus_dma_segment_t *segs, int count)
{
while (count--) {
segs->ds_addr = pmap_kextract((vm_offset_t)vsegs->ds_addr);
@@ -193,156 +72,19 @@ static __inline int busdma_map_sgl(bus_dma_segment_t *vsegs, bus_dma_segment_t *
return (0);
}
struct mbuf *mi_collapse_mbuf(struct mbuf_iovec *mi, struct mbuf *m);
void *mcl_alloc(int seg_count, int *type);
void mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx);
static __inline void
mi_collapse_sge(struct mbuf_iovec *mi, bus_dma_segment_t *seg)
m_freem_list(struct mbuf *m)
{
mi->mi_flags = 0;
mi->mi_base = (caddr_t)seg->ds_addr;
mi->mi_len = seg->ds_len;
mi->mi_size = 0;
mi->mi_type = EXT_PHYS;
mi->mi_refcnt = NULL;
struct mbuf *n;
while (m != NULL) {
n = m->m_nextpkt;
if (n != NULL)
prefetch(n);
m_freem(m);
m = n;
}
}
static __inline void
m_free_iovec(struct mbuf *m, int type)
{
int i;
struct mbuf_vec *mv;
struct mbuf_iovec *mi;
mv = mtomv(m);
mi = mv->mv_vec;
for (i = 0; i < mv->mv_count; i++, mi++) {
DPRINTF("freeing buf=%d of %d\n", i, mv->mv_count);
mb_free_ext_fast(mi, mi->mi_type, i);
}
switch (type) {
case EXT_IOVEC:
uma_zfree(zone_miovec, m);
break;
case EXT_CLIOVEC:
cxgb_cache_put(zone_clust, m);
break;
case EXT_JMPIOVEC:
cxgb_cache_put(zone_jumbop, m);
break;
default:
panic("unexpected type %d\n", type);
}
}
static __inline void
m_freem_iovec(struct mbuf_iovec *mi)
{
struct mbuf *m = (struct mbuf *)mi->mi_base;
switch (mi->mi_type) {
case EXT_MBUF:
#ifdef PIO_LEN
KASSERT(m->m_pkthdr.len > PIO_LEN, ("freeing PIO buf"));
#endif
KASSERT((mi->mi_flags & M_NOFREE) == 0, ("no free set on mbuf"));
KASSERT(m->m_next == NULL, ("freeing chain"));
cxgb_mbufs_outstanding--;
m_free_fast(m);
break;
case EXT_PACKET:
cxgb_pack_outstanding--;
m_free(mi->mi_mbuf);
break;
case EXT_IOVEC:
case EXT_CLIOVEC:
case EXT_JMPIOVEC:
m = (struct mbuf *)mi->mi_base;
m_free_iovec(m, mi->mi_type);
break;
case EXT_CLUSTER:
case EXT_JUMBOP:
case EXT_JUMBO9:
case EXT_JUMBO16:
case EXT_SFBUF:
case EXT_NET_DRV:
case EXT_MOD_TYPE:
case EXT_DISPOSABLE:
case EXT_EXTREF:
mb_free_ext_fast(mi, mi->mi_type, -1);
break;
default:
panic("unknown miov type: %d\n", mi->mi_type);
break;
}
}
static __inline uma_zone_t
m_getzonefromtype(int type)
{
uma_zone_t zone;
switch (type) {
case EXT_MBUF:
zone = zone_mbuf;
break;
case EXT_CLUSTER:
zone = zone_clust;
break;
#if MJUMPAGESIZE != MCLBYTES
case EXT_JUMBOP:
zone = zone_jumbop;
break;
#endif
case EXT_JUMBO9:
zone = zone_jumbo9;
break;
case EXT_JUMBO16:
zone = zone_jumbo16;
break;
#ifdef PACKET_ZONE
case EXT_PACKET:
zone = zone_pack;
break;
#endif
default:
panic("%s: invalid cluster type %d", __func__, type);
}
return (zone);
}
static __inline int
m_getsizefromtype(int type)
{
int size;
switch (type) {
case EXT_MBUF:
size = MSIZE;
break;
case EXT_CLUSTER:
case EXT_PACKET:
size = MCLBYTES;
break;
#if MJUMPAGESIZE != MCLBYTES
case EXT_JUMBOP:
size = MJUMPAGESIZE;
break;
#endif
case EXT_JUMBO9:
size = MJUM9BYTES;
break;
case EXT_JUMBO16:
size = MJUM16BYTES;
break;
default:
panic("%s: unrecognized cluster type %d", __func__, type);
}
return (size);
}
void dump_mi(struct mbuf_iovec *mi);
#endif /* _MVEC_H_ */

sys/dev/cxgb/sys/uipc_mvec.c

@@ -48,8 +48,6 @@ __FBSDID("$FreeBSD$");
#include <cxgb_include.h>
#include <sys/mvec.h>
#include "opt_zero.h"
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
@@ -60,180 +58,47 @@ __FBSDID("$FreeBSD$");
#define M_SANITY(a, b)
#endif
#define MAX_BUFS 36
#define MAX_HVEC 8
extern uint32_t collapse_free;
extern uint32_t mb_free_vec_free;
uma_zone_t zone_miovec;
static int mi_inited = 0;
int cxgb_mbufs_outstanding = 0;
int cxgb_pack_outstanding = 0;
void
mi_init(void)
{
if (mi_inited > 0)
return;
else
mi_inited++;
zone_miovec = uma_zcreate("MBUF IOVEC", MIOVBYTES,
NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, UMA_ZONE_MAXBUCKET);
}
void
mi_deinit(void)
{
mi_inited--;
if (mi_inited == 0)
uma_zdestroy(zone_miovec);
}
void
dump_mi(struct mbuf_iovec *mi)
{
int i;
struct mbuf_vec *mv;
printf("mi_flags=0x%08x mi_base=%p mi_data=%p mi_len=%d mi_type=%d\n",
mi->mi_flags, mi->mi_base, mi->mi_data, mi->mi_len, mi->mi_type);
if (mi->mi_type == EXT_CLIOVEC ||
mi->mi_type == EXT_IOVEC) {
mv = mtomv((struct mbuf *)mi->mi_base);
mi = mv->mv_vec;
for (i = 0; i < mv->mv_count; i++, mi++)
dump_mi(mi);
}
}
static __inline struct mbuf *
_mcl_collapse_mbuf(struct mbuf_iovec *mi, struct mbuf *m)
{
struct mbuf *n = m->m_next;
prefetch(n);
mi->mi_flags = m->m_flags;
mi->mi_len = m->m_len;
mi->mi_mbuf = NULL;
if (m->m_flags & M_PKTHDR) {
mi->mi_ether_vtag = m->m_pkthdr.ether_vtag;
mi->mi_tso_segsz = m->m_pkthdr.tso_segsz;
#ifdef IFNET_MULTIQUEUE
mi->mi_rss_hash = m->m_pkthdr.flowid;
#endif
if(!SLIST_EMPTY(&m->m_pkthdr.tags))
m_tag_delete_chain(m, NULL);
}
if (m->m_type != MT_DATA) {
mi->mi_data = NULL;
mi->mi_base = (caddr_t)m;
/*
* XXX JMPIOVEC
*/
mi->mi_size = (m->m_type == EXT_CLIOVEC) ? MCLBYTES : MIOVBYTES;
mi->mi_type = m->m_type;
mi->mi_len = m->m_pkthdr.len;
KASSERT(mi->mi_len, ("empty packet"));
mi->mi_refcnt = NULL;
} else if (m->m_flags & M_EXT) {
memcpy(&mi->mi_ext, &m->m_ext, sizeof(struct m_ext_));
mi->mi_data = m->m_data;
mi->mi_base = m->m_ext.ext_buf;
mi->mi_type = m->m_ext.ext_type;
mi->mi_size = m->m_ext.ext_size;
mi->mi_refcnt = m->m_ext.ref_cnt;
if (m->m_ext.ext_type == EXT_PACKET) {
mi->mi_mbuf = m;
#ifdef INVARIANTS
cxgb_pack_outstanding++;
#endif
}
} else {
mi->mi_base = (caddr_t)m;
mi->mi_data = m->m_data;
mi->mi_size = MSIZE;
mi->mi_type = EXT_MBUF;
mi->mi_refcnt = NULL;
#ifdef INVARIANTS
cxgb_mbufs_outstanding++;
#endif
}
KASSERT(mi->mi_len != 0, ("miov has len 0"));
KASSERT(mi->mi_type > 0, ("mi_type is invalid"));
KASSERT(mi->mi_base, ("mi_base is invalid"));
return (n);
}
struct mbuf *
mi_collapse_mbuf(struct mbuf_iovec *mi, struct mbuf *m)
{
return _mcl_collapse_mbuf(mi, m);
}
void *
mcl_alloc(int seg_count, int *type)
{
uma_zone_t zone;
if (seg_count > MAX_CL_IOV) {
zone = zone_jumbop;
*type = EXT_JMPIOVEC;
} else if (seg_count > MAX_MIOVEC_IOV) {
zone = zone_clust;
*type = EXT_CLIOVEC;
} else {
*type = EXT_IOVEC;
zone = zone_miovec;
}
return uma_zalloc_arg(zone, NULL, M_NOWAIT);
}
int
busdma_map_sg_collapse(struct mbuf **m, bus_dma_segment_t *segs, int *nsegs)
busdma_map_sg_collapse(struct sge_txq *txq, struct tx_sw_desc *txsd,
struct mbuf **m, bus_dma_segment_t *segs, int *nsegs)
{
struct mbuf *m0, *mhead, *n = *m;
struct mbuf_iovec *mi;
struct mbuf *marray[TX_MAX_SEGS];
int i, type, seg_count, defragged = 0, err = 0;
struct mbuf_vec *mv;
int skipped, freed;
struct mbuf *n = *m;
int seg_count, defragged = 0, err = 0;
bus_dma_segment_t *psegs;
KASSERT(n->m_pkthdr.len, ("packet has zero header len"));
if (n->m_pkthdr.len <= PIO_LEN)
return (0);
retry:
psegs = segs;
seg_count = 0;
if (n->m_next == NULL) {
busdma_map_mbuf_fast(n, segs);
busdma_map_mbuf_fast(txq, txsd, n, segs);
*nsegs = 1;
return (0);
}
skipped = freed = 0;
#if defined(__i386__) || defined(__amd64__)
while (n && seg_count < TX_MAX_SEGS) {
marray[seg_count] = n;
/*
* firmware doesn't like empty segments
*/
if (__predict_true(n->m_len != 0))
if (__predict_true(n->m_len != 0)) {
seg_count++;
else
skipped++;
busdma_map_mbuf_fast(txq, txsd, n, psegs);
psegs++;
}
n = n->m_next;
}
#else
err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m, segs,
&seg_count, 0);
#endif
if (seg_count == 0) {
if (cxgb_debug)
printf("empty segment chain\n");
err = EFBIG;
goto err_out;
} else if (seg_count >= TX_MAX_SEGS) {
} else if (err == EFBIG || seg_count >= TX_MAX_SEGS) {
if (cxgb_debug)
printf("mbuf chain too long: %d max allowed %d\n",
seg_count, TX_MAX_SEGS);
@@ -251,171 +116,17 @@ busdma_map_sg_collapse(struct mbuf **m, bus_dma_segment_t *segs, int *nsegs)
goto err_out;
}
if ((m0 = mcl_alloc(seg_count, &type)) == NULL) {
err = ENOMEM;
goto err_out;
}
memcpy(m0, *m, sizeof(struct m_hdr) + sizeof(struct pkthdr));
m0->m_type = type;
KASSERT(m0->m_pkthdr.len, ("empty packet being marshalled"));
mv = mtomv(m0);
mv->mv_count = seg_count;
mv->mv_first = 0;
for (i = 0, mi = mv->mv_vec; i < seg_count; mi++, segs++, i++) {
n = marray[i];
busdma_map_mbuf_fast(n, segs);
_mcl_collapse_mbuf(mi, n);
}
n = *m;
while (n) {
if (n->m_len == 0)
/* do nothing - free if mbuf or cluster */;
else if ((n->m_flags & M_EXT) == 0) {
goto skip;
} else if ((n->m_flags & M_EXT) &&
(n->m_ext.ext_type == EXT_PACKET)) {
goto skip;
} else if (n->m_flags & M_NOFREE)
goto skip;
else if ((n->m_flags & (M_EXT|M_NOFREE)) == M_EXT)
n->m_flags &= ~M_EXT;
mhead = n->m_next;
m_free(n);
n = mhead;
freed++;
continue;
skip:
/*
* is an immediate mbuf or is from the packet zone
*/
n = n->m_next;
}
*nsegs = seg_count;
*m = m0;
DPRINTF("pktlen=%d m0=%p *m=%p m=%p\n", m0->m_pkthdr.len, m0, *m, m);
return (0);
err_out:
m_freem(*m);
*m = NULL;
err_out:
return (err);
}
int
busdma_map_sg_vec(struct mbuf **m, struct mbuf **mret,
bus_dma_segment_t *segs, int pkt_count)
{
struct mbuf *m0, **mp;
struct mbuf_iovec *mi;
struct mbuf_vec *mv;
int i, type;
if ((m0 = mcl_alloc(pkt_count, &type)) == NULL)
return (ENOMEM);
memcpy(m0, *m, sizeof(struct m_hdr) +
sizeof(struct pkthdr));
m0->m_type = type;
mv = mtomv(m0);
mv->mv_count = pkt_count;
mv->mv_first = 0;
for (mp = m, i = 0, mi = mv->mv_vec; i < pkt_count;
mp++, segs++, mi++, i++) {
busdma_map_mbuf_fast(*mp, segs);
_mcl_collapse_mbuf(mi, *mp);
KASSERT(mi->mi_len, ("empty packet"));
}
for (mp = m, i = 0; i < pkt_count; i++, mp++) {
if ((((*mp)->m_flags & (M_EXT|M_NOFREE)) == M_EXT)
&& ((*mp)->m_ext.ext_type != EXT_PACKET)) {
(*mp)->m_flags &= ~M_EXT;
m_free(*mp);
}
}
*mret = m0;
return (0);
}
void
mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx)
busdma_map_sg_vec(struct sge_txq *txq, struct tx_sw_desc *txsd,
struct mbuf *m, bus_dma_segment_t *segs, int *nsegs)
{
int dofree;
caddr_t cl;
cl = mi->mi_base;
switch (type) {
case EXT_PACKET:
#ifdef INVARIANTS
cxgb_pack_outstanding--;
#endif
m_free(mi->mi_mbuf);
return;
case EXT_MBUF:
KASSERT((mi->mi_flags & M_NOFREE) == 0, ("no free set on mbuf"));
#ifdef INVARIANTS
cxgb_mbufs_outstanding--;
#endif
m_free_fast((struct mbuf *)cl);
return;
default:
break;
}
/* Account for lazy ref count assign. */
dofree = (mi->mi_refcnt == NULL);
if (dofree == 0) {
if (*(mi->mi_refcnt) == 1 ||
atomic_fetchadd_int(mi->mi_refcnt, -1) == 1)
dofree = 1;
}
if (dofree == 0)
return;
switch (type) {
case EXT_CLUSTER:
cxgb_cache_put(zone_clust, cl);
break;
case EXT_JUMBOP:
cxgb_cache_put(zone_jumbop, cl);
break;
case EXT_JUMBO9:
cxgb_cache_put(zone_jumbo9, cl);
break;
case EXT_JUMBO16:
cxgb_cache_put(zone_jumbo16, cl);
break;
case EXT_SFBUF:
case EXT_NET_DRV:
case EXT_MOD_TYPE:
case EXT_DISPOSABLE:
*(mi->mi_refcnt) = 0;
uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
mi->mi_ext.ref_cnt));
/* FALLTHROUGH */
case EXT_EXTREF:
KASSERT(mi->mi_ext.ext_free != NULL,
("%s: ext_free not set", __func__));
#if __FreeBSD_version >= 800016
(*(mi->mi_ext.ext_free))(mi->mi_ext.ext_arg1,
mi->mi_ext.ext_arg2);
#else
(*(mi->mi_ext.ext_free))(mi->mi_ext.ext_buf,
mi->mi_ext.ext_args);
#endif
break;
default:
dump_mi(mi);
panic("unknown mv type in m_free_vec type=%d idx=%d", type, idx);
break;
}
for (*nsegs = 0; m != NULL ; segs++, *nsegs += 1, m = m->m_nextpkt)
busdma_map_mbuf_fast(txq, txsd, m, segs);
}
int
_m_explode(struct mbuf *m)
{
panic("IMPLEMENT ME!!!");
}

sys/modules/cxgb/cxgb/Makefile

@@ -9,7 +9,7 @@ SRCS+= cxgb_xgmac.c cxgb_vsc7323.c cxgb_t3_hw.c cxgb_main.c
SRCS+= cxgb_sge.c cxgb_offload.c cxgb_tn1010.c
SRCS+= device_if.h bus_if.h pci_if.h
SRCS+= opt_inet.h opt_zero.h opt_sched.h
SRCS+= uipc_mvec.c cxgb_support.c cxgb_multiq.c
SRCS+= uipc_mvec.c
CFLAGS+= -g -DDEFAULT_JUMBO -I${CXGB}
CFLAGS+= -DDISABLE_MBUF_IOVEC