- bump __FreeBSD_version to reflect added buf_ring, memory barriers,

and ifnet functions

- add memory barriers to <machine/atomic.h>
- update drivers to only conditionally define their own memory barriers

- add lockless producer / consumer ring buffer
- remove ring buffer implementation from cxgb and update its callers

- add if_transmit(struct ifnet *ifp, struct mbuf *m) to ifnet to
  allow drivers to efficiently manage multiple hardware queues
  (i.e. not serialize all packets through one ifq)
- expose if_qflush to allow drivers to flush any driver managed queues

This work was supported by Bitgravity Inc. and Chelsio Inc.
This commit is contained in:
Kip Macy 2008-11-22 05:55:56 +00:00
parent 2a1b9f07fc
commit db7f0b974f
24 changed files with 472 additions and 256 deletions

View File

@ -22,6 +22,14 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW:
to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
20081121:
__FreeBSD_version 800054 adds memory barriers to
<machine/atomic.h>, new interfaces to ifnet to facilitate
multiple hardware transmit queues for cards that support
them, and a lock-less ring-buffer implementation to
enable drivers to more efficiently manage queueing of
packets.
20081117:
A new version of ZFS (version 13) has been merged to -HEAD.
This version has zpool attribute "listsnapshots" off by

View File

@ -32,6 +32,10 @@
#error this file needs sys/cdefs.h as a prerequisite
#endif
#define mb() __asm__ __volatile__ ("mfence;": : :"memory")
#define wmb() __asm__ __volatile__ ("sfence;": : :"memory")
#define rmb() __asm__ __volatile__ ("lfence;": : :"memory")
/*
* Various simple operations on memory, each of which is atomic in the
* presence of interrupts and multiple processors.

View File

@ -47,6 +47,10 @@
#include <machine/sysarch.h>
#endif
#define mb()
#define wmb()
#define rmb()
#ifndef I32_bit
#define I32_bit (1 << 7) /* IRQ disable */
#endif

View File

@ -1807,6 +1807,7 @@ kern/subr_acl_posix1e.c standard
kern/subr_autoconf.c standard
kern/subr_blist.c standard
kern/subr_bus.c standard
kern/subr_bufring.c standard
kern/subr_clist.c standard
kern/subr_clock.c standard
kern/subr_devstat.c standard

View File

@ -557,6 +557,8 @@
#endif /* BCE_DEBUG */
#if __FreeBSD_version < 800054
#if defined(__i386__) || defined(__amd64__)
#define mb() __asm volatile("mfence" ::: "memory")
#define wmb() __asm volatile("sfence" ::: "memory")
@ -566,6 +568,7 @@
#define rmb()
#define wmb()
#endif
#endif
/****************************************************************************/
/* Device identification definitions. */

View File

@ -41,6 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/condvar.h>
#include <sys/buf_ring.h>
#include <net/ethernet.h>
#include <net/if.h>
@ -258,7 +259,7 @@ struct sge_txq {
* mbuf touches
*/
struct mbuf_head cleanq;
struct buf_ring txq_mr;
struct buf_ring *txq_mr;
struct mbuf *immpkt;
uint32_t txq_drops;
uint32_t txq_skipped;

View File

@ -129,7 +129,7 @@ cxgb_pcpu_enqueue_packet_(struct sge_qset *qs, struct mbuf *m)
return (ENXIO);
}
txq = &qs->txq[TXQ_ETH];
err = buf_ring_enqueue(&txq->txq_mr, m);
err = buf_ring_enqueue(txq->txq_mr, m);
if (err) {
txq->txq_drops++;
m_freem(m);
@ -194,14 +194,11 @@ cxgb_dequeue_packet(struct sge_txq *txq, struct mbuf **m_vec)
}
sc = qs->port->adapter;
m = buf_ring_dequeue(&txq->txq_mr);
m = buf_ring_dequeue_sc(txq->txq_mr);
if (m == NULL)
return (0);
count = 1;
KASSERT(m->m_type == MT_DATA,
("m=%p is bad mbuf type %d from ring cons=%d prod=%d", m,
m->m_type, txq->txq_mr.br_cons, txq->txq_mr.br_prod));
m_vec[0] = m;
if (m->m_pkthdr.tso_segsz > 0 || m->m_pkthdr.len > TX_WR_SIZE_MAX ||
m->m_next != NULL || (cxgb_pcpu_tx_coalesce == 0)) {
@ -209,14 +206,14 @@ cxgb_dequeue_packet(struct sge_txq *txq, struct mbuf **m_vec)
}
size = m->m_pkthdr.len;
for (m = buf_ring_peek(&txq->txq_mr); m != NULL;
m = buf_ring_peek(&txq->txq_mr)) {
for (m = buf_ring_peek(txq->txq_mr); m != NULL;
m = buf_ring_peek(txq->txq_mr)) {
if (m->m_pkthdr.tso_segsz > 0 ||
size + m->m_pkthdr.len > TX_WR_SIZE_MAX || m->m_next != NULL)
break;
buf_ring_dequeue(&txq->txq_mr);
buf_ring_dequeue_sc(txq->txq_mr);
size += m->m_pkthdr.len;
m_vec[count++] = m;
@ -367,7 +364,7 @@ cxgb_pcpu_free(struct sge_qset *qs)
mtx_lock(&txq->lock);
while ((m = mbufq_dequeue(&txq->sendq)) != NULL)
m_freem(m);
while ((m = buf_ring_dequeue(&txq->txq_mr)) != NULL)
while ((m = buf_ring_dequeue_sc(txq->txq_mr)) != NULL)
m_freem(m);
t3_free_tx_desc_all(txq);
@ -429,7 +426,7 @@ cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush)
initerr = ENXIO;
else if (immpkt) {
if (!buf_ring_empty(&txq->txq_mr))
if (!buf_ring_empty(txq->txq_mr))
initerr = cxgb_pcpu_enqueue_packet_(qs, immpkt);
else
txq->immpkt = immpkt;
@ -460,7 +457,7 @@ cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush)
}
stopped = isset(&qs->txq_stopped, TXQ_ETH);
flush = (((!buf_ring_empty(&txq->txq_mr) || (!IFQ_DRV_IS_EMPTY(&pi->ifp->if_snd))) && !stopped) || txq->immpkt);
flush = (((!buf_ring_empty(txq->txq_mr) || (!IFQ_DRV_IS_EMPTY(&pi->ifp->if_snd))) && !stopped) || txq->immpkt);
max_desc = tx_flush ? TX_ETH_Q_SIZE : TX_START_MAX_DESC;
if (cxgb_debug)
@ -471,7 +468,7 @@ cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush)
if ((tx_flush && flush && err == 0) &&
(!buf_ring_empty(&txq->txq_mr) ||
(!buf_ring_empty(txq->txq_mr) ||
!IFQ_DRV_IS_EMPTY(&pi->ifp->if_snd))) {
struct thread *td = curthread;
@ -521,7 +518,7 @@ cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *immpkt)
txq = &qs->txq[TXQ_ETH];
if (((sc->tunq_coalesce == 0) ||
(buf_ring_count(&txq->txq_mr) >= TX_WR_COUNT_MAX) ||
(buf_ring_count(txq->txq_mr) >= TX_WR_COUNT_MAX) ||
(cxgb_pcpu_tx_coalesce == 0)) && mtx_trylock(&txq->lock)) {
if (cxgb_debug)
printf("doing immediate transmit\n");
@ -529,12 +526,12 @@ cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *immpkt)
txq->flags |= TXQ_TRANSMITTING;
err = cxgb_pcpu_start_(qs, immpkt, FALSE);
txq->flags &= ~TXQ_TRANSMITTING;
resid = (buf_ring_count(&txq->txq_mr) > 64) || (desc_reclaimable(txq) > 64);
resid = (buf_ring_count(txq->txq_mr) > 64) || (desc_reclaimable(txq) > 64);
mtx_unlock(&txq->lock);
} else if (immpkt) {
if (cxgb_debug)
printf("deferred coalesce=%jx ring_count=%d mtx_owned=%d\n",
sc->tunq_coalesce, buf_ring_count(&txq->txq_mr), mtx_owned(&txq->lock));
sc->tunq_coalesce, buf_ring_count(txq->txq_mr), mtx_owned(&txq->lock));
err = cxgb_pcpu_enqueue_packet_(qs, immpkt);
}
@ -586,7 +583,7 @@ cxgb_pcpu_start_proc(void *arg)
if ((qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
idleticks = hz;
if (!buf_ring_empty(&txq->txq_mr) ||
if (!buf_ring_empty(txq->txq_mr) ||
!mbufq_empty(&txq->sendq))
cxgb_pcpu_free(qs);
goto done;
@ -611,11 +608,13 @@ cxgb_pcpu_start_proc(void *arg)
mtx_unlock(&qs->rspq.lock);
}
#endif
if ((!buf_ring_empty(&txq->txq_mr)) && err == 0) {
if ((!buf_ring_empty(txq->txq_mr)) && err == 0) {
#if 0
if (cxgb_debug)
printf("head=%p cons=%d prod=%d\n",
txq->sendq.head, txq->txq_mr.br_cons,
txq->txq_mr.br_prod);
#endif
continue;
}
done:

View File

@ -156,9 +156,6 @@ struct t3_mbuf_hdr {
#if defined(__i386__) || defined(__amd64__)
#define mb() __asm volatile("mfence":::"memory")
#define rmb() __asm volatile("lfence":::"memory")
#define wmb() __asm volatile("sfence" ::: "memory")
#define smp_mb() mb()
#define L1_CACHE_BYTES 128
@ -179,163 +176,11 @@ extern void kdb_backtrace(void);
#else /* !i386 && !amd64 */
#define mb()
#define rmb()
#define wmb()
#define smp_mb()
#define prefetch(x)
#define L1_CACHE_BYTES 32
#endif
struct buf_ring {
caddr_t *br_ring;
volatile uint32_t br_cons;
volatile uint32_t br_prod;
int br_size;
struct mtx br_lock;
};
struct buf_ring *buf_ring_alloc(int count, int flags);
void buf_ring_free(struct buf_ring *);
static __inline int
buf_ring_count(struct buf_ring *mr)
{
int size = mr->br_size;
uint32_t mask = size - 1;
return ((size + mr->br_prod - mr->br_cons) & mask);
}
static __inline int
buf_ring_empty(struct buf_ring *mr)
{
return (mr->br_cons == mr->br_prod);
}
static __inline int
buf_ring_full(struct buf_ring *mr)
{
uint32_t mask;
mask = mr->br_size - 1;
return (mr->br_cons == ((mr->br_prod + 1) & mask));
}
/*
* The producer and consumer are independently locked
* this relies on the consumer providing his own serialization
*
*/
static __inline void *
buf_ring_dequeue(struct buf_ring *mr)
{
uint32_t prod, cons, mask;
caddr_t *ring, m;
ring = (caddr_t *)mr->br_ring;
mask = mr->br_size - 1;
cons = mr->br_cons;
mb();
prod = mr->br_prod;
m = NULL;
if (cons != prod) {
m = ring[cons];
ring[cons] = NULL;
mr->br_cons = (cons + 1) & mask;
mb();
}
return (m);
}
#ifdef DEBUG_BUFRING
static __inline void
__buf_ring_scan(struct buf_ring *mr, void *m, char *file, int line)
{
int i;
for (i = 0; i < mr->br_size; i++)
if (m == mr->br_ring[i])
panic("%s:%d m=%p present prod=%d cons=%d idx=%d", file,
line, m, mr->br_prod, mr->br_cons, i);
}
static __inline void
buf_ring_scan(struct buf_ring *mr, void *m, char *file, int line)
{
mtx_lock(&mr->br_lock);
__buf_ring_scan(mr, m, file, line);
mtx_unlock(&mr->br_lock);
}
#else
static __inline void
__buf_ring_scan(struct buf_ring *mr, void *m, char *file, int line)
{
}
static __inline void
buf_ring_scan(struct buf_ring *mr, void *m, char *file, int line)
{
}
#endif
static __inline int
__buf_ring_enqueue(struct buf_ring *mr, void *m, char *file, int line)
{
uint32_t prod, cons, mask;
int err;
mask = mr->br_size - 1;
prod = mr->br_prod;
mb();
cons = mr->br_cons;
__buf_ring_scan(mr, m, file, line);
if (((prod + 1) & mask) != cons) {
KASSERT(mr->br_ring[prod] == NULL, ("overwriting entry"));
mr->br_ring[prod] = m;
mb();
mr->br_prod = (prod + 1) & mask;
err = 0;
} else
err = ENOBUFS;
return (err);
}
static __inline int
buf_ring_enqueue_(struct buf_ring *mr, void *m, char *file, int line)
{
int err;
mtx_lock(&mr->br_lock);
err = __buf_ring_enqueue(mr, m, file, line);
mtx_unlock(&mr->br_lock);
return (err);
}
#define buf_ring_enqueue(mr, m) buf_ring_enqueue_((mr), (m), __FILE__, __LINE__)
static __inline void *
buf_ring_peek(struct buf_ring *mr)
{
int prod, cons, mask;
caddr_t *ring, m;
ring = (caddr_t *)mr->br_ring;
mask = mr->br_size - 1;
cons = mr->br_cons;
prod = mr->br_prod;
m = NULL;
if (cons != prod)
m = ring[cons];
return (m);
}
#define DBG_RX (1 << 0)
static const int debug_flags = DBG_RX;

View File

@ -1719,10 +1719,8 @@ t3_free_qset(adapter_t *sc, struct sge_qset *q)
t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
for (i = 0; i < SGE_TXQ_PER_SET; i++)
if (q->txq[i].txq_mr.br_ring != NULL) {
free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
mtx_destroy(&q->txq[i].txq_mr.br_lock);
}
if (q->txq[i].txq_mr != NULL)
buf_ring_free(q->txq[i].txq_mr, M_DEVBUF);
for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
if (q->fl[i].desc) {
mtx_lock_spin(&sc->sge.reg_lock);
@ -1885,7 +1883,6 @@ t3_free_tx_desc(struct sge_txq *q, int reclaimable)
txsd->flags &= ~TX_SW_DESC_MAPPED;
}
m_freem_iovec(&txsd->mi);
buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
txsd->mi.mi_base = NULL;
/*
* XXX check for cache hit rate here
@ -2285,14 +2282,12 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
int i, header_size, ret = 0;
for (i = 0; i < SGE_TXQ_PER_SET; i++) {
if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
M_DEVBUF, M_WAITOK, &q->txq[i].lock)) == NULL) {
device_printf(sc->dev, "failed to allocate mbuf ring\n");
goto err;
}
q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
}
init_qset_cntxt(q, id);
@ -3509,12 +3504,14 @@ t3_add_configured_sysctls(adapter_t *sc)
SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
0, "#tunneled packets waiting to be sent");
#if 0
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
0, "#tunneled packets queue producer index");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
0, "#tunneled packets queue consumer index");
#endif
SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
0, "#tunneled packets processed by the card");

View File

@ -303,33 +303,3 @@ cxgb_cache_refill(void)
uma_zfree(zone, vec[i]);
}
struct buf_ring *
buf_ring_alloc(int count, int flags)
{
struct buf_ring *br;
KASSERT(powerof2(count), ("buf ring must be size power of 2"));
br = malloc(sizeof(struct buf_ring), M_DEVBUF, flags|M_ZERO);
if (br == NULL)
return (NULL);
br->br_ring = malloc(sizeof(caddr_t)*count, M_DEVBUF, flags|M_ZERO);
if (br->br_ring == NULL) {
free(br, M_DEVBUF);
return (NULL);
}
mtx_init(&br->br_lock, "buf ring", NULL, MTX_DUPOK|MTX_DEF);
br->br_size = count;
br->br_prod = br->br_cons = 0;
return (br);
}
void
buf_ring_free(struct buf_ring *br)
{
free(br->br_ring, M_DEVBUF);
free(br, M_DEVBUF);
}

View File

@ -90,7 +90,7 @@ static int __cxio_init_resource_fifo(struct buf_ring **fifo,
u32 rarray[16];
mtx_init(fifo_lock, "cxio fifo", NULL, MTX_DEF|MTX_DUPOK);
*fifo = buf_ring_alloc(nr, M_NOWAIT);
*fifo = buf_ring_alloc(nr, M_DEVBUF, M_NOWAIT, fifo_lock);
if (*fifo == NULL)
return (-ENOMEM);
#if 0
@ -122,7 +122,7 @@ static int __cxio_init_resource_fifo(struct buf_ring **fifo,
buf_ring_enqueue(*fifo, (void *) (uintptr_t)i);
#if 0
for (i = 0; i < skip_low + skip_high; i++)
buf_ring_dequeue(*fifo);
buf_ring_dequeue_sc(*fifo);
#endif
return 0;
}
@ -149,7 +149,8 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
mtx_init(&rdev_p->rscp->qpid_fifo_lock, "qpid fifo", NULL, MTX_DEF);
rdev_p->rscp->qpid_fifo = buf_ring_alloc(T3_MAX_NUM_QP, M_NOWAIT);
rdev_p->rscp->qpid_fifo = buf_ring_alloc(T3_MAX_NUM_QP, M_DEVBUF,
M_NOWAIT, &rdev_p->rscp->qpid_fifo_lock);
if (rdev_p->rscp->qpid_fifo == NULL)
return (-ENOMEM);
@ -168,7 +169,7 @@ int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
void cxio_hal_destroy_rhdl_resource(void)
{
buf_ring_free(rhdl_fifo);
buf_ring_free(rhdl_fifo, M_DEVBUF);
}
#endif
@ -202,11 +203,11 @@ int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
goto pdid_err;
return 0;
pdid_err:
buf_ring_free(rscp->cqid_fifo);
buf_ring_free(rscp->cqid_fifo, M_DEVBUF);
cqid_err:
buf_ring_free(rscp->qpid_fifo);
buf_ring_free(rscp->qpid_fifo, M_DEVBUF);
qpid_err:
buf_ring_free(rscp->tpt_fifo);
buf_ring_free(rscp->tpt_fifo, M_DEVBUF);
tpt_err:
return (-ENOMEM);
}
@ -219,7 +220,7 @@ static u32 cxio_hal_get_resource(struct buf_ring *fifo, struct mtx *lock)
u32 entry;
mtx_lock(lock);
entry = (u32)(uintptr_t)buf_ring_dequeue(fifo);
entry = (u32)(uintptr_t)buf_ring_dequeue_sc(fifo);
mtx_unlock(lock);
return entry;
}
@ -276,10 +277,10 @@ void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
{
buf_ring_free(rscp->tpt_fifo);
buf_ring_free(rscp->cqid_fifo);
buf_ring_free(rscp->qpid_fifo);
buf_ring_free(rscp->pdid_fifo);
buf_ring_free(rscp->tpt_fifo, M_DEVBUF);
buf_ring_free(rscp->cqid_fifo, M_DEVBUF);
buf_ring_free(rscp->qpid_fifo, M_DEVBUF);
buf_ring_free(rscp->pdid_fifo, M_DEVBUF);
free(rscp, M_DEVBUF);
}

View File

@ -279,6 +279,8 @@ struct mxge_media_type
/* implement our own memory barriers, since bus_space_barrier
cannot handle write-combining regions */
#if __FreeBSD_version < 800053
#if defined (__GNUC__)
#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
#define mb() __asm__ __volatile__ ("sfence;": : :"memory")
@ -293,6 +295,8 @@ struct mxge_media_type
#error "unknown compiler"
#endif
#endif
static inline void
mxge_pio_copy(volatile void *to_v, void *from_v, size_t size)
{

View File

@ -242,8 +242,12 @@ typedef xge_pci_info_t *pci_cfg_h;
mtx_unlock_flags(lockp, flags); \
}
#if __FreeBSD_version > 800053
/* Write memory barrier */
#define xge_os_wmb() wmb()
#else
#define xge_os_wmb()
#endif
/* Delay (in micro seconds) */
#define xge_os_udelay(us) DELAY(us)

View File

@ -32,6 +32,21 @@
#error this file needs sys/cdefs.h as a prerequisite
#endif
#if defined(I686_CPU)
#define mb() __asm__ __volatile__ ("mfence;": : :"memory")
#define wmb() __asm__ __volatile__ ("sfence;": : :"memory")
#define rmb() __asm__ __volatile__ ("lfence;": : :"memory")
#else
/*
* do we need a serializing instruction?
*/
#define mb()
#define wmb()
#define rmb()
#endif
/*
* Various simple operations on memory, each of which is atomic in the
* presence of interrupts and multiple processors.

View File

@ -165,15 +165,6 @@ do { \
#define spin_unlock_irqrestore mtx_unlock_irqrestore
#ifndef mb
#define mb() __asm__ __volatile__("lock; addl $0, 0(%%esp)": : :"memory")
#endif
#ifndef rmb
#define rmb() mb()
#endif
#ifndef wmb
#define wmb() barrier()
#endif
#ifdef SMP
#define smp_mb() mb()
#define smp_rmb() rmb()

View File

@ -29,6 +29,10 @@
#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_
#define mb()
#define wmb()
#define rmb()
/*
* Various simple arithmetic on memory which is atomic in the presence
* of interrupts and SMP safe.

68
sys/kern/subr_bufring.c Normal file
View File

@ -0,0 +1,68 @@
/**************************************************************************
*
* Copyright (c) 2007,2008 Kip Macy kmacy@freebsd.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. The name of Kip Macy nor the names of other
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
***************************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/ktr.h>
#include <sys/buf_ring.h>
/*
 * Allocate and initialize an empty buf_ring.
 *
 * count - number of pointer slots in the ring; must be a power of two
 *         and at least 2, because indices are wrapped with a mask of
 *         count - 1 and buf_ring_enqueue() reports the ring as full
 *         while one slot is still unused.
 * type  - malloc type the ring is charged to; the same type must be
 *         handed to buf_ring_free().
 * flags - malloc(9) flags; M_ZERO is always added so every slot and
 *         index starts out as zero.
 * lock  - only recorded when DEBUG_BUFRING is defined, where the
 *         single-consumer routines use it to check lock ownership.
 *
 * Returns the new ring, or NULL if the allocation failed.
 */
struct buf_ring *
buf_ring_alloc(int count, struct malloc_type *type, int flags, struct mtx *lock)
{
	struct buf_ring *br;

	/*
	 * powerof2(0) is true, so reject undersized rings explicitly: a
	 * count of 0 would produce a mask of -1 and no storage at all.
	 */
	KASSERT(count >= 2, ("buf ring must have at least 2 entries"));
	KASSERT(powerof2(count), ("buf ring must be size power of 2"));

	/* The header and the slot array live in a single allocation. */
	br = malloc(sizeof(*br) + count * sizeof(br->br_ring[0]),
	    type, flags | M_ZERO);
	if (br == NULL)
		return (NULL);
#ifdef DEBUG_BUFRING
	br->br_lock = lock;
#endif
	/* Producer and consumer each keep a private copy of size and mask. */
	br->br_prod_size = br->br_cons_size = count;
	br->br_prod_mask = br->br_cons_mask = count - 1;
	br->br_prod_head = br->br_cons_head = 0;
	br->br_prod_tail = br->br_cons_tail = 0;
	return (br);
}
/*
 * Release a ring obtained from buf_ring_alloc().
 *
 * br   - ring to free; header and slot array are one allocation, so a
 *        single free() releases both.
 * type - malloc type passed through to free().
 *
 * Nothing is done with buffers still stored in the ring.
 */
void
buf_ring_free(struct buf_ring *br, struct malloc_type *type)
{
free(br, type);
}

View File

@ -113,10 +113,11 @@ static int ifconf(u_long, caddr_t);
static void if_freemulti(struct ifmultiaddr *);
static void if_grow(void);
static void if_init(void *);
static void if_qflush(struct ifaltq *);
static void if_qflush(struct ifnet *);
static void if_route(struct ifnet *, int flag, int fam);
static int if_setflag(struct ifnet *, int, int, int *, int);
static void if_slowtimo(void *);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
static void if_unroute(struct ifnet *, int flag, int fam);
static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int if_rtdel(struct radix_node *, void *);
@ -126,6 +127,7 @@ static void if_start_deferred(void *context, int pending);
static void do_link_state_change(void *, int);
static int if_getgroup(struct ifgroupreq *, struct ifnet *);
static int if_getgroupmembers(struct ifgroupreq *);
#ifdef INET6
/*
* XXX: declare here to avoid to include many inet6 related files..
@ -481,6 +483,28 @@ if_free_type(struct ifnet *ifp, u_char type)
free(ifp, M_IFNET);
};
/*
 * Set up an interface queue for use with the given interface: initialize
 * its mutex, give it a queue limit if the owner left it at zero, and
 * reset its ALTQ bookkeeping.
 */
void
ifq_attach(struct ifaltq *ifq, struct ifnet *ifp)
{

	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);

	/* A zero limit is replaced with ifqmaxlen. */
	if (ifq->ifq_maxlen == 0) {
		ifq->ifq_maxlen = ifqmaxlen;
	}

	/* Bind the queue to its interface and clear the ALTQ type/pointers. */
	ifq->altq_ifp = ifp;
	ifq->altq_tbr = NULL;
	ifq->altq_disc = NULL;
	ifq->altq_type = 0;

	/* Only the flag bits covered by ALTQF_CANTCHANGE are kept. */
	ifq->altq_flags &= ALTQF_CANTCHANGE;
}
/*
 * Tear down an interface queue by destroying its mutex.  Nothing else in
 * the queue is touched.
 */
void
ifq_detach(struct ifaltq *ifq)
{
mtx_destroy(&ifq->ifq_mtx);
}
/*
* Perform generic interface initialization tasks and attach the interface
* to the list of "active" interfaces.
@ -522,7 +546,8 @@ if_attach(struct ifnet *ifp)
getmicrotime(&ifp->if_lastchange);
ifp->if_data.ifi_epoch = time_uptime;
ifp->if_data.ifi_datalen = sizeof(struct if_data);
ifp->if_transmit = if_transmit;
ifp->if_qflush = if_qflush;
#ifdef MAC
mac_ifnet_init(ifp);
mac_ifnet_create(ifp);
@ -534,7 +559,7 @@ if_attach(struct ifnet *ifp)
make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
net_cdevsw.d_name, ifp->if_index);
mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
ifq_attach(&ifp->if_snd, ifp);
/*
* create a Link Level name for this device
@ -572,19 +597,6 @@ if_attach(struct ifnet *ifp)
TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
ifp->if_broadcastaddr = NULL; /* reliably crash if used uninitialized */
/*
* XXX: why do we warn about this? We're correcting it and most
* drivers just set the value the way we do.
*/
if (ifp->if_snd.ifq_maxlen == 0) {
if_printf(ifp, "XXX: driver didn't set ifq_maxlen\n");
ifp->if_snd.ifq_maxlen = ifqmaxlen;
}
ifp->if_snd.altq_type = 0;
ifp->if_snd.altq_disc = NULL;
ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
ifp->if_snd.altq_tbr = NULL;
ifp->if_snd.altq_ifp = ifp;
IFNET_WLOCK();
TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
@ -826,7 +838,7 @@ if_detach(struct ifnet *ifp)
KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
knlist_clear(&ifp->if_klist, 0);
knlist_destroy(&ifp->if_klist);
mtx_destroy(&ifp->if_snd.ifq_mtx);
ifq_detach(&ifp->if_snd);
IF_AFDATA_DESTROY(ifp);
splx(s);
}
@ -1377,7 +1389,8 @@ if_unroute(struct ifnet *ifp, int flag, int fam)
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
if_qflush(&ifp->if_snd);
ifp->if_qflush(ifp);
#ifdef DEV_CARP
if (ifp->if_carp)
carp_carpdev_state(ifp->if_carp);
@ -1507,10 +1520,12 @@ if_up(struct ifnet *ifp)
* Flush an interface queue.
*/
static void
if_qflush(struct ifaltq *ifq)
if_qflush(struct ifnet *ifp)
{
struct mbuf *m, *n;
struct ifaltq *ifq;
ifq = &ifp->if_snd;
IFQ_LOCK(ifq);
#ifdef ALTQ
if (ALTQ_IS_ENABLED(ifq))
@ -2801,6 +2816,19 @@ if_start_deferred(void *context, int pending)
(ifp->if_start)(ifp);
}
/*
 * Default if_transmit method, kept for backwards compatibility with
 * drivers that have not implemented their own transmit routine.
 * It passes the mbuf to IFQ_HANDOFF() and returns the error value
 * that macro stores.
 */
static int
if_transmit(struct ifnet *ifp, struct mbuf *m)
{
int error;
IFQ_HANDOFF(ifp, m, error);
return (error);
}
int
if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
{

View File

@ -186,7 +186,11 @@ struct ifnet {
/* protected by if_addr_mtx */
void *if_pf_kif;
void *if_lagg; /* lagg glue */
void *if_pspare[10]; /* multiq/TOE 3; vimage 3; general use 4 */
void *if_pspare[8]; /* multiq/TOE 3; vimage 3; general use 4 */
void (*if_qflush) /* flush any queues */
(struct ifnet *);
int (*if_transmit) /* initiate output routine */
(struct ifnet *, struct mbuf *);
int if_ispare[2]; /* general use 2 */
};
@ -686,6 +690,9 @@ int ifioctl(struct socket *, u_long, caddr_t, struct thread *);
int ifpromisc(struct ifnet *, int);
struct ifnet *ifunit(const char *);
void ifq_attach(struct ifaltq *, struct ifnet *ifp);
void ifq_detach(struct ifaltq *);
struct ifaddr *ifa_ifwithaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *);

View File

@ -39,6 +39,10 @@
#define __ATOMIC_BARRIER \
__asm __volatile("sync" : : : "memory")
#define mb() __ATOMIC_BARRIER
#define wmb() mb()
#define rmb() mb()
/*
* atomic_add(p, v)
* { *p += v; }

View File

@ -40,6 +40,10 @@
#define __ASI_ATOMIC ASI_P
#endif
#define mb() __asm__ __volatile__ ("membar #MemIssue": : :"memory")
#define wmb() mb()
#define rmb() mb()
/*
* Various simple arithmetic on memory which is atomic in the presence
* of interrupts and multiple processors. See atomic(9) for details.

View File

@ -33,6 +33,10 @@
#include <machine/cpufunc.h>
#define mb() __asm__ __volatile__ ("membar #MemIssue": : :"memory")
#define wmb() mb()
#define rmb() mb()
/* Userland needs different ASI's. */
#ifdef _KERNEL
#define __ASI_ATOMIC ASI_N

250
sys/sys/buf_ring.h Normal file
View File

@ -0,0 +1,250 @@
/**************************************************************************
*
* Copyright (c) 2007,2008 Kip Macy kmacy@freebsd.org
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. The name of Kip Macy nor the names of other
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*
***************************************************************************/
#ifndef _SYS_BUF_RING_H_
#define _SYS_BUF_RING_H_
#include <machine/cpu.h>
#if defined(INVARIANTS) && !defined(DEBUG_BUFRING)
#define DEBUG_BUFRING 1
#endif
#ifdef DEBUG_BUFRING
#include <sys/lock.h>
#include <sys/mutex.h>
#endif
/*
 * Ring buffer of pointers with separate producer and consumer index
 * groups.  The slot array trails the header in the same allocation
 * (see buf_ring_alloc()).
 */
struct buf_ring {
/* Producer side: head is advanced to reserve a slot, tail to publish it. */
volatile uint32_t br_prod_head;
volatile uint32_t br_prod_tail;
/* Slot count and index mask (count - 1), as set by buf_ring_alloc(). */
int br_prod_size;
int br_prod_mask;
/*
* Pad out to next L2 cache line
*/
/* 14 * 8 = 112 bytes; 128 together with the four fields above if int is 4 bytes. */
uint64_t _pad0[14];
/* Consumer side: head is advanced to claim a slot, tail to release it. */
volatile uint32_t br_cons_head;
volatile uint32_t br_cons_tail;
/* Consumer-side copies of the slot count and index mask. */
int br_cons_size;
int br_cons_mask;
/*
* Pad out to next L2 cache line
*/
uint64_t _pad1[14];
#ifdef DEBUG_BUFRING
/* Lock recorded at allocation; used only for ownership checks. */
struct mtx *br_lock;
#endif
/* Trailing slot storage, sized at allocation time. */
void *br_ring[0];
};
/*
 * Add 'buf' to the producer end of the ring.
 *
 * A slot is reserved by advancing br_prod_head with a compare-and-set,
 * the pointer is stored, and the slot is then published by advancing
 * br_prod_tail once earlier reservations have been published.
 *
 * Returns 0 on success, or ENOSPC when advancing the producer head
 * would make it equal to br_cons_tail (ring full).
 */
static __inline int
buf_ring_enqueue(struct buf_ring *br, void *buf)
{
uint32_t prod_head, prod_next;
uint32_t cons_tail;
int success;
#ifdef DEBUG_BUFRING
int i;
/* Debug only: panic if 'buf' is already present between cons_head and prod_head. */
for (i = br->br_cons_head; i != br->br_prod_head;
i = ((i + 1) & br->br_cons_mask))
if(br->br_ring[i] == buf)
panic("buf=%p already enqueue at %d prod=%d cons=%d",
buf, i, br->br_prod_tail, br->br_cons_tail);
#endif
critical_enter();
/* Retry until our snapshot of br_prod_head is successfully advanced. */
do {
prod_head = br->br_prod_head;
cons_tail = br->br_cons_tail;
prod_next = (prod_head + 1) & br->br_prod_mask;
if (prod_next == cons_tail) {
critical_exit();
return (ENOSPC);
}
success = atomic_cmpset_int(&br->br_prod_head, prod_head,
prod_next);
} while (success == 0);
#ifdef DEBUG_BUFRING
/* Debug dequeues clear their slot, so a non-NULL entry here is unexpected. */
if (br->br_ring[prod_head] != NULL)
panic("dangling value in enqueue");
#endif
br->br_ring[prod_head] = buf;
/* Barrier between storing the pointer and publishing the new tail. */
wmb();
/*
* If there are other enqueues in progress
* that preceded us, we need to wait for them
* to complete
*/
while (br->br_prod_tail != prod_head)
cpu_spinwait();
br->br_prod_tail = prod_next;
mb();
critical_exit();
return (0);
}
/*
* multi-consumer safe dequeue
*
* A slot is claimed by advancing br_cons_head with a compare-and-set,
* the pointer is read, and the slot is released by advancing
* br_cons_tail once earlier claims have been released.
*
* Returns the dequeued pointer, or NULL when br_cons_head equals
* br_prod_tail (ring empty).
*/
static __inline void *
buf_ring_dequeue_mc(struct buf_ring *br)
{
uint32_t cons_head, cons_next;
uint32_t prod_tail;
void *buf;
int success;
critical_enter();
/* Retry until our snapshot of br_cons_head is successfully advanced. */
do {
cons_head = br->br_cons_head;
prod_tail = br->br_prod_tail;
cons_next = (cons_head + 1) & br->br_cons_mask;
if (cons_head == prod_tail) {
critical_exit();
return (NULL);
}
success = atomic_cmpset_int(&br->br_cons_head, cons_head,
cons_next);
} while (success == 0);
buf = br->br_ring[cons_head];
#ifdef DEBUG_BUFRING
/* Debug only: clear the slot so enqueue can detect dangling entries. */
br->br_ring[cons_head] = NULL;
#endif
mb();
/*
* If there are other dequeues in progress
* that preceded us, we need to wait for them
* to complete
*/
while (br->br_cons_tail != cons_head)
cpu_spinwait();
br->br_cons_tail = cons_next;
mb();
critical_exit();
return (buf);
}
/*
* Single-Consumer dequeue for uses where dequeue
* is protected by a lock
*
* The consumer indices are updated with plain stores rather than a
* compare-and-set.
*
* Returns the dequeued pointer, or NULL when br_cons_head equals
* br_prod_tail (ring empty).
*/
static __inline void *
buf_ring_dequeue_sc(struct buf_ring *br)
{
uint32_t cons_head, cons_next;
uint32_t prod_tail;
void *buf;
critical_enter();
cons_head = br->br_cons_head;
prod_tail = br->br_prod_tail;
cons_next = (cons_head + 1) & br->br_cons_mask;
if (cons_head == prod_tail) {
critical_exit();
return (NULL);
}
br->br_cons_head = cons_next;
buf = br->br_ring[cons_head];
mb();
#ifdef DEBUG_BUFRING
/* Debug only: clear the slot and verify the single-consumer assumptions. */
br->br_ring[cons_head] = NULL;
/* NOTE(review): br_lock is not NULL-checked here, unlike in buf_ring_peek() - confirm a NULL lock is never used with this routine. */
if (!mtx_owned(br->br_lock))
panic("lock not held on single consumer dequeue");
/* With one consumer, tail must still equal the head we started from. */
if (br->br_cons_tail != cons_head)
panic("inconsistent list cons_tail=%d cons_head=%d",
br->br_cons_tail, cons_head);
#endif
br->br_cons_tail = cons_next;
mb();
critical_exit();
return (buf);
}
static __inline void *
buf_ring_peek(struct buf_ring *br)
{
#ifdef DEBUG_BUFRING
if ((br->br_lock != NULL) && !mtx_owned(br->br_lock))
panic("lock not held on single consumer dequeue");
#endif
mb();
return (br->br_ring[br->br_cons_tail]);
}
static __inline int
buf_ring_full(struct buf_ring *br)
{
return (((br->br_prod_head + 1) & br->br_prod_mask) == br->br_cons_tail);
}
/*
 * Report whether the ring is empty: non-zero when the consumer head has
 * caught up with the producer tail.
 */
static __inline int
buf_ring_empty(struct buf_ring *br)
{
	int is_empty;

	is_empty = (br->br_cons_head == br->br_prod_tail);
	return (is_empty);
}
static __inline int
buf_ring_count(struct buf_ring *br)
{
return ((br->br_prod_size + br->br_prod_tail - br->br_cons_tail)
& br->br_prod_mask);
}
struct buf_ring *buf_ring_alloc(int count, struct malloc_type *type, int flags,
struct mtx *);
void buf_ring_free(struct buf_ring *br, struct malloc_type *type);
#endif

View File

@ -57,7 +57,7 @@
* is created, otherwise 1.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 800053 /* Master, propagated to newvers */
#define __FreeBSD_version 800054 /* Master, propagated to newvers */
#ifndef LOCORE
#include <sys/types.h>