The drbr(9) API appeared to be so unclear, that most drivers in

tree used it incorrectly, which lead to inaccurate overrated
if_obytes accounting. The drbr(9) used to update ifnet stats on
drbr_enqueue(), which is not accurate since enqueuing doesn't
imply successful processing by driver. Dequeuing neither mean
that. Most drivers also called drbr_stats_update() which did
accounting again, leading to doubled if_obytes statistics. And
in case of severe transmitting, when a packet could be several
times enqueued and dequeued it could have been accounted several
times.

o Thus, make drbr(9) API thinner. Now drbr(9) merely chooses between
  ALTQ queueing or buf_ring(9) queueing.
  - It doesn't touch the buf_ring stats any more.
  - It doesn't touch ifnet stats anymore.
  - drbr_stats_update() no longer exists.

o buf_ring(9) handles its stats itself:
  - It handles br_drops itself.
  - br_prod_bytes stats are dropped. Rationale: no one ever
    reads them but update of a common counter on every packet
    negatively affects performance due to excessive cache
    invalidation.
  - buf_ring_enqueue_bytes() reduced to buf_ring_enqueue(), since
    we no longer account bytes.

o Drivers handle their stats theirselves: if_obytes, if_omcasts.

o mlx4(4), igb(4), em(4), vxge(4), oce(4) and  ixv(4) no longer
  use drbr_stats_update(), and update ifnet stats theirselves.

o bxe(4) was the most correct driver, it didn't call
  drbr_stats_update(), thus it was the only driver accurate under
  moderate load. Now it also maintains stats itself.

o ixgbe(4) had already taken stats from hardware, so just
  - drop software stats updating.
  - take multicast packet count from hardware as well.

o mxge(4) just no longer needs NO_SLOW_STATS define.

o cxgb(4), cxgbe(4) need no change, since they obtain stats
  from hardware.

Reviewed by:	jfv, gnn
This commit is contained in:
Gleb Smirnoff 2012-09-28 18:28:27 +00:00
parent a1c9ec3ce0
commit 063efed28c
13 changed files with 32 additions and 56 deletions
share/man/man9
sys

@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd January 30, 2012
.Dd September 27, 2012
.Dt BUF_RING 9
.Os
.Sh NAME
@ -33,7 +33,6 @@
.Nm buf_ring_alloc ,
.Nm buf_ring_free ,
.Nm buf_ring_enqueue ,
.Nm buf_ring_enqueue_bytes ,
.Nm buf_ring_dequeue_mc ,
.Nm buf_ring_dequeue_sc ,
.Nm buf_ring_count ,
@ -50,8 +49,6 @@
.Fn buf_ring_free "struct buf_ring *br" "struct malloc_type *type"
.Ft int
.Fn buf_ring_enqueue "struct buf_ring *br" "void *buf"
.Ft int
.Fn buf_ring_enqueue_bytes "struct buf_ring *br" "void *buf" "int bytes"
.Ft void *
.Fn buf_ring_dequeue_mc "struct buf_ring *br"
.Ft void *
@ -91,12 +88,6 @@ The
function is used to enqueue a buffer to a buf_ring.
.Pp
The
.Fn buf_ring_enqueue_bytes
function is used to enqueue a buffer to a buf_ring and increment the
number of bytes enqueued by
.Fa bytes .
.Pp
The
.Fn buf_ring_dequeue_mc
function is a multi-consumer safe way of dequeueing elements from a buf_ring.
.Pp
@ -134,9 +125,7 @@ otherwise.
.Sh RETURN VALUES
The
.Fn buf_ring_enqueue
and
.Fn buf_ring_enqueue_bytes
functions return
function return
.Er ENOBUFS
if there are no available slots in the buf_ring.
.Sh HISTORY

@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd January 30, 2012
.Dd September 27, 2012
.Dt DRBR 9
.Os
.Sh NAME
@ -37,7 +37,6 @@
.Nm drbr_flush ,
.Nm drbr_empty ,
.Nm drbr_inuse ,
.Nm drbr_stats_update ,
.Nd network driver interface to buf_ring
.Sh SYNOPSIS
.In sys/param.h
@ -57,8 +56,6 @@
.Fn drbr_empty "struct ifnet *ifp" "struct buf_ring *br"
.Ft int
.Fn drbr_inuse "struct ifnet *ifp" "struct buf_ring *br"
.Ft void
.Fn drbr_stats_update "struct ifnet *ifp" "int len" "int mflags"
.Sh DESCRIPTION
The
.Nm
@ -123,9 +120,6 @@ there will not be more mbufs when
is actually called.
Provided the tx queue lock is held there will not be less.
.Pp
The
.Fn drbr_stats_update
function updates the number of bytes and the number of multicast packets sent.
.Sh RETURN VALUES
The
.Fn drbr_enqueue

@ -9557,6 +9557,11 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
/* The transmit frame was enqueued successfully. */
tx_count++;
/* Update stats */
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
ifp->if_omcasts++;
/* Send a copy of the frame to any BPF listeners. */
BPF_MTAP(ifp, next);

@ -922,7 +922,9 @@ em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
break;
}
enq++;
drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
ifp->if_omcasts++;
ETHER_BPF_MTAP(ifp, next);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;

@ -1014,7 +1014,9 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
break;
}
enq++;
drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
ifp->if_omcasts++;
ETHER_BPF_MTAP(ifp, next);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;

@ -853,7 +853,6 @@ ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
break;
}
enqueued++;
drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
/* Send a copy of the frame to the BPF listener */
ETHER_BPF_MTAP(ifp, next);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
@ -5335,6 +5334,7 @@ ixgbe_update_stats_counters(struct adapter *adapter)
ifp->if_ibytes = adapter->stats.gorc;
ifp->if_obytes = adapter->stats.gotc;
ifp->if_imcasts = adapter->stats.mprc;
ifp->if_omcasts = adapter->stats.mptc;
ifp->if_collisions = 0;
/* Rx Errors */

@ -641,7 +641,9 @@ ixv_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
break;
}
enqueued++;
drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
ifp->if_omcasts++;
/* Send a copy of the frame to the BPF listener */
ETHER_BPF_MTAP(ifp, next);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)

@ -47,8 +47,6 @@ __FBSDID("$FreeBSD$");
#include <sys/sx.h>
#include <sys/taskqueue.h>
/* count xmits ourselves, rather than via drbr */
#define NO_SLOW_STATS
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>

@ -1183,7 +1183,9 @@ oce_multiq_transmit(struct ifnet *ifp, struct mbuf *m, struct oce_wq *wq)
}
break;
}
drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
ifp->if_omcasts++;
ETHER_BPF_MTAP(ifp, next);
next = drbr_dequeue(ifp, br);
}

@ -709,7 +709,9 @@ vxge_mq_send_locked(ifnet_t ifp, vxge_vpath_t *vpath, mbuf_t m_head)
VXGE_DRV_STATS(vpath, tx_again);
break;
}
drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
ifp->if_omcasts++;
/* Send a copy of the frame to the BPF listener */
ETHER_BPF_MTAP(ifp, next);

@ -590,22 +590,10 @@ do { \
} while (0)
#ifdef _KERNEL
static __inline void
drbr_stats_update(struct ifnet *ifp, int len, int mflags)
{
#ifndef NO_SLOW_STATS
ifp->if_obytes += len;
if (mflags & M_MCAST)
ifp->if_omcasts++;
#endif
}
static __inline int
drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
{
int error = 0;
int len = m->m_pkthdr.len;
int mflags = m->m_flags;
#ifdef ALTQ
if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
@ -613,12 +601,10 @@ drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
return (error);
}
#endif
if ((error = buf_ring_enqueue_bytes(br, m, len)) == ENOBUFS) {
br->br_drops++;
error = buf_ring_enqueue(br, m);
if (error)
m_freem(m);
} else
drbr_stats_update(ifp, len, mflags);
return (error);
}

@ -948,7 +948,9 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_ind, struct mbuf *m)
break;
}
enqueued++;
drbr_stats_update(dev, next->m_pkthdr.len, next->m_flags);
dev->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
dev->if_omcasts++;
/* Send a copy of the frame to the BPF listener */
ETHER_BPF_MTAP(dev, next);
if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0)

@ -48,7 +48,6 @@ struct buf_ring {
int br_prod_mask;
uint64_t br_drops;
uint64_t br_prod_bufs;
uint64_t br_prod_bytes;
/*
* Pad out to next L2 cache line
*/
@ -74,7 +73,7 @@ struct buf_ring {
*
*/
static __inline int
buf_ring_enqueue_bytes(struct buf_ring *br, void *buf, int nbytes)
buf_ring_enqueue(struct buf_ring *br, void *buf)
{
uint32_t prod_head, prod_next;
uint32_t cons_tail;
@ -95,6 +94,7 @@ buf_ring_enqueue_bytes(struct buf_ring *br, void *buf, int nbytes)
prod_next = (prod_head + 1) & br->br_prod_mask;
if (prod_next == cons_tail) {
br->br_drops++;
critical_exit();
return (ENOBUFS);
}
@ -117,19 +117,11 @@ buf_ring_enqueue_bytes(struct buf_ring *br, void *buf, int nbytes)
while (br->br_prod_tail != prod_head)
cpu_spinwait();
br->br_prod_bufs++;
br->br_prod_bytes += nbytes;
br->br_prod_tail = prod_next;
critical_exit();
return (0);
}
static __inline int
buf_ring_enqueue(struct buf_ring *br, void *buf)
{
return (buf_ring_enqueue_bytes(br, buf, 0));
}
/*
* multi-consumer safe dequeue
*