Restructure mbuf send tags to provide stronger guarantees.

- Perform ifp mismatch checks (to determine if a send tag is allocated
  for a different ifp than the one the packet is being output on), in
  ip_output() and ip6_output().  This avoids sending packets with send
  tags to ifnet drivers that don't support send tags.

  Since we are now checking for ifp mismatches before invoking
  if_output, we can now try to allocate a new tag before invoking
  if_output, sending the original packet on the new tag if allocation
  succeeds.

  To avoid code duplication for the fragment and unfragmented cases,
  add ip_output_send() and ip6_output_send() as wrappers around
  if_output and nd6_output_ifp, respectively.  All of the logic for
  setting send tags and dealing with send tag-related errors is done
  in these wrapper functions.

  For pseudo interfaces that wrap other network interfaces (vlan and
  lagg), wrapper send tags are now allocated so that ip*_output see
  the wrapper ifp as the ifp in the send tag.  The if_transmit
  routines rewrite the send tags after performing an ifp mismatch
  check.  If an ifp mismatch is detected, the transmit routines fail
  with EAGAIN.

- To provide clearer life cycle management of send tags, especially
  in the presence of vlan and lagg wrapper tags, add a reference count
  to send tags managed via m_snd_tag_ref() and m_snd_tag_rele().
  Provide a helper function (m_snd_tag_init()) for use by drivers
  supporting send tags.  m_snd_tag_init() takes care of the if_ref
  on the ifp, meaning that code allocating send tags via if_snd_tag_alloc
  no longer has to manage that manually.  Similarly, m_snd_tag_rele
  drops the refcount on the ifp after invoking if_snd_tag_free when
  the last reference to a send tag is dropped.

  This also closes use after free races if there are pending packets in
  driver tx rings after the socket is closed (e.g. from tcpdrop).

  In order for m_free to work reliably, add a new CSUM_SND_TAG flag in
  csum_flags to indicate 'snd_tag' is set (rather than 'rcvif').
  Drivers now also check this flag instead of checking snd_tag against
  NULL.  This avoids false positive matches when a forwarded packet
  has a non-NULL rcvif that was treated as a send tag.

- cxgbe was relying on snd_tag_free being called when the inp was
  detached so that it could kick the firmware to flush any pending
  work on the flow.  This is because the driver doesn't require ACK
  messages from the firmware for every request, but instead does a
  kind of manual interrupt coalescing by only setting a flag to
  request a completion on a subset of requests.  If all of the
  in-flight requests don't have the flag when the tag is detached from
  the inp, the flow might never return the credits.  The current
  snd_tag_free command issues a flush command to force the credits to
  return.  However, the credit return is what also frees the mbufs,
  and since those mbufs now hold references on the tag, this meant
  that snd_tag_free would never be called.

  To fix, explicitly drop the mbuf's reference on the snd tag when the
  mbuf is queued in the firmware work queue.  This means that once the
  inp's reference on the tag goes away and all in-flight mbufs have
  been queued to the firmware, tag's refcount will drop to zero and
  snd_tag_free will kick in and send the flush request.  Note that we
  need to avoid doing this in the middle of ethofld_tx(), so the
  driver grabs a temporary reference on the tag around that loop to
  defer the free to the end of the function in case it sends the last
  mbuf to the queue after the inp has dropped its reference on the
  tag.

- mlx5 preallocates send tags and was using the ifp pointer even when
  the send tag wasn't in use.  Explicitly use the ifp from other data
  structures instead.

- Sprinkle some assertions in various places to assert that received
  packets don't have a send tag, and that other places that overwrite
  rcvif (e.g. 802.11 transmit) don't clobber a send tag pointer.

Reviewed by:	gallatin, hselasky, rgrimes, ae
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D20117
This commit is contained in:
John Baldwin 2019-05-24 22:30:40 +00:00
parent 07e007e1ca
commit fb3bc59600
23 changed files with 499 additions and 189 deletions

View File

@ -2057,13 +2057,8 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
return (rc);
}
#ifdef RATELIMIT
if (m->m_pkthdr.snd_tag != NULL) {
/* EAGAIN tells the stack we are not the correct interface. */
if (__predict_false(ifp != m->m_pkthdr.snd_tag->ifp)) {
m_freem(m);
return (EAGAIN);
}
if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
return (ethofld_transmit(ifp, m));
}
#endif

View File

@ -789,7 +789,7 @@ cxgbe_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF);
mbufq_init(&cst->pending_tx, INT_MAX);
mbufq_init(&cst->pending_fwack, INT_MAX);
cst->com.ifp = ifp;
m_snd_tag_init(&cst->com, ifp);
cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF;
cst->adapter = sc;
cst->port_id = pi->port_id;

View File

@ -2325,7 +2325,7 @@ static inline int
needs_eo(struct mbuf *m)
{
return (m->m_pkthdr.snd_tag != NULL);
return (m->m_pkthdr.csum_flags & CSUM_SND_TAG);
}
#endif
@ -2539,8 +2539,11 @@ parse_pkt(struct adapter *sc, struct mbuf **mp)
* checksumming is enabled. needs_l4_csum happens to check for all the
* right things.
*/
if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0)))
if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0))) {
m_snd_tag_rele(m0->m_pkthdr.snd_tag);
m0->m_pkthdr.snd_tag = NULL;
m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
}
#endif
if (!needs_tso(m0) &&
@ -5922,6 +5925,21 @@ ethofld_tx(struct cxgbe_snd_tag *cst)
cst->tx_nocompl = 0;
}
(void) mbufq_dequeue(&cst->pending_tx);
/*
* Drop the mbuf's reference on the tag now rather
* than waiting until m_freem(). This ensures that
* cxgbe_snd_tag_free gets called when the inp drops
* its reference on the tag and there are no more
* mbufs in the pending_tx queue and can flush any
* pending requests. Otherwise if the last mbuf
* doesn't request a completion the etid will never be
* released.
*/
m->m_pkthdr.snd_tag = NULL;
m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
m_snd_tag_rele(&cst->com);
mbufq_enqueue(&cst->pending_fwack, m);
}
}
@ -5933,6 +5951,7 @@ ethofld_transmit(struct ifnet *ifp, struct mbuf *m0)
int rc;
MPASS(m0->m_nextpkt == NULL);
MPASS(m0->m_pkthdr.csum_flags & CSUM_SND_TAG);
MPASS(m0->m_pkthdr.snd_tag != NULL);
cst = mst_to_cst(m0->m_pkthdr.snd_tag);
@ -5967,8 +5986,18 @@ ethofld_transmit(struct ifnet *ifp, struct mbuf *m0)
mbufq_enqueue(&cst->pending_tx, m0);
cst->plen += m0->m_pkthdr.len;
/*
* Hold an extra reference on the tag while generating work
* requests to ensure that we don't try to free the tag during
* ethofld_tx() in case we are sending the final mbuf after
* the inp was freed.
*/
m_snd_tag_ref(&cst->com);
ethofld_tx(cst);
rc = 0;
mtx_unlock(&cst->lock);
m_snd_tag_rele(&cst->com);
return (0);
done:
mtx_unlock(&cst->lock);
if (__predict_false(rc != 0))
@ -6015,7 +6044,6 @@ ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0
cst->flags &= ~EO_FLUSH_RPL_PENDING;
cst->tx_credits += cpl->credits;
freetag:
cxgbe_snd_tag_free_locked(cst);
return (0); /* cst is gone. */
}
@ -6033,22 +6061,27 @@ ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0
cst->tx_credits += cpl->credits;
MPASS(cst->tx_credits <= cst->tx_total);
m = mbufq_first(&cst->pending_tx);
if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m))
ethofld_tx(cst);
if (__predict_false((cst->flags & EO_SND_TAG_REF) == 0) &&
cst->ncompl == 0) {
if (cst->tx_credits == cst->tx_total)
goto freetag;
else {
MPASS((cst->flags & EO_FLUSH_RPL_PENDING) == 0);
send_etid_flush_wr(cst);
}
if (cst->flags & EO_SND_TAG_REF) {
/*
* As with ethofld_transmit(), hold an extra reference
* so that the tag is stable across ethold_tx().
*/
m_snd_tag_ref(&cst->com);
m = mbufq_first(&cst->pending_tx);
if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m))
ethofld_tx(cst);
mtx_unlock(&cst->lock);
m_snd_tag_rele(&cst->com);
} else {
/*
* There shouldn't be any pending packets if the tag
* was freed by the kernel since any pending packet
* should hold a reference to the tag.
*/
MPASS(mbufq_first(&cst->pending_tx) == NULL);
mtx_unlock(&cst->lock);
}
mtx_unlock(&cst->lock);
return (0);
}
#endif

View File

@ -1247,7 +1247,7 @@ mlx5e_create_rq(struct mlx5e_channel *c,
wq_sz = mlx5_wq_ll_get_size(&rq->wq);
err = -tcp_lro_init_args(&rq->lro, c->tag.m_snd_tag.ifp, TCP_LRO_ENTRIES, wq_sz);
err = -tcp_lro_init_args(&rq->lro, priv->ifp, TCP_LRO_ENTRIES, wq_sz);
if (err)
goto err_rq_wq_destroy;
@ -1288,7 +1288,7 @@ mlx5e_create_rq(struct mlx5e_channel *c,
}
}
rq->ifp = c->tag.m_snd_tag.ifp;
rq->ifp = priv->ifp;
rq->channel = c;
rq->ix = c->ix;
@ -2145,7 +2145,6 @@ mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->priv = priv;
c->ix = ix;
/* setup send tag */
c->tag.m_snd_tag.ifp = priv->ifp;
c->tag.type = IF_SND_TAG_TYPE_UNLIMITED;
c->mkey_be = cpu_to_be32(priv->mr.key);
c->num_tc = priv->num_tc;
@ -3987,6 +3986,8 @@ mlx5e_ul_snd_tag_alloc(struct ifnet *ifp,
if (unlikely(pch->sq[0].running == 0))
return (ENXIO);
mlx5e_ref_channel(priv);
MPASS(pch->tag.m_snd_tag.refcount == 0);
m_snd_tag_init(&pch->tag.m_snd_tag, ifp);
*ppmt = &pch->tag.m_snd_tag;
return (0);
}

View File

@ -843,7 +843,6 @@ mlx5e_rl_init(struct mlx5e_priv *priv)
for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) {
struct mlx5e_rl_channel *channel = rlw->channels + i;
channel->worker = rlw;
channel->tag.m_snd_tag.ifp = priv->ifp;
channel->tag.type = IF_SND_TAG_TYPE_RATE_LIMIT;
STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry);
}
@ -1127,6 +1126,8 @@ mlx5e_rl_snd_tag_alloc(struct ifnet *ifp,
}
/* store pointer to mbuf tag */
MPASS(channel->tag.m_snd_tag.refcount == 0);
m_snd_tag_init(&channel->tag.m_snd_tag, ifp);
*ppmt = &channel->tag.m_snd_tag;
done:
return (error);

View File

@ -83,10 +83,6 @@ mlx5e_select_queue_by_send_tag(struct ifnet *ifp, struct mbuf *mb)
struct mlx5e_snd_tag *ptag;
struct mlx5e_sq *sq;
/* check for route change */
if (mb->m_pkthdr.snd_tag->ifp != ifp)
return (NULL);
/* get pointer to sendqueue */
ptag = container_of(mb->m_pkthdr.snd_tag,
struct mlx5e_snd_tag, m_snd_tag);
@ -609,21 +605,10 @@ mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb)
struct mlx5e_sq *sq;
int ret;
if (mb->m_pkthdr.snd_tag != NULL) {
if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
sq = mlx5e_select_queue_by_send_tag(ifp, mb);
if (unlikely(sq == NULL)) {
/* Check for route change */
if (mb->m_pkthdr.snd_tag->ifp != ifp) {
/* Free mbuf */
m_freem(mb);
/*
* Tell upper layers about route
* change and to re-transmit this
* packet:
*/
return (EAGAIN);
}
goto select_queue;
}
} else {

View File

@ -46,8 +46,12 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
@ -112,6 +116,10 @@ static quad_t maxmbufmem; /* overall real memory limit for all mbufs */
SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,
"Maximum real memory allocatable to various mbuf types");
static counter_u64_t snd_tag_count;
SYSCTL_COUNTER_U64(_kern_ipc, OID_AUTO, num_snd_tags, CTLFLAG_RW,
&snd_tag_count, "# of active mbuf send tags");
/*
* tunable_mbinit() has to be run before any mbuf allocations are done.
*/
@ -378,6 +386,8 @@ mbuf_init(void *dummy)
*/
EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
EVENTHANDLER_PRI_FIRST);
snd_tag_count = counter_u64_alloc(M_WAITOK);
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
@ -1149,3 +1159,24 @@ m_freem(struct mbuf *mb)
while (mb != NULL)
mb = m_free(mb);
}
/*
 * Initialize a newly-allocated send tag for use by a driver or a
 * pseudo-interface wrapper.  The tag starts with a single reference
 * owned by the caller.
 */
void
m_snd_tag_init(struct m_snd_tag *mst, struct ifnet *ifp)
{
/* Hold the ifnet so it cannot be detached while the tag is live. */
if_ref(ifp);
mst->ifp = ifp;
refcount_init(&mst->refcount, 1);
/* Reported via the kern.ipc.num_snd_tags sysctl. */
counter_u64_add(snd_tag_count, 1);
}
/*
 * Tear down a send tag once its last reference has been released
 * (called from m_snd_tag_rele()).  The driver's if_snd_tag_free
 * callback frees the tag itself, after which the ifnet reference
 * taken in m_snd_tag_init() is dropped.
 */
void
m_snd_tag_destroy(struct m_snd_tag *mst)
{
struct ifnet *ifp;
ifp = mst->ifp;
/* NB: this frees mst; do not touch it afterwards. */
ifp->if_snd_tag_free(mst);
if_rele(ifp);
counter_u64_add(snd_tag_count, -1);
}

View File

@ -382,6 +382,10 @@ m_move_pkthdr(struct mbuf *to, struct mbuf *from)
to->m_pkthdr = from->m_pkthdr; /* especially tags */
SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */
from->m_flags &= ~M_PKTHDR;
if (from->m_pkthdr.csum_flags & CSUM_SND_TAG) {
from->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
from->m_pkthdr.snd_tag = NULL;
}
}
/*
@ -414,6 +418,8 @@ m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how)
if ((to->m_flags & M_EXT) == 0)
to->m_data = to->m_pktdat;
to->m_pkthdr = from->m_pkthdr;
if (from->m_pkthdr.csum_flags & CSUM_SND_TAG)
m_snd_tag_ref(from->m_pkthdr.snd_tag);
SLIST_INIT(&to->m_pkthdr.tags);
return (m_tag_copy_chain(to, from, how));
}
@ -924,7 +930,12 @@ m_split(struct mbuf *m0, int len0, int wait)
return (NULL);
n->m_next = m->m_next;
m->m_next = NULL;
n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
n->m_pkthdr.snd_tag =
m_snd_tag_ref(m0->m_pkthdr.snd_tag);
n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
} else
n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
return (n);
@ -932,7 +943,12 @@ m_split(struct mbuf *m0, int len0, int wait)
n = m_gethdr(wait, m0->m_type);
if (n == NULL)
return (NULL);
n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) {
n->m_pkthdr.snd_tag =
m_snd_tag_ref(m0->m_pkthdr.snd_tag);
n->m_pkthdr.csum_flags |= CSUM_SND_TAG;
} else
n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
n->m_pkthdr.len = m0->m_pkthdr.len - len0;
m0->m_pkthdr.len = len0;
if (m->m_flags & M_EXT)

View File

@ -2304,7 +2304,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
int gottime;
/* Skip outgoing duplicate packets. */
if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) {
m->m_flags &= ~M_PROMISC;
return;
}
@ -2314,7 +2314,7 @@ bpf_mtap(struct bpf_if *bp, struct mbuf *m)
NET_EPOCH_ENTER(et);
CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp))
continue;
counter_u64_add(d->bd_rcount, 1);
#ifdef BPF_JITTER

View File

@ -4295,6 +4295,8 @@ if_getsoftc(if_t ifp)
/*
 * Set the receive interface of a packet header.  rcvif shares storage
 * with snd_tag, so assert that no send tag is stamped on the mbuf
 * before overwriting the pointer.
 */
void
if_setrcvif(struct mbuf *m, if_t ifp)
{
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (struct ifnet *)ifp;
}

View File

@ -816,6 +816,7 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
* We will rely on rcvif being set properly in the deferred context,
* so assert it is correct here.
*/
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p "
"rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp));
CURVNET_SET_QUIET(ifp->if_vnet);

View File

@ -95,6 +95,11 @@ static struct {
{0, NULL}
};
/*
 * Wrapper send tag handed out by lagg(4): 'com' is the tag seen by
 * the stack (its ifp is the lagg interface), 'tag' is the underlying
 * tag allocated from the selected member port.
 */
struct lagg_snd_tag {
struct m_snd_tag com;
struct m_snd_tag *tag;
};
VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */
#define V_lagg_list VNET(lagg_list)
VNET_DEFINE_STATIC(struct mtx, lagg_list_mtx);
@ -134,6 +139,10 @@ static int lagg_ioctl(struct ifnet *, u_long, caddr_t);
static int lagg_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *,
struct m_snd_tag **);
static int lagg_snd_tag_modify(struct m_snd_tag *,
union if_snd_tag_modify_params *);
static int lagg_snd_tag_query(struct m_snd_tag *,
union if_snd_tag_query_params *);
static void lagg_snd_tag_free(struct m_snd_tag *);
#endif
static int lagg_setmulti(struct lagg_port *);
@ -525,6 +534,8 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
#ifdef RATELIMIT
ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
ifp->if_snd_tag_modify = lagg_snd_tag_modify;
ifp->if_snd_tag_query = lagg_snd_tag_query;
ifp->if_snd_tag_free = lagg_snd_tag_free;
#endif
ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
@ -1537,63 +1548,126 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
#ifdef RATELIMIT
/* Recover the lagg wrapper tag from its embedded m_snd_tag. */
static inline struct lagg_snd_tag *
mst_to_lst(struct m_snd_tag *mst)
{
return (__containerof(mst, struct lagg_snd_tag, com));
}
/*
* Look up the port used by a specific flow. This only works for lagg
* protocols with deterministic port mappings (e.g. not roundrobin).
* In addition protocols which use a hash to map flows to ports must
* be configured to use the mbuf flowid rather than hashing packet
* contents.
*
* Returns the active port for the flow, or NULL if no deterministic
* mapping can be made.
*/
static struct lagg_port *
lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype)
{
struct lagg_softc *sc;
struct lagg_port *lp;
struct lagg_lb *lb;
uint32_t p;
sc = ifp->if_softc;
switch (sc->sc_proto) {
case LAGG_PROTO_FAILOVER:
/* Failover always transmits on the (active) primary port. */
return (lagg_link_active(sc, sc->sc_primary));
case LAGG_PROTO_LOADBALANCE:
/* Only deterministic if the mbuf flowid picks the port. */
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
flowtype == M_HASHTYPE_NONE)
return (NULL);
/* Map the flowid onto a member-port index. */
p = flowid >> sc->flowid_shift;
p %= sc->sc_count;
lb = (struct lagg_lb *)sc->sc_psc;
lp = lb->lb_ports[p];
return (lagg_link_active(sc, lp));
case LAGG_PROTO_LACP:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
flowtype == M_HASHTYPE_NONE)
return (NULL);
return (lacp_select_tx_port_by_hash(sc, flowid));
default:
/* e.g. roundrobin: no fixed flow-to-port mapping. */
return (NULL);
}
}
static int
lagg_snd_tag_alloc(struct ifnet *ifp,
union if_snd_tag_alloc_params *params,
struct m_snd_tag **ppmt)
{
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
struct lagg_snd_tag *lst;
struct lagg_softc *sc;
struct lagg_port *lp;
struct lagg_lb *lb;
uint32_t p;
struct ifnet *lp_ifp;
int error;
sc = ifp->if_softc;
LAGG_RLOCK();
switch (sc->sc_proto) {
case LAGG_PROTO_FAILOVER:
lp = lagg_link_active(sc, sc->sc_primary);
break;
case LAGG_PROTO_LOADBALANCE:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
params->hdr.flowtype == M_HASHTYPE_NONE) {
LAGG_RUNLOCK();
return (EOPNOTSUPP);
}
p = params->hdr.flowid >> sc->flowid_shift;
p %= sc->sc_count;
lb = (struct lagg_lb *)sc->sc_psc;
lp = lb->lb_ports[p];
lp = lagg_link_active(sc, lp);
break;
case LAGG_PROTO_LACP:
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
params->hdr.flowtype == M_HASHTYPE_NONE) {
LAGG_RUNLOCK();
return (EOPNOTSUPP);
}
lp = lacp_select_tx_port_by_hash(sc, params->hdr.flowid);
break;
default:
LAGG_RUNLOCK();
return (EOPNOTSUPP);
}
lp = lookup_snd_tag_port(ifp, params->hdr.flowid, params->hdr.flowtype);
if (lp == NULL) {
LAGG_RUNLOCK();
return (EOPNOTSUPP);
}
ifp = lp->lp_ifp;
LAGG_RUNLOCK();
if (ifp == NULL || ifp->if_snd_tag_alloc == NULL ||
(ifp->if_capenable & IFCAP_TXRTLMT) == 0)
if (lp->lp_ifp == NULL || lp->lp_ifp->if_snd_tag_alloc == NULL) {
LAGG_RUNLOCK();
return (EOPNOTSUPP);
}
lp_ifp = lp->lp_ifp;
if_ref(lp_ifp);
LAGG_RUNLOCK();
/* forward allocation request */
return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
lst = malloc(sizeof(*lst), M_LAGG, M_NOWAIT);
if (lst == NULL) {
if_rele(lp_ifp);
return (ENOMEM);
}
error = lp_ifp->if_snd_tag_alloc(lp_ifp, params, &lst->tag);
if_rele(lp_ifp);
if (error) {
free(lst, M_LAGG);
return (error);
}
m_snd_tag_init(&lst->com, ifp);
*ppmt = &lst->com;
return (0);
}
/* Forward a tag-modify request to the wrapped member-port tag. */
static int
lagg_snd_tag_modify(struct m_snd_tag *mst,
union if_snd_tag_modify_params *params)
{
struct lagg_snd_tag *lst;
lst = mst_to_lst(mst);
return (lst->tag->ifp->if_snd_tag_modify(lst->tag, params));
}
/* Forward a tag-query request to the wrapped member-port tag. */
static int
lagg_snd_tag_query(struct m_snd_tag *mst,
union if_snd_tag_query_params *params)
{
struct lagg_snd_tag *lst;
lst = mst_to_lst(mst);
return (lst->tag->ifp->if_snd_tag_query(lst->tag, params));
}
static void
lagg_snd_tag_free(struct m_snd_tag *tag)
lagg_snd_tag_free(struct m_snd_tag *mst)
{
tag->ifp->if_snd_tag_free(tag);
struct lagg_snd_tag *lst;
lst = mst_to_lst(mst);
m_snd_tag_rele(lst->tag);
free(lst, M_LAGG);
}
#endif
@ -1720,6 +1794,10 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m)
struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
int error;
#ifdef RATELIMIT
if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
#endif
LAGG_RLOCK();
/* We need a Tx algorithm and at least one port */
if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
@ -1910,6 +1988,21 @@ int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{
#ifdef RATELIMIT
if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
struct lagg_snd_tag *lst;
struct m_snd_tag *mst;
mst = m->m_pkthdr.snd_tag;
lst = mst_to_lst(mst);
if (lst->tag->ifp != ifp) {
m_freem(m);
return (EAGAIN);
}
m->m_pkthdr.snd_tag = m_snd_tag_ref(lst->tag);
m_snd_tag_rele(mst);
}
#endif
return (ifp->if_transmit)(ifp, m);
}

View File

@ -103,6 +103,20 @@ struct ifvlantrunk {
int refcnt;
};
#ifdef RATELIMIT
/*
 * Wrapper send tag handed out by vlan(4): 'com' is the tag seen by
 * the stack (its ifp is the vlan interface), 'tag' is the underlying
 * tag allocated from the trunk (parent) interface.
 */
struct vlan_snd_tag {
struct m_snd_tag com;
struct m_snd_tag *tag;
};
/* Recover the vlan wrapper tag from its embedded m_snd_tag. */
static inline struct vlan_snd_tag *
mst_to_vst(struct m_snd_tag *mst)
{
return (__containerof(mst, struct vlan_snd_tag, com));
}
#endif
/*
* This macro provides a facility to iterate over every vlan on a trunk with
* the assumption that none will be added/removed during iteration.
@ -267,7 +281,11 @@ static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr);
#ifdef RATELIMIT
static int vlan_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *, struct m_snd_tag **);
static void vlan_snd_tag_free(struct m_snd_tag *);
static int vlan_snd_tag_modify(struct m_snd_tag *,
union if_snd_tag_modify_params *);
static int vlan_snd_tag_query(struct m_snd_tag *,
union if_snd_tag_query_params *);
static void vlan_snd_tag_free(struct m_snd_tag *);
#endif
static void vlan_qflush(struct ifnet *ifp);
static int vlan_setflag(struct ifnet *ifp, int flag, int status,
@ -1048,6 +1066,8 @@ vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
ifp->if_ioctl = vlan_ioctl;
#ifdef RATELIMIT
ifp->if_snd_tag_alloc = vlan_snd_tag_alloc;
ifp->if_snd_tag_modify = vlan_snd_tag_modify;
ifp->if_snd_tag_query = vlan_snd_tag_query;
ifp->if_snd_tag_free = vlan_snd_tag_free;
#endif
ifp->if_flags = VLAN_IFFLAGS;
@ -1137,6 +1157,26 @@ vlan_transmit(struct ifnet *ifp, struct mbuf *m)
BPF_MTAP(ifp, m);
#ifdef RATELIMIT
if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
struct vlan_snd_tag *vst;
struct m_snd_tag *mst;
MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
mst = m->m_pkthdr.snd_tag;
vst = mst_to_vst(mst);
if (vst->tag->ifp != p) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
NET_EPOCH_EXIT(et);
m_freem(m);
return (EAGAIN);
}
m->m_pkthdr.snd_tag = m_snd_tag_ref(vst->tag);
m_snd_tag_rele(mst);
}
#endif
/*
* Do not run parent's if_transmit() if the parent is not up,
* or parent's driver will cause a system crash.
@ -1928,18 +1968,71 @@ vlan_snd_tag_alloc(struct ifnet *ifp,
union if_snd_tag_alloc_params *params,
struct m_snd_tag **ppmt)
{
struct epoch_tracker et;
struct vlan_snd_tag *vst;
struct ifvlan *ifv;
struct ifnet *parent;
int error;
/* get trunk device */
ifp = vlan_trunkdev(ifp);
if (ifp == NULL || (ifp->if_capenable & IFCAP_TXRTLMT) == 0)
NET_EPOCH_ENTER(et);
ifv = ifp->if_softc;
if (ifv->ifv_trunk != NULL)
parent = PARENT(ifv);
else
parent = NULL;
if (parent == NULL || parent->if_snd_tag_alloc == NULL) {
NET_EPOCH_EXIT(et);
return (EOPNOTSUPP);
/* forward allocation request */
return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
}
if_ref(parent);
NET_EPOCH_EXIT(et);
vst = malloc(sizeof(*vst), M_VLAN, M_NOWAIT);
if (vst == NULL) {
if_rele(parent);
return (ENOMEM);
}
error = parent->if_snd_tag_alloc(parent, params, &vst->tag);
if_rele(parent);
if (error) {
free(vst, M_VLAN);
return (error);
}
m_snd_tag_init(&vst->com, ifp);
*ppmt = &vst->com;
return (0);
}
/* Forward a tag-modify request to the wrapped parent-interface tag. */
static int
vlan_snd_tag_modify(struct m_snd_tag *mst,
union if_snd_tag_modify_params *params)
{
struct vlan_snd_tag *vst;
vst = mst_to_vst(mst);
return (vst->tag->ifp->if_snd_tag_modify(vst->tag, params));
}
/* Forward a tag-query request to the wrapped parent-interface tag. */
static int
vlan_snd_tag_query(struct m_snd_tag *mst,
union if_snd_tag_query_params *params)
{
struct vlan_snd_tag *vst;
vst = mst_to_vst(mst);
return (vst->tag->ifp->if_snd_tag_query(vst->tag, params));
}
static void
vlan_snd_tag_free(struct m_snd_tag *tag)
vlan_snd_tag_free(struct m_snd_tag *mst)
{
tag->ifp->if_snd_tag_free(tag);
struct vlan_snd_tag *vst;
vst = mst_to_vst(mst);
m_snd_tag_rele(vst->tag);
free(vst, M_VLAN);
}
#endif

View File

@ -839,6 +839,7 @@ netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
("%s: invalid policy %u for %s", __func__, npp->np_policy,
npp->np_name));
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
ifp = m->m_pkthdr.rcvif;
if (ifp != NULL)
*cpuidp = nws_array[(ifp->if_index + source) % nws_count];

View File

@ -2015,6 +2015,7 @@ hwmp_discover(struct ieee80211vap *vap,
*/
IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_HWMP, dest,
"%s", "queue frame until path found");
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)(uintptr_t)
ieee80211_mac_hash(ic, dest);
/* XXX age chosen randomly */

View File

@ -1225,6 +1225,7 @@ mesh_forward(struct ieee80211vap *vap, struct mbuf *m,
M_WME_SETAC(mcopy, WME_AC_BE);
/* XXX do we know m_nextpkt is NULL? */
MPASS((mcopy->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
mcopy->m_pkthdr.rcvif = (void *) ni;
/*

View File

@ -163,6 +163,7 @@ ieee80211_vap_pkt_send_dest(struct ieee80211vap *vap, struct mbuf *m,
* uses any existing value for rcvif to identify the
* interface it (might have been) received on.
*/
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)ni;
mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1: 0;
@ -528,6 +529,7 @@ ieee80211_raw_output(struct ieee80211vap *vap, struct ieee80211_node *ni,
* that the mbuf has the same node value that
* it would if it were going via the normal path.
*/
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)ni;
/*

View File

@ -299,6 +299,7 @@ ieee80211_dwds_mcast(struct ieee80211vap *vap0, struct mbuf *m)
continue;
}
mcopy->m_flags |= M_MCAST;
MPASS((mcopy->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
mcopy->m_pkthdr.rcvif = (void *) ni;
err = ieee80211_parent_xmitpkt(ic, mcopy);
@ -332,6 +333,7 @@ ieee80211_dwds_discover(struct ieee80211_node *ni, struct mbuf *m)
* XXX handle overflow?
* XXX per/vap beacon interval?
*/
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
m->m_pkthdr.rcvif = (void *)(uintptr_t)
ieee80211_mac_hash(ic, ni->ni_macaddr);
(void) ieee80211_ageq_append(&ic->ic_stageq, m,

View File

@ -3274,13 +3274,6 @@ in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
error = EOPNOTSUPP;
} else {
error = ifp->if_snd_tag_alloc(ifp, &params, &inp->inp_snd_tag);
/*
* At success increment the refcount on
* the send tag's network interface:
*/
if (error == 0)
if_ref(inp->inp_snd_tag->ifp);
}
return (error);
}
@ -3293,7 +3286,6 @@ void
in_pcbdetach_txrtlmt(struct inpcb *inp)
{
struct m_snd_tag *mst;
struct ifnet *ifp;
INP_WLOCK_ASSERT(inp);
@ -3303,19 +3295,7 @@ in_pcbdetach_txrtlmt(struct inpcb *inp)
if (mst == NULL)
return;
ifp = mst->ifp;
if (ifp == NULL)
return;
/*
* If the device was detached while we still had reference(s)
* on the ifp, we assume if_snd_tag_free() was replaced with
* stubs.
*/
ifp->if_snd_tag_free(mst);
/* release reference count on network interface */
if_rele(ifp);
m_snd_tag_rele(mst);
}
/*
@ -3360,6 +3340,17 @@ in_pcboutput_txrtlmt(struct inpcb *inp, struct ifnet *ifp, struct mbuf *mb)
*/
max_pacing_rate = socket->so_max_pacing_rate;
/*
* If the existing send tag is for the wrong interface due to
* a route change, first drop the existing tag. Set the
* CHANGED flag so that we will keep trying to allocate a new
* tag if we fail to allocate one this time.
*/
if (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp) {
in_pcbdetach_txrtlmt(inp);
inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
}
/*
* NOTE: When attaching to a network interface a reference is
* made to ensure the network interface doesn't go away until

View File

@ -204,6 +204,51 @@ ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp,
return 0;
}
/*
 * Wrapper around if_output for ip_output(): stamps the inp's send
 * tag (if any) on the mbuf and handles all send tag-related errors,
 * so the fragmented and unfragmented output paths share one copy of
 * this logic.  Consumes 'm' on every path, like if_output itself.
 */
static int
ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr_in *gw, struct route *ro)
{
	struct m_snd_tag *mst;
	int error;

	/* The caller must not have stamped a send tag already. */
	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
	mst = NULL;

#ifdef RATELIMIT
	if (inp != NULL) {
		/*
		 * (Re)allocate the send tag if the rate limit changed
		 * or the cached tag is for a different interface due
		 * to a route change.
		 */
		if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
		    (inp->inp_snd_tag != NULL &&
		    inp->inp_snd_tag->ifp != ifp))
			in_pcboutput_txrtlmt(inp, ifp, m);

		if (inp->inp_snd_tag != NULL)
			mst = inp->inp_snd_tag;
	}
#endif
	if (mst != NULL) {
		KASSERT(m->m_pkthdr.rcvif == NULL,
		    ("trying to add a send tag to a forwarded packet"));
		if (mst->ifp != ifp) {
			/*
			 * Stale tag after a route change.  Free the
			 * mbuf here: if_output, which normally
			 * consumes it, is never called on this path,
			 * so the chain would otherwise leak.
			 */
			m_freem(m);
			error = EAGAIN;
			goto done;
		}

		/* stamp send tag on mbuf */
		m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
		m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
	}

	error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro);

done:
	/* Check for route change invalidating send tags. */
#ifdef RATELIMIT
	if (error == EAGAIN)
		in_pcboutput_eagain(inp);
#endif
	return (error);
}
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
@ -687,23 +732,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
*/
m_clrprotoflags(m);
IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
#ifdef RATELIMIT
if (inp != NULL) {
if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
in_pcboutput_txrtlmt(inp, ifp, m);
/* stamp send tag on mbuf */
m->m_pkthdr.snd_tag = inp->inp_snd_tag;
} else {
m->m_pkthdr.snd_tag = NULL;
}
#endif
error = (*ifp->if_output)(ifp, m,
(const struct sockaddr *)gw, ro);
#ifdef RATELIMIT
/* check for route change */
if (error == EAGAIN)
in_pcboutput_eagain(inp);
#endif
error = ip_output_send(inp, ifp, m, gw, ro);
goto done;
}
@ -739,23 +768,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp,
mtod(m, struct ip *), NULL);
#ifdef RATELIMIT
if (inp != NULL) {
if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
in_pcboutput_txrtlmt(inp, ifp, m);
/* stamp send tag on mbuf */
m->m_pkthdr.snd_tag = inp->inp_snd_tag;
} else {
m->m_pkthdr.snd_tag = NULL;
}
#endif
error = (*ifp->if_output)(ifp, m,
(const struct sockaddr *)gw, ro);
#ifdef RATELIMIT
/* check for route change */
if (error == EAGAIN)
in_pcboutput_eagain(inp);
#endif
error = ip_output_send(inp, ifp, m, gw, ro);
} else
m_freem(m);
}

View File

@ -276,6 +276,51 @@ ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto,
return (0);
}
/*
 * Wrapper around nd6_output_ifp for ip6_output(): stamps the inp's
 * send tag (if any) on the mbuf and handles all send tag-related
 * errors, so the fragmented and unfragmented output paths share one
 * copy of this logic.  Consumes 'm' on every path, like
 * nd6_output_ifp itself.
 */
static int
ip6_output_send(struct inpcb *inp, struct ifnet *ifp, struct ifnet *origifp,
    struct mbuf *m, struct sockaddr_in6 *dst, struct route_in6 *ro)
{
	struct m_snd_tag *mst;
	int error;

	/* The caller must not have stamped a send tag already. */
	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
	mst = NULL;

#ifdef RATELIMIT
	if (inp != NULL) {
		/*
		 * (Re)allocate the send tag if the rate limit changed
		 * or the cached tag is for a different interface due
		 * to a route change.
		 */
		if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
		    (inp->inp_snd_tag != NULL &&
		    inp->inp_snd_tag->ifp != ifp))
			in_pcboutput_txrtlmt(inp, ifp, m);

		if (inp->inp_snd_tag != NULL)
			mst = inp->inp_snd_tag;
	}
#endif
	if (mst != NULL) {
		KASSERT(m->m_pkthdr.rcvif == NULL,
		    ("trying to add a send tag to a forwarded packet"));
		if (mst->ifp != ifp) {
			/*
			 * Stale tag after a route change.  Free the
			 * mbuf here: nd6_output_ifp, which normally
			 * consumes it, is never called on this path,
			 * so the chain would otherwise leak.
			 */
			m_freem(m);
			error = EAGAIN;
			goto done;
		}

		/* stamp send tag on mbuf */
		m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
		m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
	}

	error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro);

done:
	/* Check for route change invalidating send tags. */
#ifdef RATELIMIT
	if (error == EAGAIN)
		in_pcboutput_eagain(inp);
#endif
	return (error);
}
/*
* IP6 output. The packet in mbuf chain m contains a skeletal IP6
* header (with pri, len, nxt, hlim, src, dst).
@ -968,23 +1013,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
m->m_pkthdr.len);
ifa_free(&ia6->ia_ifa);
}
#ifdef RATELIMIT
if (inp != NULL) {
if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
in_pcboutput_txrtlmt(inp, ifp, m);
/* stamp send tag on mbuf */
m->m_pkthdr.snd_tag = inp->inp_snd_tag;
} else {
m->m_pkthdr.snd_tag = NULL;
}
#endif
error = nd6_output_ifp(ifp, origifp, m, dst,
(struct route *)ro);
#ifdef RATELIMIT
/* check for route change */
if (error == EAGAIN)
in_pcboutput_eagain(inp);
#endif
error = ip6_output_send(inp, ifp, origifp, m, dst, ro);
goto done;
}
@ -1083,23 +1112,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
counter_u64_add(ia->ia_ifa.ifa_obytes,
m->m_pkthdr.len);
}
#ifdef RATELIMIT
if (inp != NULL) {
if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED)
in_pcboutput_txrtlmt(inp, ifp, m);
/* stamp send tag on mbuf */
m->m_pkthdr.snd_tag = inp->inp_snd_tag;
} else {
m->m_pkthdr.snd_tag = NULL;
}
#endif
error = nd6_output_ifp(ifp, origifp, m, dst,
(struct route *)ro);
#ifdef RATELIMIT
/* check for route change */
if (error == EAGAIN)
in_pcboutput_eagain(inp);
#endif
error = ip6_output_send(inp, ifp, origifp, m, dst, ro);
} else
m_freem(m);
}

View File

@ -1758,7 +1758,7 @@ do { \
oif = NULL;
} else {
MPASS(args->flags & IPFW_ARGS_OUT);
iif = mem ? NULL : m->m_pkthdr.rcvif;
iif = mem ? NULL : m_rcvif(m);
oif = args->ifp;
}

View File

@ -40,6 +40,7 @@
#include <sys/queue.h>
#ifdef _KERNEL
#include <sys/systm.h>
#include <sys/refcount.h>
#include <vm/uma.h>
#ifdef WITNESS
#include <sys/lock.h>
@ -138,6 +139,7 @@ struct m_tag {
*/
struct m_snd_tag {
struct ifnet *ifp; /* network interface tag belongs to */
volatile u_int refcount;
};
/*
@ -494,6 +496,8 @@ struct mbuf {
#define CSUM_L5_VALID 0x20000000 /* checksum is correct */
#define CSUM_COALESCED 0x40000000 /* contains merged segments */
#define CSUM_SND_TAG 0x80000000 /* Packet header has send tag */
/*
* CSUM flag description for use with printf(9) %b identifier.
*/
@ -503,7 +507,7 @@ struct mbuf {
"\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \
"\16CSUM_IP6_ISCSI" \
"\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \
"\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED"
"\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESCED\40CSUM_SND_TAG"
/* CSUM flags compatibility mappings. */
#define CSUM_IP_CHECKED CSUM_L3_CALC
@ -633,6 +637,8 @@ int m_sanity(struct mbuf *, int);
struct mbuf *m_split(struct mbuf *, int, int);
struct mbuf *m_uiotombuf(struct uio *, int, int, int, int);
struct mbuf *m_unshare(struct mbuf *, int);
void m_snd_tag_init(struct m_snd_tag *, struct ifnet *);
void m_snd_tag_destroy(struct m_snd_tag *);
static __inline int
m_gettype(int size)
@ -995,6 +1001,17 @@ m_align(struct mbuf *m, int len)
*/
#define MCHTYPE(m, t) m_chtype((m), (t))
/*
 * Return the rcvif of a packet header, or NULL if the mbuf carries a
 * send tag instead.  rcvif and snd_tag share storage, so a packet
 * stamped with CSUM_SND_TAG has no valid receive interface; callers
 * use this accessor rather than reading rcvif directly to avoid
 * misinterpreting a send tag pointer as an ifnet.
 */
static __inline struct ifnet *
m_rcvif(struct mbuf *m)
{
M_ASSERTPKTHDR(m);
if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
return (NULL);
return (m->m_pkthdr.rcvif);
}
/* Length to m_copy to copy all. */
#define M_COPYALL 1000000000
@ -1185,6 +1202,22 @@ m_tag_find(struct mbuf *m, int type, struct m_tag *start)
m_tag_locate(m, MTAG_ABI_COMPAT, type, start));
}
/*
 * Acquire a reference on a send tag.  Returns the tag so callers can
 * stamp it on an mbuf in a single expression.
 */
static inline struct m_snd_tag *
m_snd_tag_ref(struct m_snd_tag *mst)
{
refcount_acquire(&mst->refcount);
return (mst);
}
/*
 * Drop a reference on a send tag; the last release destroys the tag
 * via m_snd_tag_destroy() (driver free + ifnet release).
 */
static inline void
m_snd_tag_rele(struct m_snd_tag *mst)
{
if (refcount_release(&mst->refcount))
m_snd_tag_destroy(mst);
}
static __inline struct mbuf *
m_free(struct mbuf *m)
{
@ -1193,6 +1226,8 @@ m_free(struct mbuf *m)
MBUF_PROBE1(m__free, m);
if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
m_tag_delete_chain(m, NULL);
if (m->m_flags & M_PKTHDR && m->m_pkthdr.csum_flags & CSUM_SND_TAG)
m_snd_tag_rele(m->m_pkthdr.snd_tag);
if (m->m_flags & M_EXT)
mb_free_ext(m);
else if ((m->m_flags & M_NOFREE) == 0)