xen/netfront: deal with mbuf data crossing a page boundary
There's been a report recently of mbufs with data that crosses a page boundary. It seems those mbufs are generated by the iSCSI target system: https://lists.xenproject.org/archives/html/xen-devel/2021-12/msg01581.html In order to handle those mbufs correctly on netfront use the bus_dma interface and explicitly request that segments must not cross a page boundary. No other requirements are necessary, so it's expected that bus_dma won't need to bounce the data and hence it shouldn't introduce a too big performance penalty. Using bus_dma requires some changes to netfront, mainly in order to accommodate for the fact that now ring slots no longer have a 1:1 match with mbufs, as a single mbuf can use two ring slots if the data buffer crosses a page boundary. Store the first packet of the mbuf chain in every ring slot that's used, and use a mbuf tag in order to store the bus_dma related structures and a refcount to keep track of the pending slots before the mbuf chain can be freed. Reported by: G.R. Tested by: G.R. MFC: 1 week Differential revision: https://reviews.freebsd.org/D33876
This commit is contained in:
parent
402c04af1b
commit
dabb3db7a8
@ -71,6 +71,8 @@ __FBSDID("$FreeBSD$");
|
||||
#include <contrib/xen/io/netif.h>
|
||||
#include <xen/xenbus/xenbusvar.h>
|
||||
|
||||
#include <machine/bus.h>
|
||||
|
||||
#include "xenbus_if.h"
|
||||
|
||||
/* Features supported by all backends. TSO and LRO can be negotiated */
|
||||
@ -126,7 +128,6 @@ static void xn_release_tx_bufs(struct netfront_txq *);
|
||||
static void xn_rxq_intr(struct netfront_rxq *);
|
||||
static void xn_txq_intr(struct netfront_txq *);
|
||||
static void xn_intr(void *);
|
||||
static inline int xn_count_frags(struct mbuf *m);
|
||||
static int xn_assemble_tx_request(struct netfront_txq *, struct mbuf *);
|
||||
static int xn_ioctl(struct ifnet *, u_long, caddr_t);
|
||||
static void xn_ifinit_locked(struct netfront_info *);
|
||||
@ -199,6 +200,17 @@ struct netfront_txq {
|
||||
struct taskqueue *tq;
|
||||
struct task defrtask;
|
||||
|
||||
bus_dma_segment_t segs[MAX_TX_REQ_FRAGS];
|
||||
struct mbuf_xennet {
|
||||
struct m_tag tag;
|
||||
bus_dma_tag_t dma_tag;
|
||||
bus_dmamap_t dma_map;
|
||||
struct netfront_txq *txq;
|
||||
SLIST_ENTRY(mbuf_xennet) next;
|
||||
u_int count;
|
||||
} xennet_tag[NET_TX_RING_SIZE + 1];
|
||||
SLIST_HEAD(, mbuf_xennet) tags;
|
||||
|
||||
bool full;
|
||||
};
|
||||
|
||||
@ -221,6 +233,8 @@ struct netfront_info {
|
||||
|
||||
struct ifmedia sc_media;
|
||||
|
||||
bus_dma_tag_t dma_tag;
|
||||
|
||||
bool xn_reset;
|
||||
};
|
||||
|
||||
@ -301,6 +315,42 @@ xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri)
|
||||
return (ref);
|
||||
}
|
||||
|
||||
#define MTAG_COOKIE 1218492000
|
||||
#define MTAG_XENNET 0
|
||||
|
||||
static void mbuf_grab(struct mbuf *m)
|
||||
{
|
||||
struct mbuf_xennet *ref;
|
||||
|
||||
ref = (struct mbuf_xennet *)m_tag_locate(m, MTAG_COOKIE,
|
||||
MTAG_XENNET, NULL);
|
||||
KASSERT(ref != NULL, ("Cannot find refcount"));
|
||||
ref->count++;
|
||||
}
|
||||
|
||||
static void mbuf_release(struct mbuf *m)
|
||||
{
|
||||
struct mbuf_xennet *ref;
|
||||
|
||||
ref = (struct mbuf_xennet *)m_tag_locate(m, MTAG_COOKIE,
|
||||
MTAG_XENNET, NULL);
|
||||
KASSERT(ref != NULL, ("Cannot find refcount"));
|
||||
KASSERT(ref->count > 0, ("Invalid reference count"));
|
||||
|
||||
if (--ref->count == 0)
|
||||
m_freem(m);
|
||||
}
|
||||
|
||||
static void tag_free(struct m_tag *t)
|
||||
{
|
||||
struct mbuf_xennet *ref = (struct mbuf_xennet *)t;
|
||||
|
||||
KASSERT(ref->count == 0, ("Free mbuf tag with pending refcnt"));
|
||||
bus_dmamap_sync(ref->dma_tag, ref->dma_map, BUS_DMASYNC_POSTWRITE);
|
||||
bus_dmamap_destroy(ref->dma_tag, ref->dma_map);
|
||||
SLIST_INSERT_HEAD(&ref->txq->tags, ref, next);
|
||||
}
|
||||
|
||||
#define IPRINTK(fmt, args...) \
|
||||
printf("[XEN] " fmt, ##args)
|
||||
#ifdef INVARIANTS
|
||||
@ -778,11 +828,18 @@ disconnect_txq(struct netfront_txq *txq)
|
||||
static void
|
||||
destroy_txq(struct netfront_txq *txq)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
free(txq->ring.sring, M_DEVBUF);
|
||||
buf_ring_free(txq->br, M_DEVBUF);
|
||||
taskqueue_drain_all(txq->tq);
|
||||
taskqueue_free(txq->tq);
|
||||
|
||||
for (i = 0; i <= NET_TX_RING_SIZE; i++) {
|
||||
bus_dmamap_destroy(txq->info->dma_tag,
|
||||
txq->xennet_tag[i].dma_map);
|
||||
txq->xennet_tag[i].dma_map = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@ -822,10 +879,27 @@ setup_txqs(device_t dev, struct netfront_info *info,
|
||||
|
||||
mtx_init(&txq->lock, txq->name, "netfront transmit lock",
|
||||
MTX_DEF);
|
||||
SLIST_INIT(&txq->tags);
|
||||
|
||||
for (i = 0; i <= NET_TX_RING_SIZE; i++) {
|
||||
txq->mbufs[i] = (void *) ((u_long) i+1);
|
||||
txq->grant_ref[i] = GRANT_REF_INVALID;
|
||||
txq->xennet_tag[i].txq = txq;
|
||||
txq->xennet_tag[i].dma_tag = info->dma_tag;
|
||||
error = bus_dmamap_create(info->dma_tag, 0,
|
||||
&txq->xennet_tag[i].dma_map);
|
||||
if (error != 0) {
|
||||
device_printf(dev,
|
||||
"failed to allocate dma map\n");
|
||||
goto fail;
|
||||
}
|
||||
m_tag_setup(&txq->xennet_tag[i].tag,
|
||||
MTAG_COOKIE, MTAG_XENNET,
|
||||
sizeof(txq->xennet_tag[i]) -
|
||||
sizeof(txq->xennet_tag[i].tag));
|
||||
txq->xennet_tag[i].tag.m_tag_free = &tag_free;
|
||||
SLIST_INSERT_HEAD(&txq->tags, &txq->xennet_tag[i],
|
||||
next);
|
||||
}
|
||||
txq->mbufs[NET_TX_RING_SIZE] = (void *)0;
|
||||
|
||||
@ -1041,7 +1115,7 @@ xn_release_tx_bufs(struct netfront_txq *txq)
|
||||
if (txq->mbufs_cnt < 0) {
|
||||
panic("%s: tx_chain_cnt must be >= 0", __func__);
|
||||
}
|
||||
m_free(m);
|
||||
mbuf_release(m);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1311,7 +1385,7 @@ xn_txeof(struct netfront_txq *txq)
|
||||
txq->mbufs[id] = NULL;
|
||||
add_id_to_freelist(txq->mbufs, id);
|
||||
txq->mbufs_cnt--;
|
||||
m_free(m);
|
||||
mbuf_release(m);
|
||||
/* Only mark the txq active if we've freed up at least one slot to try */
|
||||
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
|
||||
}
|
||||
@ -1507,22 +1581,6 @@ xn_get_responses(struct netfront_rxq *rxq,
|
||||
return (err);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Count the number of fragments in an mbuf chain.
|
||||
*
|
||||
* Surprisingly, there isn't an M* macro for this.
|
||||
*/
|
||||
static inline int
|
||||
xn_count_frags(struct mbuf *m)
|
||||
{
|
||||
int nfrags;
|
||||
|
||||
for (nfrags = 0; m != NULL; m = m->m_next)
|
||||
nfrags++;
|
||||
|
||||
return (nfrags);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an mbuf chain, make sure we have enough room and then push
|
||||
* it onto the transmit ring.
|
||||
@ -1530,46 +1588,50 @@ xn_count_frags(struct mbuf *m)
|
||||
static int
|
||||
xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
|
||||
{
|
||||
struct mbuf *m;
|
||||
struct netfront_info *np = txq->info;
|
||||
struct ifnet *ifp = np->xn_ifp;
|
||||
u_int nfrags;
|
||||
int otherend_id;
|
||||
int otherend_id, error, nfrags;
|
||||
bus_dma_segment_t *segs = txq->segs;
|
||||
struct mbuf_xennet *tag;
|
||||
bus_dmamap_t map;
|
||||
unsigned int i;
|
||||
|
||||
/**
|
||||
* Defragment the mbuf if necessary.
|
||||
*/
|
||||
nfrags = xn_count_frags(m_head);
|
||||
KASSERT(!SLIST_EMPTY(&txq->tags), ("no tags available"));
|
||||
tag = SLIST_FIRST(&txq->tags);
|
||||
SLIST_REMOVE_HEAD(&txq->tags, next);
|
||||
KASSERT(tag->count == 0, ("tag already in-use"));
|
||||
map = tag->dma_map;
|
||||
error = bus_dmamap_load_mbuf_sg(np->dma_tag, map, m_head, segs,
|
||||
&nfrags, 0);
|
||||
if (error == EFBIG || nfrags > np->maxfrags) {
|
||||
struct mbuf *m;
|
||||
|
||||
/*
|
||||
* Check to see whether this request is longer than netback
|
||||
* can handle, and try to defrag it.
|
||||
*/
|
||||
/**
|
||||
* It is a bit lame, but the netback driver in Linux can't
|
||||
* deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of
|
||||
* the Linux network stack.
|
||||
*/
|
||||
if (nfrags > np->maxfrags) {
|
||||
bus_dmamap_unload(np->dma_tag, map);
|
||||
m = m_defrag(m_head, M_NOWAIT);
|
||||
if (!m) {
|
||||
/*
|
||||
* Defrag failed, so free the mbuf and
|
||||
* therefore drop the packet.
|
||||
*/
|
||||
SLIST_INSERT_HEAD(&txq->tags, tag, next);
|
||||
m_freem(m_head);
|
||||
return (EMSGSIZE);
|
||||
}
|
||||
m_head = m;
|
||||
error = bus_dmamap_load_mbuf_sg(np->dma_tag, map, m_head, segs,
|
||||
&nfrags, 0);
|
||||
if (error != 0 || nfrags > np->maxfrags) {
|
||||
bus_dmamap_unload(np->dma_tag, map);
|
||||
SLIST_INSERT_HEAD(&txq->tags, tag, next);
|
||||
m_freem(m_head);
|
||||
return (error ?: EFBIG);
|
||||
}
|
||||
} else if (error != 0) {
|
||||
SLIST_INSERT_HEAD(&txq->tags, tag, next);
|
||||
m_freem(m_head);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/* Determine how many fragments now exist */
|
||||
nfrags = xn_count_frags(m_head);
|
||||
|
||||
/*
|
||||
* Check to see whether the defragmented packet has too many
|
||||
* segments for the Linux netback driver.
|
||||
*/
|
||||
/**
|
||||
* The FreeBSD TCP stack, with TSO enabled, can produce a chain
|
||||
* of mbufs longer than Linux can handle. Make sure we don't
|
||||
@ -1583,6 +1645,8 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
|
||||
"won't be able to handle it, dropping\n",
|
||||
__func__, nfrags, MAX_TX_REQ_FRAGS);
|
||||
#endif
|
||||
SLIST_INSERT_HEAD(&txq->tags, tag, next);
|
||||
bus_dmamap_unload(np->dma_tag, map);
|
||||
m_freem(m_head);
|
||||
return (EMSGSIZE);
|
||||
}
|
||||
@ -1604,9 +1668,9 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
|
||||
* the fragment pointers. Stop when we run out
|
||||
* of fragments or hit the end of the mbuf chain.
|
||||
*/
|
||||
m = m_head;
|
||||
otherend_id = xenbus_get_otherend_id(np->xbdev);
|
||||
for (m = m_head; m; m = m->m_next) {
|
||||
m_tag_prepend(m_head, &tag->tag);
|
||||
for (i = 0; i < nfrags; i++) {
|
||||
netif_tx_request_t *tx;
|
||||
uintptr_t id;
|
||||
grant_ref_t ref;
|
||||
@ -1621,17 +1685,20 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
|
||||
if (txq->mbufs_cnt > NET_TX_RING_SIZE)
|
||||
panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n",
|
||||
__func__);
|
||||
txq->mbufs[id] = m;
|
||||
mbuf_grab(m_head);
|
||||
txq->mbufs[id] = m_head;
|
||||
tx->id = id;
|
||||
ref = gnttab_claim_grant_reference(&txq->gref_head);
|
||||
KASSERT((short)ref >= 0, ("Negative ref"));
|
||||
mfn = virt_to_mfn(mtod(m, vm_offset_t));
|
||||
mfn = atop(segs[i].ds_addr);
|
||||
gnttab_grant_foreign_access_ref(ref, otherend_id,
|
||||
mfn, GNTMAP_readonly);
|
||||
tx->gref = txq->grant_ref[id] = ref;
|
||||
tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
|
||||
tx->offset = segs[i].ds_addr & PAGE_MASK;
|
||||
KASSERT(tx->offset + segs[i].ds_len <= PAGE_SIZE,
|
||||
("mbuf segment crosses a page boundary"));
|
||||
tx->flags = 0;
|
||||
if (m == m_head) {
|
||||
if (i == 0) {
|
||||
/*
|
||||
* The first fragment has the entire packet
|
||||
* size, subsequent fragments have just the
|
||||
@ -1640,7 +1707,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
|
||||
* subtracting the sizes of the other
|
||||
* fragments.
|
||||
*/
|
||||
tx->size = m->m_pkthdr.len;
|
||||
tx->size = m_head->m_pkthdr.len;
|
||||
|
||||
/*
|
||||
* The first fragment contains the checksum flags
|
||||
@ -1654,12 +1721,12 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
|
||||
* so we have to test for CSUM_TSO
|
||||
* explicitly.
|
||||
*/
|
||||
if (m->m_pkthdr.csum_flags
|
||||
if (m_head->m_pkthdr.csum_flags
|
||||
& (CSUM_DELAY_DATA | CSUM_TSO)) {
|
||||
tx->flags |= (NETTXF_csum_blank
|
||||
| NETTXF_data_validated);
|
||||
}
|
||||
if (m->m_pkthdr.csum_flags & CSUM_TSO) {
|
||||
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
|
||||
struct netif_extra_info *gso =
|
||||
(struct netif_extra_info *)
|
||||
RING_GET_REQUEST(&txq->ring,
|
||||
@ -1667,7 +1734,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
|
||||
|
||||
tx->flags |= NETTXF_extra_info;
|
||||
|
||||
gso->u.gso.size = m->m_pkthdr.tso_segsz;
|
||||
gso->u.gso.size = m_head->m_pkthdr.tso_segsz;
|
||||
gso->u.gso.type =
|
||||
XEN_NETIF_GSO_TYPE_TCPV4;
|
||||
gso->u.gso.pad = 0;
|
||||
@ -1677,13 +1744,14 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
|
||||
gso->flags = 0;
|
||||
}
|
||||
} else {
|
||||
tx->size = m->m_len;
|
||||
tx->size = segs[i].ds_len;
|
||||
}
|
||||
if (m->m_next)
|
||||
if (i != nfrags - 1)
|
||||
tx->flags |= NETTXF_more_data;
|
||||
|
||||
txq->ring.req_prod_pvt++;
|
||||
}
|
||||
bus_dmamap_sync(np->dma_tag, map, BUS_DMASYNC_PREWRITE);
|
||||
BPF_MTAP(ifp, m_head);
|
||||
|
||||
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
|
||||
@ -2244,7 +2312,20 @@ create_netdev(device_t dev)
|
||||
ether_ifattach(ifp, np->mac);
|
||||
netfront_carrier_off(np);
|
||||
|
||||
return (0);
|
||||
err = bus_dma_tag_create(
|
||||
bus_get_dma_tag(dev), /* parent */
|
||||
1, PAGE_SIZE, /* algnmnt, boundary */
|
||||
BUS_SPACE_MAXADDR, /* lowaddr */
|
||||
BUS_SPACE_MAXADDR, /* highaddr */
|
||||
NULL, NULL, /* filter, filterarg */
|
||||
PAGE_SIZE * MAX_TX_REQ_FRAGS, /* max request size */
|
||||
MAX_TX_REQ_FRAGS, /* max segments */
|
||||
PAGE_SIZE, /* maxsegsize */
|
||||
BUS_DMA_ALLOCNOW, /* flags */
|
||||
NULL, NULL, /* lockfunc, lockarg */
|
||||
&np->dma_tag);
|
||||
|
||||
return (err);
|
||||
|
||||
error:
|
||||
KASSERT(err != 0, ("Error path with no error code specified"));
|
||||
@ -2277,6 +2358,7 @@ netif_free(struct netfront_info *np)
|
||||
if_free(np->xn_ifp);
|
||||
np->xn_ifp = NULL;
|
||||
ifmedia_removeall(&np->sc_media);
|
||||
bus_dma_tag_destroy(np->dma_tag);
|
||||
}
|
||||
|
||||
static void
|
||||
|
Loading…
Reference in New Issue
Block a user