netmap: add kernel support for the "offsets" feature

This feature lets applications ask netmap to transmit or receive
packets starting at a user-specified offset from the beginning of
the netmap buffer. It is meant to ease packet manipulation
operations such as pushing or popping packet headers, which are
useful when implementing software switches, routers and other
packet processors.
To use the feature, drivers (e.g., iflib, vtnet, etc.) must support
it explicitly. This change does not add support for any driver; it
only introduces the necessary kernel changes. However, offsets
support is already included for VALE ports and pipes.
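For illustration only (not part of this commit), a userspace program could
request offsets at bind time roughly as follows. The struct nmreq_opt_offsets
field names are taken from the kernel code below; the surrounding setup is the
usual nmreq_header/NIOCCTRL pattern, and the interface name and numeric values
are assumptions.

#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdint.h>
#include <string.h>
#include <net/netmap.h>

/* Hypothetical sketch: bind ifname and ask for 128 bytes of initial
 * headroom in every slot, with offsets capped at 256 bytes. */
static int
bind_with_offsets(const char *ifname)
{
	struct nmreq_header hdr;
	struct nmreq_register reg;
	struct nmreq_opt_offsets off;
	int fd = open("/dev/netmap", O_RDWR);

	if (fd < 0)
		return -1;
	memset(&hdr, 0, sizeof(hdr));
	memset(&reg, 0, sizeof(reg));
	memset(&off, 0, sizeof(off));
	hdr.nr_version = NETMAP_API;
	hdr.nr_reqtype = NETMAP_REQ_REGISTER;
	strncpy(hdr.nr_name, ifname, sizeof(hdr.nr_name) - 1);
	hdr.nr_body = (uintptr_t)&reg;
	reg.nr_mode = NR_REG_ALL_NIC;
	off.nro_opt.nro_reqtype = NETMAP_REQ_OPT_OFFSETS;
	off.nro_offset_bits = 0;	/* 0: use the whole ptr field */
	off.nro_max_offset = 256;	/* offsets will never exceed this */
	off.nro_initial_offset = 128;	/* pre-load every slot with 128B headroom */
	off.nro_min_gap = 0;		/* no minimum gap requested */
	hdr.nr_options = (uintptr_t)&off;
	if (ioctl(fd, NIOCCTRL, &hdr) < 0) {
		close(fd);
		return -1;	/* per-option status is in off.nro_opt.nro_status */
	}
	return fd;	/* keep the fd open: the binding lives with it */
}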
Vincenzo Maffione 2021-03-29 18:22:48 +02:00
parent 9d81dd5404
commit a6d768d845
15 changed files with 937 additions and 330 deletions


@ -805,6 +805,14 @@ netmap_update_config(struct netmap_adapter *na)
static int netmap_txsync_to_host(struct netmap_kring *kring, int flags);
static int netmap_rxsync_from_host(struct netmap_kring *kring, int flags);
static int
netmap_default_bufcfg(struct netmap_kring *kring, uint64_t target)
{
kring->hwbuf_len = target;
kring->buf_align = 0; /* no alignment */
return 0;
}
/* create the krings array and initialize the fields common to all adapters.
* The array layout is this:
*
@ -885,12 +893,16 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
kring->nr_pending_mode = NKR_NETMAP_OFF;
if (i < nma_get_nrings(na, t)) {
kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync);
kring->nm_bufcfg = na->nm_bufcfg;
if (kring->nm_bufcfg == NULL)
kring->nm_bufcfg = netmap_default_bufcfg;
} else {
if (!(na->na_flags & NAF_HOST_RINGS))
kring->nr_kflags |= NKR_FAKERING;
kring->nm_sync = (t == NR_TX ?
netmap_txsync_to_host:
netmap_rxsync_from_host);
kring->nm_bufcfg = netmap_default_bufcfg;
}
kring->nm_notify = na->nm_notify;
kring->rhead = kring->rcur = kring->nr_hwcur = 0;
@ -969,20 +981,27 @@ netmap_hw_krings_delete(struct netmap_adapter *na)
netmap_krings_delete(na);
}
static void
netmap_mem_drop(struct netmap_adapter *na)
void
netmap_mem_restore(struct netmap_adapter *na)
{
int last = netmap_mem_deref(na->nm_mem, na);
/* if the native allocator had been overridden on regif,
* restore it now and drop the temporary one
*/
if (last && na->nm_mem_prev) {
if (na->nm_mem_prev) {
netmap_mem_put(na->nm_mem);
na->nm_mem = na->nm_mem_prev;
na->nm_mem_prev = NULL;
}
}
static void
netmap_mem_drop(struct netmap_adapter *na)
{
/* if the native allocator had been overridden on regif,
* restore it now and drop the temporary one
*/
if (netmap_mem_deref(na->nm_mem, na)) {
netmap_mem_restore(na);
}
}
/*
* Undo everything that was done in netmap_do_regif(). In particular,
* call nm_register(ifp,0) to stop netmap mode on the interface and
@ -1571,7 +1590,7 @@ netmap_get_na(struct nmreq_header *hdr,
if (error || *na != NULL)
goto out;
/* try to see if this is a bridge port */
/* try to see if this is a vale port */
error = netmap_get_vale_na(hdr, na, nmd, create);
if (error)
goto out;
@ -2232,6 +2251,198 @@ netmap_buf_size_validate(const struct netmap_adapter *na, unsigned mtu) {
return 0;
}
/* Handle the offset option, if present in the hdr.
* Returns 0 on success, or an error.
*/
static int
netmap_offsets_init(struct netmap_priv_d *priv, struct nmreq_header *hdr)
{
struct nmreq_opt_offsets *opt;
struct netmap_adapter *na = priv->np_na;
struct netmap_kring *kring;
uint64_t mask = 0, bits = 0, maxbits = sizeof(uint64_t) * 8,
max_offset = 0, initial_offset = 0, min_gap = 0;
u_int i;
enum txrx t;
int error = 0;
opt = (struct nmreq_opt_offsets *)
nmreq_getoption(hdr, NETMAP_REQ_OPT_OFFSETS);
if (opt == NULL)
return 0;
if (!(na->na_flags & NAF_OFFSETS)) {
if (netmap_verbose)
nm_prerr("%s does not support offsets",
na->name);
error = EOPNOTSUPP;
goto out;
}
/* check sanity of the opt values */
max_offset = opt->nro_max_offset;
min_gap = opt->nro_min_gap;
initial_offset = opt->nro_initial_offset;
bits = opt->nro_offset_bits;
if (bits > maxbits) {
if (netmap_verbose)
nm_prerr("bits: %llu too large (max %llu)",
(unsigned long long)bits,
(unsigned long long)maxbits);
error = EINVAL;
goto out;
}
/* we take bits == 0 as a request to use the entire field */
if (bits == 0 || bits == maxbits) {
/* shifting a type by sizeof(type) is undefined */
bits = maxbits;
mask = 0xffffffffffffffff;
} else {
mask = (1ULL << bits) - 1;
}
if (max_offset > NETMAP_BUF_SIZE(na)) {
if (netmap_verbose)
nm_prerr("max offset %llu > buf size %u",
(unsigned long long)max_offset, NETMAP_BUF_SIZE(na));
error = EINVAL;
goto out;
}
if ((max_offset & mask) != max_offset) {
if (netmap_verbose)
nm_prerr("max offset %llu to large for %llu bits",
(unsigned long long)max_offset,
(unsigned long long)bits);
error = EINVAL;
goto out;
}
if (initial_offset > max_offset) {
if (netmap_verbose)
nm_prerr("initial offset %llu > max offset %llu",
(unsigned long long)initial_offset,
(unsigned long long)max_offset);
error = EINVAL;
goto out;
}
/* initialize the kring and ring fields. */
foreach_selected_ring(priv, t, i, kring) {
struct netmap_kring *kring = NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
u_int j;
/* if the ring is already in use, we check that the
* new request is compatible with the existing one
*/
if (kring->offset_mask) {
if ((kring->offset_mask & mask) != mask ||
kring->offset_max < max_offset) {
if (netmap_verbose)
nm_prinf("%s: cannot increase"
"offset mask and/or max"
"(current: mask=%llx,max=%llu",
kring->name,
(unsigned long long)kring->offset_mask,
(unsigned long long)kring->offset_max);
error = EBUSY;
goto out;
}
mask = kring->offset_mask;
max_offset = kring->offset_max;
} else {
kring->offset_mask = mask;
*(uint64_t *)(uintptr_t)&ring->offset_mask = mask;
kring->offset_max = max_offset;
kring->offset_gap = min_gap;
}
/* if there is an initial offset, put it into
* all the slots
*
* Note: we cannot change the offsets if the
* ring is already in use.
*/
if (!initial_offset || kring->users > 1)
continue;
for (j = 0; j < kring->nkr_num_slots; j++) {
struct netmap_slot *slot = ring->slot + j;
nm_write_offset(kring, slot, initial_offset);
}
}
out:
opt->nro_opt.nro_status = error;
if (!error) {
opt->nro_max_offset = max_offset;
}
return error;
}
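A worked illustration of the checks above (the values are examples only):

/* example: a bind request with nro_offset_bits == 12 */
uint64_t bits = 12;
uint64_t mask = (1ULL << bits) - 1;	/* 0xfff: offsets use the low 12 bits of slot->ptr */
/* the request is then accepted only if
 *   nro_max_offset <= NETMAP_BUF_SIZE(na),
 *   (nro_max_offset & mask) == nro_max_offset  (i.e. at most 4095 here), and
 *   nro_initial_offset <= nro_max_offset. */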
static int
netmap_compute_buf_len(struct netmap_priv_d *priv)
{
enum txrx t;
u_int i;
struct netmap_kring *kring;
int error = 0;
unsigned mtu = 0;
struct netmap_adapter *na = priv->np_na;
uint64_t target, maxframe;
if (na->ifp != NULL)
mtu = nm_os_ifnet_mtu(na->ifp);
foreach_selected_ring(priv, t, i, kring) {
if (kring->users > 1)
continue;
target = NETMAP_BUF_SIZE(kring->na) -
kring->offset_max;
if (!kring->offset_gap)
kring->offset_gap =
NETMAP_BUF_SIZE(kring->na);
if (kring->offset_gap < target)
target = kring->offset_gap;
if (mtu) {
maxframe = mtu + ETH_HLEN +
ETH_FCS_LEN + VLAN_HLEN;
if (maxframe < target) {
target = kring->offset_gap;
}
}
error = kring->nm_bufcfg(kring, target);
if (error)
goto out;
*(uint64_t *)(uintptr_t)&kring->ring->buf_align = kring->buf_align;
if (mtu && t == NR_RX && kring->hwbuf_len < mtu) {
if (!(na->na_flags & NAF_MOREFRAG)) {
nm_prerr("error: large MTU (%d) needed "
"but %s does not support "
"NS_MOREFRAG", mtu,
na->name);
error = EINVAL;
goto out;
} else {
nm_prinf("info: netmap application on "
"%s needs to support "
"NS_MOREFRAG "
"(MTU=%u,buf_size=%llu)",
kring->name, mtu,
(unsigned long long)kring->hwbuf_len);
}
}
}
out:
return error;
}
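As a worked illustration of the computation above (numbers are examples only,
and the MTU capping is ignored for simplicity): with NETMAP_BUF_SIZE(na) = 2048
and kring->offset_max = 128 on a ring with no explicit gap request (offset_gap
then defaults to the buffer size), the target passed to nm_bufcfg() is
2048 - 128 = 1920; netmap_default_bufcfg() simply sets hwbuf_len = 1920 and
buf_align = 0, while a driver-specific callback may reduce hwbuf_len further or
require a non-zero alignment.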
/*
* possibly move the interface to netmap-mode.
@ -2381,6 +2592,16 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
if (error)
goto err_rel_excl;
/* initialize offsets if requested */
error = netmap_offsets_init(priv, hdr);
if (error)
goto err_rel_excl;
/* compute and validate the buf lengths */
error = netmap_compute_buf_len(priv);
if (error)
goto err_rel_excl;
/* in all cases, create a new netmap if */
nifp = netmap_mem_if_new(na, priv);
if (nifp == NULL) {
@ -2713,17 +2934,12 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
}
#ifdef WITH_VALE
case NETMAP_REQ_VALE_ATTACH: {
error = netmap_vale_attach(hdr, NULL /* userspace request */);
error = netmap_bdg_attach(hdr, NULL /* userspace request */);
break;
}
case NETMAP_REQ_VALE_DETACH: {
error = netmap_vale_detach(hdr, NULL /* userspace request */);
break;
}
case NETMAP_REQ_VALE_LIST: {
error = netmap_vale_list(hdr);
error = netmap_bdg_detach(hdr, NULL /* userspace request */);
break;
}
@ -2795,6 +3011,11 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
break;
}
case NETMAP_REQ_VALE_LIST: {
error = netmap_vale_list(hdr);
break;
}
case NETMAP_REQ_VALE_NEWIF: {
error = nm_vi_create(hdr);
break;
@ -2804,13 +3025,13 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
error = nm_vi_destroy(hdr->nr_name);
break;
}
#endif /* WITH_VALE */
case NETMAP_REQ_VALE_POLLING_ENABLE:
case NETMAP_REQ_VALE_POLLING_DISABLE: {
error = nm_bdg_polling(hdr);
break;
}
#endif /* WITH_VALE */
case NETMAP_REQ_POOLS_INFO_GET: {
/* Get information from the memory allocator used for
* hdr->nr_name. */
@ -3029,6 +3250,9 @@ nmreq_opt_size_by_type(uint32_t nro_reqtype, uint64_t nro_size)
case NETMAP_REQ_OPT_SYNC_KLOOP_MODE:
rv = sizeof(struct nmreq_opt_sync_kloop_mode);
break;
case NETMAP_REQ_OPT_OFFSETS:
rv = sizeof(struct nmreq_opt_offsets);
break;
}
/* subtract the common header */
return rv - sizeof(struct nmreq_option);
@ -3733,16 +3957,14 @@ netmap_attach_common(struct netmap_adapter *na)
na->active_fds = 0;
if (na->nm_mem == NULL) {
/* use the global allocator */
na->nm_mem = netmap_mem_get(&nm_mem);
/* use iommu or global allocator */
na->nm_mem = netmap_mem_get_iommu(na);
}
#ifdef WITH_VALE
if (na->nm_bdg_attach == NULL)
/* no special nm_bdg_attach callback. On VALE
* attach, we need to interpose a bwrap
*/
na->nm_bdg_attach = netmap_default_bdg_attach;
#endif
return 0;
}


@ -540,6 +540,85 @@ netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
return error;
}
/* Process NETMAP_REQ_VALE_ATTACH.
*/
int
netmap_bdg_attach(struct nmreq_header *hdr, void *auth_token)
{
struct nmreq_vale_attach *req =
(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter * vpna;
struct netmap_adapter *na = NULL;
struct netmap_mem_d *nmd = NULL;
struct nm_bridge *b = NULL;
int error;
NMG_LOCK();
/* permission check for modified bridges */
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto unlock_exit;
}
if (req->reg.nr_mem_id) {
nmd = netmap_mem_find(req->reg.nr_mem_id);
if (nmd == NULL) {
error = EINVAL;
goto unlock_exit;
}
}
/* check for existing one */
error = netmap_get_vale_na(hdr, &na, nmd, 0);
if (na) {
error = EBUSY;
goto unref_exit;
}
error = netmap_get_vale_na(hdr, &na,
nmd, 1 /* create if not exists */);
if (error) { /* no device */
goto unlock_exit;
}
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto unlock_exit;
}
if (NETMAP_OWNED_BY_ANY(na)) {
error = EBUSY;
goto unref_exit;
}
if (na->nm_bdg_ctl) {
/* nop for VALE ports. The bwrap needs to put the hwna
* in netmap mode (see netmap_bwrap_bdg_ctl)
*/
error = na->nm_bdg_ctl(hdr, na);
if (error)
goto unref_exit;
nm_prdis("registered %s to netmap-mode", na->name);
}
vpna = (struct netmap_vp_adapter *)na;
req->port_index = vpna->bdg_port;
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
return 0;
unref_exit:
netmap_adapter_put(na);
unlock_exit:
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
return error;
}
int
nm_is_bwrap(struct netmap_adapter *na)
@ -547,6 +626,74 @@ nm_is_bwrap(struct netmap_adapter *na)
return na->nm_register == netmap_bwrap_reg;
}
/* Process NETMAP_REQ_VALE_DETACH.
*/
int
netmap_bdg_detach(struct nmreq_header *hdr, void *auth_token)
{
int error;
NMG_LOCK();
error = netmap_bdg_detach_locked(hdr, auth_token);
NMG_UNLOCK();
return error;
}
int
netmap_bdg_detach_locked(struct nmreq_header *hdr, void *auth_token)
{
struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter *vpna;
struct netmap_adapter *na;
struct nm_bridge *b = NULL;
int error;
/* permission check for modified bridges */
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto error_exit;
}
error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
if (error) { /* no device, or another bridge or user owns the device */
goto error_exit;
}
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto error_exit;
} else if (nm_is_bwrap(na) &&
((struct netmap_bwrap_adapter *)na)->na_polling_state) {
/* Don't detach a NIC with polling */
error = EBUSY;
goto unref_exit;
}
vpna = (struct netmap_vp_adapter *)na;
if (na->na_vp != vpna) {
/* trying to detach first attach of VALE persistent port attached
* to 2 bridges
*/
error = EBUSY;
goto unref_exit;
}
nmreq_det->port_index = vpna->bdg_port;
if (na->nm_bdg_ctl) {
/* remove the port from bridge. The bwrap
* also needs to put the hwna in normal mode
*/
error = na->nm_bdg_ctl(hdr, na);
}
unref_exit:
netmap_adapter_put(na);
error_exit:
return error;
}
struct nm_bdg_polling_state;
struct
@ -1092,7 +1239,7 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
* hwna rx ring.
* The bridge wrapper then sends the packets through the bridge.
*/
static int
int
netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
@ -1217,7 +1364,7 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
/* intercept the hwna nm_nofify callback on the hw rings */
for (i = 0; i < hwna->num_rx_rings; i++) {
hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
hwna->rx_rings[i]->nm_notify = bna->nm_intr_notify;
}
i = hwna->num_rx_rings; /* for safety */
/* save the host ring notify unconditionally */
@ -1250,12 +1397,6 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
hwna->na_lut.objtotal = 0;
hwna->na_lut.objsize = 0;
/* pass ownership of the netmap rings to the hwna */
for_rx_tx(t) {
for (i = 0; i < netmap_all_rings(na, t); i++) {
NMR(na, t)[i]->ring = NULL;
}
}
/* reset the number of host rings to default */
for_rx_tx(t) {
nma_set_host_nrings(hwna, t, 1);
@ -1275,6 +1416,11 @@ netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
struct netmap_adapter *hwna = bna->hwna;
int error;
/* cache the lut in the embedded host adapter */
error = netmap_mem_get_lut(hwna->nm_mem, &bna->host.up.na_lut);
if (error)
return error;
/* Forward the request to the hwna. It may happen that nobody
* registered hwna yet, so netmap_mem_get_lut() may have not
* been called yet. */
@ -1289,9 +1435,69 @@ netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
info->num_rx_descs = hwna->num_tx_desc;
info->rx_buf_maxsize = hwna->rx_buf_maxsize;
if (na->na_flags & NAF_HOST_RINGS) {
struct netmap_adapter *hostna = &bna->host.up;
enum txrx t;
/* limit the number of host rings to that of hw */
if (na->na_flags & NAF_HOST_ALL) {
hostna->num_tx_rings = nma_get_nrings(hwna, NR_RX);
hostna->num_rx_rings = nma_get_nrings(hwna, NR_TX);
} else {
nm_bound_var(&hostna->num_tx_rings, 1, 1,
nma_get_nrings(hwna, NR_TX), NULL);
nm_bound_var(&hostna->num_rx_rings, 1, 1,
nma_get_nrings(hwna, NR_RX), NULL);
}
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t);
u_int nr = nma_get_nrings(hostna, t);
nma_set_host_nrings(na, t, nr);
if (nma_get_host_nrings(hwna, t) < nr) {
nma_set_host_nrings(hwna, t, nr);
}
nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
}
}
return 0;
}
/* nm_bufcfg callback for bwrap */
static int
netmap_bwrap_bufcfg(struct netmap_kring *kring, uint64_t target)
{
struct netmap_adapter *na = kring->na;
struct netmap_bwrap_adapter *bna =
(struct netmap_bwrap_adapter *)na;
struct netmap_adapter *hwna = bna->hwna;
struct netmap_kring *hwkring;
enum txrx r;
int error;
/* we need the hw kring that corresponds to the bwrap one:
* remember that rx and tx are swapped
*/
r = nm_txrx_swap(kring->tx);
hwkring = NMR(hwna, r)[kring->ring_id];
/* copy down the offset information, forward the request
* and copy up the results
*/
hwkring->offset_mask = kring->offset_mask;
hwkring->offset_max = kring->offset_max;
hwkring->offset_gap = kring->offset_gap;
error = hwkring->nm_bufcfg(hwkring, target);
if (error)
return error;
kring->hwbuf_len = hwkring->hwbuf_len;
kring->buf_align = hwkring->buf_align;
return 0;
}
/* nm_krings_create callback for bwrap */
int
@ -1314,6 +1520,9 @@ netmap_bwrap_krings_create_common(struct netmap_adapter *na)
for_rx_tx(t) {
for (i = 0; i < netmap_all_rings(hwna, t); i++) {
NMR(hwna, t)[i]->users++;
/* this to prevent deletion of the rings through
* our krings, instead of through the hwna ones */
NMR(na, t)[i]->nr_kflags |= NKR_NEEDRING;
}
}
@ -1355,6 +1564,7 @@ netmap_bwrap_krings_create_common(struct netmap_adapter *na)
for_rx_tx(t) {
for (i = 0; i < netmap_all_rings(hwna, t); i++) {
NMR(hwna, t)[i]->users--;
NMR(na, t)[i]->users--;
}
}
hwna->nm_krings_delete(hwna);
@ -1377,6 +1587,7 @@ netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
for_rx_tx(t) {
for (i = 0; i < netmap_all_rings(hwna, t); i++) {
NMR(hwna, t)[i]->users--;
NMR(na, t)[i]->users--;
}
}
@ -1480,6 +1691,7 @@ netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
error = netmap_do_regif(npriv, na, hdr);
if (error) {
netmap_priv_delete(npriv);
netmap_mem_restore(bna->hwna);
return error;
}
bna->na_kpriv = npriv;
@ -1490,6 +1702,7 @@ netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
netmap_priv_delete(bna->na_kpriv);
bna->na_kpriv = NULL;
na->na_flags &= ~NAF_BUSY;
netmap_mem_restore(bna->hwna);
}
return error;
@ -1527,6 +1740,7 @@ netmap_bwrap_attach_common(struct netmap_adapter *na,
}
na->nm_dtor = netmap_bwrap_dtor;
na->nm_config = netmap_bwrap_config;
na->nm_bufcfg = netmap_bwrap_bufcfg;
na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
na->pdev = hwna->pdev;
na->nm_mem = netmap_mem_get(hwna->nm_mem);
@ -1546,25 +1760,8 @@ netmap_bwrap_attach_common(struct netmap_adapter *na,
na->na_flags |= NAF_HOST_RINGS;
hostna = &bna->host.up;
/* limit the number of host rings to that of hw */
nm_bound_var(&hostna->num_tx_rings, 1, 1,
nma_get_nrings(hwna, NR_TX), NULL);
nm_bound_var(&hostna->num_rx_rings, 1, 1,
nma_get_nrings(hwna, NR_RX), NULL);
snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
hostna->ifp = hwna->ifp;
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t);
u_int nr = nma_get_nrings(hostna, t);
nma_set_nrings(hostna, t, nr);
nma_set_host_nrings(na, t, nr);
if (nma_get_host_nrings(hwna, t) < nr) {
nma_set_host_nrings(hwna, t, nr);
}
nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
}
// hostna->nm_txsync = netmap_bwrap_host_txsync;
// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
hostna->nm_mem = netmap_mem_get(na->nm_mem);
@ -1574,6 +1771,7 @@ netmap_bwrap_attach_common(struct netmap_adapter *na,
hostna->na_hostvp = &bna->host;
hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
/* bwrap_config() will determine the number of host rings */
}
if (hwna->na_flags & NAF_MOREFRAG)
na->na_flags |= NAF_MOREFRAG;


@ -178,8 +178,10 @@ int netmap_bdg_free(struct nm_bridge *b);
void netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw);
int netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na);
int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
int netmap_bdg_detach_locked(struct nmreq_header *hdr, void *auth_token);
int netmap_vp_reg(struct netmap_adapter *na, int onoff);
int netmap_vp_rxsync(struct netmap_kring *kring, int flags);
int netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags);
int netmap_bwrap_notify(struct netmap_kring *kring, int flags);
int netmap_bwrap_attach_common(struct netmap_adapter *na,
struct netmap_adapter *hwna);


@ -1057,7 +1057,7 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
vm_page_replace(page, object, (*mres)->pindex, *mres);
*mres = page;
}
vm_page_valid(page);
page->valid = VM_PAGE_BITS_ALL;
return (VM_PAGER_OK);
}


@ -106,7 +106,7 @@ __FBSDID("$FreeBSD$");
static inline struct mbuf *
nm_os_get_mbuf(struct ifnet *ifp, int len)
{
return alloc_skb(ifp->needed_headroom + len +
return alloc_skb(LL_RESERVED_SPACE(ifp) + len +
ifp->needed_tailroom, GFP_ATOMIC);
}


@ -459,8 +459,16 @@ struct netmap_kring {
* On a NIC reset, the NIC ring indexes may be reset but the
* indexes in the netmap rings remain the same. nkr_hwofs
* keeps track of the offset between the two.
*
* Moreover, during reset, we can restore only the subset of
* the NIC ring that corresponds to the kernel-owned part of
* the netmap ring. The rest of the slots must be restored
* by the *sync routines when the user releases more slots.
* The nkr_to_refill field keeps track of the number of slots
* that still need to be restored.
*/
int32_t nkr_hwofs;
int32_t nkr_to_refill;
/* last_reclaim is opaque marker to help reduce the frequency
* of operations such as reclaiming tx buffers. A possible use
@ -535,6 +543,36 @@ struct netmap_kring {
uint32_t pipe_tail; /* hwtail updated by the other end */
#endif /* WITH_PIPES */
/* mask for the offset-related part of the ptr field in the slots */
uint64_t offset_mask;
/* maximum user-specified offset, as stipulated at bind time.
* Larger offset requests will be silently capped to offset_max.
*/
uint64_t offset_max;
/* minimum gap between two consecutive offsets into the same
* buffer, as stipulated at bind time. This is used to choose
* the hwbuf_len, but is not otherwise checked for compliance
* at runtime.
*/
uint64_t offset_gap;
/* size of hardware buffer. This may be less than the size of
* the netmap buffers because of non-zero offsets, or because
* the netmap buffer size exceeds the capability of the hardware.
*/
uint64_t hwbuf_len;
/* required alignment (in bytes) for the buffers used by this ring.
* Netmap buffers are aligned to cachelines, which should suffice
* for most NICs. If the user is passing offsets, though, we need
* to check that the resulting buf address complies with any
* alignment restriction.
*/
uint64_t buf_align;
/* hardware-specific logic for the selection of the hwbuf_len */
int (*nm_bufcfg)(struct netmap_kring *kring, uint64_t target);
int (*save_notify)(struct netmap_kring *kring, int flags);
#ifdef WITH_MONITOR
@ -719,6 +757,8 @@ struct netmap_adapter {
#define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */
/* free */
#define NAF_MOREFRAG 512 /* the adapter supports NS_MOREFRAG */
#define NAF_OFFSETS 1024 /* the adapter supports the slot offsets */
#define NAF_HOST_ALL 2048 /* the adapter wants as many host rings as hw */
#define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */
#define NAF_BUSY (1U<<31) /* the adapter is used internally and
* cannot be registered from userspace
@ -782,6 +822,22 @@ struct netmap_adapter {
* nm_config() returns configuration information from the OS
* Called with NMG_LOCK held.
*
* nm_bufcfg()
* the purpose of this callback is to fill the kring->hwbuf_len
* (l) and kring->buf_align fields. The l value is most important
* for RX rings, where we want to disallow writes outside of the
* netmap buffer. The l value must be computed taking into account
* the stipulated max_offset (o), possibly increased if there are
* alignment constraints, the maxframe (m), if known, and the
* current NETMAP_BUF_SIZE (b) of the memory region used by the
* adapter. We want the largest supported l such that o + l <= b.
* If m is known to be <= b - o, the callback may also choose the
* largest l <= b, ignoring the offset. The buf_align field is
* most important for TX rings when there are offsets. The user
* will see this value in the ring->buf_align field. Misaligned
* offsets will cause the corresponding packets to be silently
* dropped.
*
* nm_krings_create() create and init the tx_rings and
* rx_rings arrays of kring structures. In particular,
* set the nm_sync callbacks for each ring.
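As a sketch of what a driver-specific nm_bufcfg() callback could look like
under the rules above (hypothetical hardware limits, not taken from any real
driver):

/* Hypothetical example, not part of this commit: a driver whose DMA
 * engine addresses at most 2048 bytes per buffer and requires the
 * resulting buffer address (base + offset) to be 8-byte aligned. */
static int
foo_nm_bufcfg(struct netmap_kring *kring, uint64_t target)
{
	const uint64_t hw_max_buf = 2048;	/* assumed hardware limit */

	if (target > hw_max_buf)
		target = hw_max_buf;
	kring->hwbuf_len = target;	/* largest l such that offset + l fits */
	kring->buf_align = 8;		/* user offsets must preserve 8-byte alignment */
	return 0;
}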
@ -811,6 +867,7 @@ struct netmap_adapter {
int (*nm_txsync)(struct netmap_kring *kring, int flags);
int (*nm_rxsync)(struct netmap_kring *kring, int flags);
int (*nm_notify)(struct netmap_kring *kring, int flags);
int (*nm_bufcfg)(struct netmap_kring *kring, uint64_t target);
#define NAF_FORCE_READ 1
#define NAF_FORCE_RECLAIM 2
#define NAF_CAN_FORWARD_DOWN 4
@ -1096,12 +1153,13 @@ struct netmap_bwrap_adapter {
* here its original value, to be restored at detach
*/
struct netmap_vp_adapter *saved_na_vp;
int (*nm_intr_notify)(struct netmap_kring *kring, int flags);
};
int nm_bdg_polling(struct nmreq_header *hdr);
int netmap_bdg_attach(struct nmreq_header *hdr, void *auth_token);
int netmap_bdg_detach(struct nmreq_header *hdr, void *auth_token);
#ifdef WITH_VALE
int netmap_vale_attach(struct nmreq_header *hdr, void *auth_token);
int netmap_vale_detach(struct nmreq_header *hdr, void *auth_token);
int netmap_vale_list(struct nmreq_header *hdr);
int netmap_vi_create(struct nmreq_header *hdr, int);
int nm_vi_create(struct nmreq_header *);
@ -1431,6 +1489,12 @@ uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *);
} while (0)
#endif
#define NM_CHECK_ADDR_LEN_OFF(na_, l_, o_) do { \
if ((l_) + (o_) < (l_) || \
(l_) + (o_) > NETMAP_BUF_SIZE(na_)) { \
(l_) = NETMAP_BUF_SIZE(na_) - (o_); \
} } while (0)
/*---------------------------------------------------------------*/
/*
@ -1493,6 +1557,7 @@ int netmap_get_na(struct nmreq_header *hdr, struct netmap_adapter **na,
void netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp);
int netmap_get_hw_na(struct ifnet *ifp,
struct netmap_mem_d *nmd, struct netmap_adapter **na);
void netmap_mem_restore(struct netmap_adapter *na);
#ifdef WITH_VALE
uint32_t netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
@ -1680,7 +1745,7 @@ extern int netmap_generic_txqdisc;
/* Assigns the device IOMMU domain to an allocator.
* Returns -ENOMEM in case the domain is different */
#define nm_iommu_group_id(dev) (0)
#define nm_iommu_group_id(dev) (-1)
/* Callback invoked by the dma machinery after a successful dmamap_load */
static void netmap_dmamap_cb(__unused void *arg,
@ -1890,6 +1955,9 @@ struct plut_entry {
struct netmap_obj_pool;
/* alignment for netmap buffers */
#define NM_BUF_ALIGN 64
/*
* NMB return the virtual address of a buffer (buffer 0 on bad index)
* PNMB also fills the physical address
@ -1919,6 +1987,40 @@ PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
return ret;
}
static inline void
nm_write_offset(struct netmap_kring *kring,
struct netmap_slot *slot, uint64_t offset)
{
slot->ptr = (slot->ptr & ~kring->offset_mask) |
(offset & kring->offset_mask);
}
static inline uint64_t
nm_get_offset(struct netmap_kring *kring, struct netmap_slot *slot)
{
uint64_t offset = (slot->ptr & kring->offset_mask);
if (unlikely(offset > kring->offset_max))
offset = kring->offset_max;
return offset;
}
static inline void *
NMB_O(struct netmap_kring *kring, struct netmap_slot *slot)
{
void *addr = NMB(kring->na, slot);
return (char *)addr + nm_get_offset(kring, slot);
}
static inline void *
PNMB_O(struct netmap_kring *kring, struct netmap_slot *slot, uint64_t *pp)
{
void *addr = PNMB(kring->na, slot, pp);
uint64_t offset = nm_get_offset(kring, slot);
addr = (char *)addr + offset;
*pp += offset;
return addr;
}
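A minimal in-kernel sketch (not part of this commit) showing how the new
accessors compose to implement the header-push use case mentioned in the
commit message; the helper name is made up:

/* Hypothetical helper: prepend hlen bytes of header to a TX slot by
 * moving its offset back.  Returns the new start of frame, or NULL if
 * the slot does not have enough headroom. */
static inline void *
nm_push_hdr(struct netmap_kring *kring, struct netmap_slot *slot, u_int hlen)
{
	uint64_t off = nm_get_offset(kring, slot);

	if (off < hlen)
		return NULL;			/* not enough headroom */
	nm_write_offset(kring, slot, off - hlen);
	slot->len += hlen;			/* header is now part of the frame */
	return NMB_O(kring, slot);		/* write the header here */
}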
/*
* Structure associated to each netmap file descriptor.
@ -2418,4 +2520,15 @@ void netmap_uninit_bridges(void);
#define CSB_WRITE(csb, field, v) (suword32(&csb->field, v))
#endif /* ! linux */
/* some macros that may not be defined */
#ifndef ETH_HLEN
#define ETH_HLEN 6
#endif
#ifndef ETH_FCS_LEN
#define ETH_FCS_LEN 4
#endif
#ifndef VLAN_HLEN
#define VLAN_HLEN 4
#endif
#endif /* _NET_NETMAP_KERN_H_ */


@ -146,16 +146,19 @@ struct netmap_mem_ops {
vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t);
int (*nmd_config)(struct netmap_mem_d *);
int (*nmd_finalize)(struct netmap_mem_d *);
void (*nmd_deref)(struct netmap_mem_d *);
int (*nmd_finalize)(struct netmap_mem_d *, struct netmap_adapter *);
void (*nmd_deref)(struct netmap_mem_d *, struct netmap_adapter *);
ssize_t (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr);
void (*nmd_delete)(struct netmap_mem_d *);
struct netmap_if * (*nmd_if_new)(struct netmap_adapter *,
struct netmap_priv_d *);
void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *);
int (*nmd_rings_create)(struct netmap_adapter *);
void (*nmd_rings_delete)(struct netmap_adapter *);
struct netmap_if * (*nmd_if_new)(struct netmap_mem_d *,
struct netmap_adapter *, struct netmap_priv_d *);
void (*nmd_if_delete)(struct netmap_mem_d *,
struct netmap_adapter *, struct netmap_if *);
int (*nmd_rings_create)(struct netmap_mem_d *,
struct netmap_adapter *);
void (*nmd_rings_delete)(struct netmap_mem_d *,
struct netmap_adapter *);
};
struct netmap_mem_d {
@ -165,6 +168,7 @@ struct netmap_mem_d {
u_int flags;
#define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */
#define NETMAP_MEM_HIDDEN 0x8 /* being prepared */
#define NETMAP_MEM_NOMAP 0x10 /* do not map/unmap pdevs */
int lasterr; /* last error for curr config */
int active; /* active users */
int refcount;
@ -267,7 +271,7 @@ netmap_mem_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
struct netmap_mem_d *nmd = na->nm_mem;
NMA_LOCK(nmd);
nifp = nmd->ops->nmd_if_new(na, priv);
nifp = nmd->ops->nmd_if_new(nmd, na, priv);
NMA_UNLOCK(nmd);
return nifp;
@ -279,7 +283,7 @@ netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nif)
struct netmap_mem_d *nmd = na->nm_mem;
NMA_LOCK(nmd);
nmd->ops->nmd_if_delete(na, nif);
nmd->ops->nmd_if_delete(nmd, na, nif);
NMA_UNLOCK(nmd);
}
@ -290,7 +294,7 @@ netmap_mem_rings_create(struct netmap_adapter *na)
struct netmap_mem_d *nmd = na->nm_mem;
NMA_LOCK(nmd);
rv = nmd->ops->nmd_rings_create(na);
rv = nmd->ops->nmd_rings_create(nmd, na);
NMA_UNLOCK(nmd);
return rv;
@ -302,13 +306,13 @@ netmap_mem_rings_delete(struct netmap_adapter *na)
struct netmap_mem_d *nmd = na->nm_mem;
NMA_LOCK(nmd);
nmd->ops->nmd_rings_delete(na);
nmd->ops->nmd_rings_delete(nmd, na);
NMA_UNLOCK(nmd);
}
static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);
static int nm_mem_check_group(struct netmap_mem_d *, struct device *);
static void nm_mem_release_id(struct netmap_mem_d *);
nm_memid_t
@ -319,14 +323,14 @@ netmap_mem_get_id(struct netmap_mem_d *nmd)
#ifdef NM_DEBUG_MEM_PUTGET
#define NM_DBG_REFC(nmd, func, line) \
nm_prinf("%d mem[%d] -> %d", line, (nmd)->nm_id, (nmd)->refcount);
nm_prinf("%d mem[%d:%d] -> %d", line, (nmd)->nm_id, (nmd)->nm_grp, (nmd)->refcount);
#else
#define NM_DBG_REFC(nmd, func, line)
#endif
/* circular list of all existing allocators */
static struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
NM_MTX_T nm_mem_list_lock;
static NM_MTX_T nm_mem_list_lock;
struct netmap_mem_d *
__netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line)
@ -356,7 +360,7 @@ int
netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
int lasterr = 0;
if (nm_mem_assign_group(nmd, na->pdev) < 0) {
if (nm_mem_check_group(nmd, na->pdev) < 0) {
return ENOMEM;
}
@ -367,9 +371,9 @@ netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
nmd->active++;
nmd->lasterr = nmd->ops->nmd_finalize(nmd);
nmd->lasterr = nmd->ops->nmd_finalize(nmd, na);
if (!nmd->lasterr && na->pdev) {
if (!nmd->lasterr && !(nmd->flags & NETMAP_MEM_NOMAP)) {
nmd->lasterr = netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
}
@ -473,7 +477,7 @@ netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
int last_user = 0;
NMA_LOCK(nmd);
if (na->active_fds <= 0)
if (na->active_fds <= 0 && !(nmd->flags & NETMAP_MEM_NOMAP))
netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
if (nmd->active == 1) {
last_user = 1;
@ -484,11 +488,10 @@ netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
*/
netmap_mem_init_bitmaps(nmd);
}
nmd->ops->nmd_deref(nmd);
nmd->ops->nmd_deref(nmd, na);
nmd->active--;
if (last_user) {
nmd->nm_grp = -1;
nmd->lasterr = 0;
}
@ -584,6 +587,7 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */
.name = "1"
};
static struct netmap_mem_d nm_mem_blueprint;
/* blueprint for the private memory allocators */
/* XXX clang is not happy about using name as a print format */
@ -649,7 +653,7 @@ DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
/* call with nm_mem_list_lock held */
static int
nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
nm_mem_assign_id_locked(struct netmap_mem_d *nmd, int grp_id)
{
nm_memid_t id;
struct netmap_mem_d *scan = netmap_last_mem_d;
@ -663,6 +667,7 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
scan = scan->next;
if (id != scan->nm_id) {
nmd->nm_id = id;
nmd->nm_grp = grp_id;
nmd->prev = scan->prev;
nmd->next = scan;
scan->prev->next = nmd;
@ -680,12 +685,12 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
/* call with nm_mem_list_lock *not* held */
static int
nm_mem_assign_id(struct netmap_mem_d *nmd)
nm_mem_assign_id(struct netmap_mem_d *nmd, int grp_id)
{
int ret;
NM_MTX_LOCK(nm_mem_list_lock);
ret = nm_mem_assign_id_locked(nmd);
ret = nm_mem_assign_id_locked(nmd, grp_id);
NM_MTX_UNLOCK(nm_mem_list_lock);
return ret;
@ -725,21 +730,24 @@ netmap_mem_find(nm_memid_t id)
}
static int
nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
nm_mem_check_group(struct netmap_mem_d *nmd, struct device *dev)
{
int err = 0, id;
/* Skip non-hw adapters.
* VALE ports can use a particular allocator through the vale-ctl -m option
*/
if (!dev)
return 0;
id = nm_iommu_group_id(dev);
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("iommu_group %d", id);
NMA_LOCK(nmd);
if (nmd->nm_grp < 0)
nmd->nm_grp = id;
if (nmd->nm_grp != id) {
if (netmap_verbose)
nm_prerr("iommu group mismatch: %u vs %u",
nm_prerr("iommu group mismatch: %d vs %d",
nmd->nm_grp, id);
nmd->lasterr = err = ENOMEM;
}
@ -1327,7 +1335,7 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj
p->r_objsize = objsize;
#define MAX_CLUSTSIZE (1<<22) // 4 MB
#define LINE_ROUND NM_CACHE_ALIGN // 64
#define LINE_ROUND NM_BUF_ALIGN // 64
if (objsize >= MAX_CLUSTSIZE) {
/* we could do it but there is no point */
nm_prerr("unsupported allocation for %d bytes", objsize);
@ -1524,11 +1532,13 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
{
int i, lim = p->objtotal;
struct netmap_lut *lut;
if (na == NULL || na->pdev == NULL)
return 0;
lut = &na->na_lut;
#if defined(__FreeBSD__)
/* On FreeBSD mapping and unmapping is performed by the txsync
* and rxsync routine, packet by packet. */
@ -1542,7 +1552,7 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
nm_prerr("unsupported on Windows");
#else /* linux */
nm_prdis("unmapping and freeing plut for %s", na->name);
if (lut->plut == NULL)
if (lut->plut == NULL || na->pdev == NULL)
return 0;
for (i = 0; i < lim; i += p->_clustentries) {
if (lut->plut[i].paddr)
@ -1634,6 +1644,7 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd)
goto error;
nmd->nm_totalsize += nmd->pools[i].memtotal;
}
nmd->nm_totalsize = (nmd->nm_totalsize + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
nmd->lasterr = netmap_mem_init_bitmaps(nmd);
if (nmd->lasterr)
goto error;
@ -1660,11 +1671,17 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd)
* allocator for private memory
*/
static void *
_netmap_mem_private_new(size_t size, struct netmap_obj_params *p,
struct netmap_mem_ops *ops, int *perr)
_netmap_mem_private_new(size_t size, struct netmap_obj_params *p, int grp_id,
struct netmap_mem_ops *ops, uint64_t memtotal, int *perr)
{
struct netmap_mem_d *d = NULL;
int i, err = 0;
int checksz = 0;
/* if memtotal != 0 we check that the request fits the available
* memory. Moreover, any surplus memory is assigned to buffers.
*/
checksz = (memtotal > 0);
d = nm_os_malloc(size);
if (d == NULL) {
@ -1675,7 +1692,7 @@ _netmap_mem_private_new(size_t size, struct netmap_obj_params *p,
*d = nm_blueprint;
d->ops = ops;
err = nm_mem_assign_id(d);
err = nm_mem_assign_id(d, grp_id);
if (err)
goto error_free;
snprintf(d->name, NM_MEM_NAMESZ, "%d", d->nm_id);
@ -1684,9 +1701,31 @@ _netmap_mem_private_new(size_t size, struct netmap_obj_params *p,
snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ,
nm_blueprint.pools[i].name,
d->name);
if (checksz) {
uint64_t poolsz = p[i].num * p[i].size;
if (memtotal < poolsz) {
nm_prerr("%s: request too large", d->pools[i].name);
err = ENOMEM;
goto error;
}
memtotal -= poolsz;
}
d->params[i].num = p[i].num;
d->params[i].size = p[i].size;
}
if (checksz && memtotal > 0) {
uint64_t sz = d->params[NETMAP_BUF_POOL].size;
uint64_t n = (memtotal + sz - 1) / sz;
if (n) {
if (netmap_verbose) {
nm_prinf("%s: adding %llu more buffers",
d->pools[NETMAP_BUF_POOL].name,
(unsigned long long)n);
}
d->params[NETMAP_BUF_POOL].num += n;
}
}
NMA_LOCK_INIT(d);
@ -1762,11 +1801,65 @@ netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd,
p[NETMAP_BUF_POOL].num,
p[NETMAP_BUF_POOL].size);
d = _netmap_mem_private_new(sizeof(*d), p, &netmap_mem_global_ops, perr);
d = _netmap_mem_private_new(sizeof(*d), p, -1, &netmap_mem_global_ops, 0, perr);
return d;
}
/* Reference the iommu allocator - find an existing one or create a new one;
* for non-hw adapters fall back to the global allocator.
*/
struct netmap_mem_d *
netmap_mem_get_iommu(struct netmap_adapter *na)
{
int i, err, grp_id;
struct netmap_mem_d *nmd;
if (na == NULL || na->pdev == NULL)
return netmap_mem_get(&nm_mem);
grp_id = nm_iommu_group_id(na->pdev);
NM_MTX_LOCK(nm_mem_list_lock);
nmd = netmap_last_mem_d;
do {
if (!(nmd->flags & NETMAP_MEM_HIDDEN) && nmd->nm_grp == grp_id) {
nmd->refcount++;
NM_DBG_REFC(nmd, __FUNCTION__, __LINE__);
NM_MTX_UNLOCK(nm_mem_list_lock);
return nmd;
}
nmd = nmd->next;
} while (nmd != netmap_last_mem_d);
nmd = nm_os_malloc(sizeof(*nmd));
if (nmd == NULL)
goto error;
*nmd = nm_mem_blueprint;
err = nm_mem_assign_id_locked(nmd, grp_id);
if (err)
goto error_free;
snprintf(nmd->name, sizeof(nmd->name), "%d", nmd->nm_id);
for (i = 0; i < NETMAP_POOLS_NR; i++) {
snprintf(nmd->pools[i].name, NETMAP_POOL_MAX_NAMSZ, "%s-%s",
nm_mem_blueprint.pools[i].name, nmd->name);
}
NMA_LOCK_INIT(nmd);
NM_MTX_UNLOCK(nm_mem_list_lock);
return nmd;
error_free:
nm_os_free(nmd);
error:
NM_MTX_UNLOCK(nm_mem_list_lock);
return NULL;
}
/* call with lock held */
static int
@ -1800,7 +1893,7 @@ netmap_mem2_config(struct netmap_mem_d *nmd)
}
static int
netmap_mem2_finalize(struct netmap_mem_d *nmd)
netmap_mem2_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
if (nmd->flags & NETMAP_MEM_FINALIZED)
goto out;
@ -1837,6 +1930,7 @@ NM_MTX_T nm_mem_ext_list_lock;
int
netmap_mem_init(void)
{
nm_mem_blueprint = nm_mem;
NM_MTX_INIT(nm_mem_list_lock);
NMA_LOCK_INIT(&nm_mem);
netmap_mem_get(&nm_mem);
@ -1852,37 +1946,23 @@ netmap_mem_fini(void)
netmap_mem_put(&nm_mem);
}
static void
netmap_free_rings(struct netmap_adapter *na)
static int
netmap_mem_ring_needed(struct netmap_kring *kring)
{
enum txrx t;
for_rx_tx(t) {
u_int i;
for (i = 0; i < netmap_all_rings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) {
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("NOT deleting ring %s (ring %p, users %d neekring %d)",
kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
continue;
}
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("deleting ring %s", kring->name);
if (!(kring->nr_kflags & NKR_FAKERING)) {
nm_prdis("freeing bufs for %s", kring->name);
netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
} else {
nm_prdis("NOT freeing bufs for %s", kring->name);
}
netmap_ring_free(na->nm_mem, ring);
kring->ring = NULL;
}
}
return kring->ring == NULL &&
(kring->users > 0 ||
(kring->nr_kflags & NKR_NEEDRING));
}
static int
netmap_mem_ring_todelete(struct netmap_kring *kring)
{
return kring->ring != NULL &&
kring->users == 0 &&
!(kring->nr_kflags & NKR_NEEDRING);
}
/* call with NMA_LOCK held *
*
* Allocate netmap rings and buffers for this card
@ -1891,7 +1971,7 @@ netmap_free_rings(struct netmap_adapter *na)
* in netmap_krings_create().
*/
static int
netmap_mem2_rings_create(struct netmap_adapter *na)
netmap_mem2_rings_create(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
enum txrx t;
@ -1903,7 +1983,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
struct netmap_ring *ring = kring->ring;
u_int len, ndesc;
if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) {
if (!netmap_mem_ring_needed(kring)) {
/* unneeded, or already created by somebody else */
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("NOT creating ring %s (ring %p, users %d neekring %d)",
@ -1915,7 +1995,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
ring = netmap_ring_malloc(nmd, len);
if (ring == NULL) {
nm_prerr("Cannot allocate %s_ring", nm_txrx2str(t));
goto cleanup;
@ -1924,16 +2004,16 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
kring->ring = ring;
*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
*(int64_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
(nmd->pools[NETMAP_IF_POOL].memtotal +
nmd->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(nmd, ring);
/* copy values from kring */
ring->head = kring->rhead;
ring->cur = kring->rcur;
ring->tail = kring->rtail;
*(uint32_t *)(uintptr_t)&ring->nr_buf_size =
netmap_mem_bufsize(na->nm_mem);
netmap_mem_bufsize(nmd);
nm_prdis("%s h %d c %d t %d", kring->name,
ring->head, ring->cur, ring->tail);
nm_prdis("initializing slots for %s_ring", nm_txrx2str(t));
@ -1941,7 +2021,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
/* this is a real ring */
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("allocating buffers for %s", kring->name);
if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
if (netmap_new_bufs(nmd, ring->slot, ndesc)) {
nm_prerr("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
goto cleanup;
}
@ -1949,7 +2029,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
/* this is a fake ring, set all indices to 0 */
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("NOT allocating buffers for %s", kring->name);
netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
netmap_mem_set_ring(nmd, ring->slot, ndesc, 0);
}
/* ring info */
*(uint16_t *)(uintptr_t)&ring->ringid = kring->ring_id;
@ -1970,12 +2050,35 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
}
static void
netmap_mem2_rings_delete(struct netmap_adapter *na)
netmap_mem2_rings_delete(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
/* last instance, release bufs and rings */
netmap_free_rings(na);
}
enum txrx t;
for_rx_tx(t) {
u_int i;
for (i = 0; i < netmap_all_rings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
if (!netmap_mem_ring_todelete(kring)) {
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("NOT deleting ring %s (ring %p, users %d neekring %d)",
kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
continue;
}
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("deleting ring %s", kring->name);
if (!(kring->nr_kflags & NKR_FAKERING)) {
nm_prdis("freeing bufs for %s", kring->name);
netmap_free_bufs(nmd, ring->slot, kring->nkr_num_slots);
} else {
nm_prdis("NOT freeing bufs for %s", kring->name);
}
netmap_ring_free(nmd, ring);
kring->ring = NULL;
}
}
}
/* call with NMA_LOCK held */
/*
@ -1986,7 +2089,8 @@ netmap_mem2_rings_delete(struct netmap_adapter *na)
* the interface is in netmap mode.
*/
static struct netmap_if *
netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
netmap_mem2_if_new(struct netmap_mem_d *nmd,
struct netmap_adapter *na, struct netmap_priv_d *priv)
{
struct netmap_if *nifp;
ssize_t base; /* handy for relative offsets between rings and nifp */
@ -2005,7 +2109,7 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
*/
len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t));
nifp = netmap_if_malloc(na->nm_mem, len);
nifp = netmap_if_malloc(nmd, len);
if (nifp == NULL) {
return NULL;
}
@ -2024,7 +2128,7 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
* between the ring and nifp, so the information is usable in
* userspace to reach the ring from the nifp.
*/
base = netmap_if_offset(na->nm_mem, nifp);
base = netmap_if_offset(nmd, nifp);
for (i = 0; i < n[NR_TX]; i++) {
/* XXX instead of ofs == 0 maybe use the offset of an error
* ring, like we do for buffers? */
@ -2032,7 +2136,7 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
if (na->tx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_TX]
&& i < priv->np_qlast[NR_TX]) {
ofs = netmap_ring_offset(na->nm_mem,
ofs = netmap_ring_offset(nmd,
na->tx_rings[i]->ring) - base;
}
*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = ofs;
@ -2044,7 +2148,7 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
if (na->rx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_RX]
&& i < priv->np_qlast[NR_RX]) {
ofs = netmap_ring_offset(na->nm_mem,
ofs = netmap_ring_offset(nmd,
na->rx_rings[i]->ring) - base;
}
*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = ofs;
@ -2054,18 +2158,19 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
}
static void
netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
netmap_mem2_if_delete(struct netmap_mem_d *nmd,
struct netmap_adapter *na, struct netmap_if *nifp)
{
if (nifp == NULL)
/* nothing to do */
return;
if (nifp->ni_bufs_head)
netmap_extra_free(na, nifp->ni_bufs_head);
netmap_if_free(na->nm_mem, nifp);
netmap_if_free(nmd, nifp);
}
static void
netmap_mem2_deref(struct netmap_mem_d *nmd)
netmap_mem2_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
if (netmap_debug & NM_DEBUG_MEM)
@ -2257,11 +2362,14 @@ netmap_mem_ext_create(uint64_t usrptr, struct nmreq_pools_info *pi, int *perror)
nm_prinf("not found, creating new");
nme = _netmap_mem_private_new(sizeof(*nme),
(struct netmap_obj_params[]){
{ pi->nr_if_pool_objsize, pi->nr_if_pool_objtotal },
{ pi->nr_ring_pool_objsize, pi->nr_ring_pool_objtotal },
{ pi->nr_buf_pool_objsize, pi->nr_buf_pool_objtotal }},
-1,
&netmap_mem_ext_ops,
pi->nr_memsize,
&error);
if (nme == NULL)
goto out_unmap;
@ -2517,7 +2625,7 @@ netmap_mem_pt_guest_config(struct netmap_mem_d *nmd)
}
static int
netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd)
netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
uint64_t mem_size;
@ -2590,7 +2698,7 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd)
}
static void
netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd)
netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
@ -2630,13 +2738,14 @@ netmap_mem_pt_guest_delete(struct netmap_mem_d *nmd)
}
static struct netmap_if *
netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
netmap_mem_pt_guest_if_new(struct netmap_mem_d *nmd,
struct netmap_adapter *na, struct netmap_priv_d *priv)
{
struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
struct mem_pt_if *ptif;
struct netmap_if *nifp = NULL;
ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
ptif = netmap_mem_pt_guest_ifp_lookup(nmd, na->ifp);
if (ptif == NULL) {
nm_prerr("interface %s is not in passthrough", na->name);
goto out;
@ -2649,25 +2758,27 @@ netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv
}
static void
netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
netmap_mem_pt_guest_if_delete(struct netmap_mem_d * nmd,
struct netmap_adapter *na, struct netmap_if *nifp)
{
struct mem_pt_if *ptif;
ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
ptif = netmap_mem_pt_guest_ifp_lookup(nmd, na->ifp);
if (ptif == NULL) {
nm_prerr("interface %s is not in passthrough", na->name);
}
}
static int
netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
netmap_mem_pt_guest_rings_create(struct netmap_mem_d *nmd,
struct netmap_adapter *na)
{
struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
struct mem_pt_if *ptif;
struct netmap_if *nifp;
int i, error = -1;
ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
ptif = netmap_mem_pt_guest_ifp_lookup(nmd, na->ifp);
if (ptif == NULL) {
nm_prerr("interface %s is not in passthrough", na->name);
goto out;
@ -2698,7 +2809,7 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
}
static void
netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na)
netmap_mem_pt_guest_rings_delete(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
#if 0
enum txrx t;
@ -2712,6 +2823,8 @@ netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na)
}
}
#endif
(void)nmd;
(void)na;
}
static struct netmap_mem_ops netmap_mem_pt_guest_ops = {
@ -2769,7 +2882,7 @@ netmap_mem_pt_guest_create(nm_memid_t mem_id)
ptnmd->pt_ifs = NULL;
/* Assign new id in the guest (We have the lock) */
err = nm_mem_assign_id_locked(&ptnmd->up);
err = nm_mem_assign_id_locked(&ptnmd->up, -1);
if (err)
goto error;


@ -147,6 +147,7 @@ struct netmap_mem_d* netmap_mem_private_new( u_int txr, u_int txd, u_int rxr, u_
#define netmap_mem_get(d) __netmap_mem_get(d, __FUNCTION__, __LINE__)
#define netmap_mem_put(d) __netmap_mem_put(d, __FUNCTION__, __LINE__)
struct netmap_mem_d* __netmap_mem_get(struct netmap_mem_d *, const char *, int);
struct netmap_mem_d* netmap_mem_get_iommu(struct netmap_adapter *);
void __netmap_mem_put(struct netmap_mem_d *, const char *, int);
struct netmap_mem_d* netmap_mem_find(nm_memid_t);
unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd);
@ -172,7 +173,6 @@ int netmap_mem_pools_info_get(struct nmreq_pools_info *,
#define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */
#define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */
#define NETMAP_MEM_EXT 0x10 /* external memory (not remappable) */
uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);


@ -151,6 +151,7 @@ netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na,
nna->up.num_rx_rings = req->nr_rx_rings;
nna->up.num_tx_desc = req->nr_tx_slots;
nna->up.num_rx_desc = req->nr_rx_slots;
nna->up.na_flags = NAF_OFFSETS;
error = netmap_attach_common(&nna->up);
if (error)
goto free_nna;


@ -211,8 +211,12 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
m--, k = nm_next(k, lim), nk = (complete ? k : nk)) {
struct netmap_slot *rs = &rxring->slot[k];
struct netmap_slot *ts = &txring->slot[k];
uint64_t off = nm_get_offset(rxkring, rs);
*rs = *ts;
if (nm_get_offset(rxkring, rs) < off) {
nm_write_offset(rxkring, rs, off);
}
if (ts->flags & NS_BUF_CHANGED) {
ts->flags &= ~NS_BUF_CHANGED;
}
@ -263,9 +267,9 @@ netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
struct netmap_slot *rs = &rxring->slot[k];
struct netmap_slot *ts = &txring->slot[k];
/* copy the slot. This also propagates any offset */
*ts = *rs;
if (rs->flags & NS_BUF_CHANGED) {
/* copy the slot and report the buffer change */
*ts = *rs;
rs->flags &= ~NS_BUF_CHANGED;
}
}
@ -414,7 +418,6 @@ netmap_pipe_reg_both(struct netmap_adapter *na, struct netmap_adapter *ona)
for (i = 0; i < nma_get_nrings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_on(kring)) {
struct netmap_kring *sring, *dring;
kring->nr_mode = NKR_NETMAP_ON;
if ((kring->nr_kflags & NKR_FAKERING) &&
@ -426,27 +429,25 @@ netmap_pipe_reg_both(struct netmap_adapter *na, struct netmap_adapter *ona)
continue;
}
/* copy the buffers from the non-fake ring */
if (kring->nr_kflags & NKR_FAKERING) {
sring = kring->pipe;
dring = kring;
} else {
sring = kring;
dring = kring->pipe;
}
memcpy(dring->ring->slot,
sring->ring->slot,
/* copy the buffers from the non-fake ring
* (this also propagates any initial offset)
*/
memcpy(kring->pipe->ring->slot,
kring->ring->slot,
sizeof(struct netmap_slot) *
sring->nkr_num_slots);
kring->nkr_num_slots);
/* copy the offset-related fields */
*(uint64_t *)(uintptr_t)&kring->pipe->ring->offset_mask =
kring->ring->offset_mask;
*(uint64_t *)(uintptr_t)&kring->pipe->ring->buf_align =
kring->ring->buf_align;
/* mark both rings as fake and needed,
* so that buffers will not be
* deleted by the standard machinery
* (we will delete them by ourselves in
* netmap_pipe_krings_delete)
*/
sring->nr_kflags |=
(NKR_FAKERING | NKR_NEEDRING);
dring->nr_kflags |=
kring->nr_kflags |=
(NKR_FAKERING | NKR_NEEDRING);
kring->nr_mode = NKR_NETMAP_ON;
}
@ -660,7 +661,7 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
const char *pipe_id = NULL;
int role = 0;
int error, retries = 0;
char *cbra;
char *cbra, pipe_char;
/* Try to parse the pipe syntax 'xx{yy' or 'xx}yy'. */
cbra = strrchr(hdr->nr_name, '{');
@ -675,6 +676,7 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
return 0;
}
}
pipe_char = *cbra;
pipe_id = cbra + 1;
if (*pipe_id == '\0' || cbra == hdr->nr_name) {
/* Bracket is the last character, so pipe name is missing;
@ -690,15 +692,13 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
/* first, try to find the parent adapter */
for (;;) {
char nr_name_orig[NETMAP_REQ_IFNAMSIZ];
int create_error;
/* Temporarily remove the pipe suffix. */
strlcpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
*cbra = '\0';
error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
/* Restore the pipe suffix. */
strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
*cbra = pipe_char;
if (!error)
break;
if (error != ENXIO || retries++) {
@ -711,7 +711,7 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
NMG_UNLOCK();
create_error = netmap_vi_create(hdr, 1 /* autodelete */);
NMG_LOCK();
strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
*cbra = pipe_char;
if (create_error && create_error != EEXIST) {
if (create_error != EOPNOTSUPP) {
nm_prerr("failed to create a persistent vale port: %d",
@ -771,7 +771,7 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
mna->up.nm_krings_create = netmap_pipe_krings_create;
mna->up.nm_krings_delete = netmap_pipe_krings_delete;
mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
mna->up.na_flags |= NAF_MEM_OWNER;
mna->up.na_flags |= NAF_MEM_OWNER | NAF_OFFSETS;
mna->up.na_lut = pna->na_lut;
mna->up.num_tx_rings = req->nr_tx_rings;


@ -99,7 +99,7 @@ __FBSDID("$FreeBSD$");
* In the tx loop, we aggregate traffic in batches to make all operations
* faster. The batch size is bridge_batch.
*/
#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */
#define NM_BDG_MAXRINGS 16 /* XXX unclear how many (must be a pow of 2). */
#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */
#define NM_BRIDGE_RINGSIZE 1024 /* in the device */
#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
@ -154,8 +154,9 @@ struct netmap_bdg_ops vale_bdg_ops = {
* with other odd sizes. We assume there is enough room
* in the source and destination buffers.
*
* XXX only for multiples of 64 bytes, non overlapped.
* XXX only for multiples of NM_BUF_ALIGN bytes, non overlapped.
*/
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
@ -165,7 +166,8 @@ pkt_copy(void *_src, void *_dst, int l)
memcpy(dst, src, l);
return;
}
for (; likely(l > 0); l-=64) {
for (; likely(l > 0); l -= NM_BUF_ALIGN) {
/* XXX NM_BUF_ALIGN/sizeof(uint64_t) statements */
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
@ -387,144 +389,6 @@ netmap_vale_list(struct nmreq_header *hdr)
return error;
}
/* Process NETMAP_REQ_VALE_ATTACH.
*/
int
netmap_vale_attach(struct nmreq_header *hdr, void *auth_token)
{
struct nmreq_vale_attach *req =
(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter * vpna;
struct netmap_adapter *na = NULL;
struct netmap_mem_d *nmd = NULL;
struct nm_bridge *b = NULL;
int error;
NMG_LOCK();
/* permission check for modified bridges */
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto unlock_exit;
}
if (req->reg.nr_mem_id) {
nmd = netmap_mem_find(req->reg.nr_mem_id);
if (nmd == NULL) {
error = EINVAL;
goto unlock_exit;
}
}
/* check for existing one */
error = netmap_get_vale_na(hdr, &na, nmd, 0);
if (na) {
error = EBUSY;
goto unref_exit;
}
error = netmap_get_vale_na(hdr, &na,
nmd, 1 /* create if not exists */);
if (error) { /* no device */
goto unlock_exit;
}
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto unlock_exit;
}
if (NETMAP_OWNED_BY_ANY(na)) {
error = EBUSY;
goto unref_exit;
}
if (na->nm_bdg_ctl) {
/* nop for VALE ports. The bwrap needs to put the hwna
* in netmap mode (see netmap_bwrap_bdg_ctl)
*/
error = na->nm_bdg_ctl(hdr, na);
if (error)
goto unref_exit;
nm_prdis("registered %s to netmap-mode", na->name);
}
vpna = (struct netmap_vp_adapter *)na;
req->port_index = vpna->bdg_port;
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
return 0;
unref_exit:
netmap_adapter_put(na);
unlock_exit:
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
return error;
}
/* Process NETMAP_REQ_VALE_DETACH.
*/
int
netmap_vale_detach(struct nmreq_header *hdr, void *auth_token)
{
struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter *vpna;
struct netmap_adapter *na;
struct nm_bridge *b = NULL;
int error;
NMG_LOCK();
/* permission check for modified bridges */
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto unlock_exit;
}
error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
if (error) { /* no device, or another bridge or user owns the device */
goto unlock_exit;
}
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto unlock_exit;
} else if (nm_is_bwrap(na) &&
((struct netmap_bwrap_adapter *)na)->na_polling_state) {
/* Don't detach a NIC with polling */
error = EBUSY;
goto unref_exit;
}
vpna = (struct netmap_vp_adapter *)na;
if (na->na_vp != vpna) {
/* trying to detach first attach of VALE persistent port attached
* to 2 bridges
*/
error = EBUSY;
goto unref_exit;
}
nmreq_det->port_index = vpna->bdg_port;
if (na->nm_bdg_ctl) {
/* remove the port from bridge. The bwrap
* also needs to put the hwna in normal mode
*/
error = na->nm_bdg_ctl(hdr, na);
}
unref_exit:
netmap_adapter_put(na);
unlock_exit:
NMG_UNLOCK();
return error;
}
/* nm_dtor callback for ephemeral VALE ports */
static void
@ -651,8 +515,9 @@ nm_vale_preflush(struct netmap_kring *kring, u_int end)
/* this slot goes into a list so initialize the link field */
ft[ft_i].ft_next = NM_FT_NULL;
buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
if (unlikely(buf == NULL)) {
(void *)(uintptr_t)slot->ptr : NMB_O(kring, slot);
if (unlikely(buf == NULL ||
slot->len > NETMAP_BUF_SIZE(&na->up) - nm_get_offset(kring, slot))) {
nm_prlim(5, "NULL %s buffer pointer from %s slot %d len %d",
(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
kring->name, j, ft[ft_i].ft_len);
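The extra condition above rejects slots whose declared length no longer fits in the buffer once the per-slot offset is taken into account. The invariant, written as a standalone predicate (a sketch; in the kernel the buffer size and the offset come from NETMAP_BUF_SIZE() and nm_get_offset()):

#include <stdint.h>

/* Sketch of the bound enforced above: the payload must fit between the
 * slot offset and the end of the netmap buffer. The subtraction form
 * avoids overflow when offset is close to buf_size. */
static inline int
slot_len_fits(uint64_t buf_size, uint64_t offset, uint64_t len)
{
	return offset <= buf_size && len <= buf_size - offset;
}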
@ -939,9 +804,6 @@ nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
/*
* Broadcast traffic goes to ring 0 on all destinations.
* So we need to add these rings to the list of ports to scan.
* XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
* expensive. We should keep a compact list of active destinations
* so we could shorten this loop.
*/
brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
if (brddst->bq_head != NM_FT_NULL) {
@ -998,7 +860,7 @@ nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
next = d->bq_head;
/* we need to reserve this many slots. If fewer are
* available, some packets will be dropped.
* Packets may have multiple fragments, so we may not use
* Packets may have multiple fragments, so there is
* a chance that we do not use all of the slots we
* have claimed; the leftover ones will be handled
* when we regain the lock.
@ -1108,21 +970,36 @@ nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
do {
char *dst, *src = ft_p->ft_buf;
size_t copy_len = ft_p->ft_len, dst_len = copy_len;
uintptr_t src_cb;
uint64_t dstoff, dstoff_cb;
int src_co, dst_co;
const uintptr_t mask = NM_BUF_ALIGN - 1;
slot = &ring->slot[j];
dst = NMB(&dst_na->up, slot);
dstoff = nm_get_offset(kring, slot);
dstoff_cb = dstoff & ~mask;
src_cb = ((uintptr_t)src) & ~mask;
src_co = ((uintptr_t)src) & mask;
dst_co = ((uintptr_t)(dst + dstoff)) & mask;
if (dst_co < src_co) {
dstoff_cb += NM_BUF_ALIGN;
}
dstoff = dstoff_cb + src_co;
copy_len += src_co;
nm_prdis("send [%d] %d(%d) bytes at %s:%d",
i, (int)copy_len, (int)dst_len,
dst_na->up.name, j);
/* round to a multiple of 64 */
copy_len = (copy_len + 63) & ~63;
NM_IFPNAME(dst_ifp), j);
if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
copy_len > NETMAP_BUF_SIZE(&na->up))) {
nm_prlim(5, "invalid len %d, down to 64", (int)copy_len);
copy_len = dst_len = 64; // XXX
if (unlikely(dstoff > NETMAP_BUF_SIZE(&dst_na->up) ||
dst_len > NETMAP_BUF_SIZE(&dst_na->up) - dstoff)) {
nm_prlim(5, "dropping packet/fragment of len %zu, dest offset %llu",
dst_len, (unsigned long long)dstoff);
copy_len = dst_len = 0;
dstoff = nm_get_offset(kring, slot);
}
if (ft_p->ft_flags & NS_INDIRECT) {
if (copyin(src, dst, copy_len)) {
// invalid user pointer, pretend len is 0
@ -1130,10 +1007,11 @@ nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
}
} else {
//memcpy(dst, src, copy_len);
pkt_copy(src, dst, (int)copy_len);
pkt_copy((char *)src_cb, dst + dstoff_cb, (int)copy_len);
}
slot->len = dst_len;
slot->flags = (cnt << 8) | NS_MOREFRAG;
nm_write_offset(kring, slot, dstoff);
j = nm_next(j, lim);
needed--;
ft_p++;
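The offset handling just above re-bases the copy so that pkt_copy() sees NM_BUF_ALIGN-aligned source and destination addresses, while guaranteeing that nothing is written below the offset already stored in the destination slot. A worked, self-contained sketch of the same arithmetic, assuming an alignment of 64 bytes and a destination buffer whose base address is itself aligned (both assumptions are for illustration only):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uintptr_t align = 64, mask = align - 1;	/* stand-in for NM_BUF_ALIGN */
	uintptr_t src = 0x10010;	/* payload start inside the source buffer */
	uintptr_t dst = 0x20000;	/* destination buffer base (assumed aligned) */
	uint64_t dstoff = 8;		/* offset already present in the dest slot */

	uint64_t dstoff_cb = dstoff & ~mask;	/* 0: aligned part of the offset */
	uintptr_t src_cb = src & ~mask;		/* 0x10000: aligned copy source */
	int src_co = src & mask;		/* 16: source misalignment */
	int dst_co = (dst + dstoff) & mask;	/* 8 */

	/* The copy starts at dst + dstoff_cb and carries src_co junk bytes
	 * before the payload; bump by one block if that junk would spill
	 * below the offset requested in the slot. */
	if (dst_co < src_co)
		dstoff_cb += align;		/* 64 */
	dstoff = dstoff_cb + src_co;		/* 80: offset written back to the slot */

	printf("copy from %#lx into base+%llu, payload at offset %llu\n",
	    (unsigned long)src_cb, (unsigned long long)dstoff_cb,
	    (unsigned long long)dstoff);
	return 0;
}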
@ -1312,7 +1190,7 @@ netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
if (netmap_verbose)
nm_prinf("max frame size %u", vpna->mfs);
na->na_flags |= NAF_BDG_MAYSLEEP;
na->na_flags |= (NAF_BDG_MAYSLEEP | NAF_OFFSETS);
/* persistent VALE ports look like hw devices
* with a native netmap adapter
*/
@ -1409,6 +1287,7 @@ netmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
na->nm_krings_create = netmap_vale_bwrap_krings_create;
na->nm_krings_delete = netmap_vale_bwrap_krings_delete;
na->nm_notify = netmap_bwrap_notify;
bna->nm_intr_notify = netmap_bwrap_intr_notify;
bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
/* Set the mfs, needed on the VALE mismatch datapath. */
bna->up.mfs = NM_BDG_MFS_DEFAULT;

View File

@ -235,6 +235,7 @@ struct netmap_slot {
#define NETMAP_MAX_FRAGS 64 /* max number of fragments */
/*
* struct netmap_ring
*
@ -296,6 +297,19 @@ struct netmap_ring {
struct timeval ts; /* (k) time of last *sync() */
/* offset_mask is used to isolate the part of the ptr field
* in the slots used to contain an offset in the buffer.
* It is zero if the ring has not been opened using the
* NETMAP_REQ_OPT_OFFSETS option.
*/
const uint64_t offset_mask;
/* the alignment requirement, in bytes, for the start
* of the packets inside the buffers.
* User programs should take this alignment into
* account when specifying buffer offsets in TX slots.
*/
const uint64_t buf_align;
/* opaque room for a mutex or similar object */
#if !defined(_WIN32) || defined(__CYGWIN__)
uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128];
@ -307,6 +321,7 @@ struct netmap_ring {
struct netmap_slot slot[0]; /* array of slots. */
};
/*
* RING FLAGS
*/
@ -561,6 +576,12 @@ enum {
*/
NETMAP_REQ_OPT_SYNC_KLOOP_MODE,
/* On NETMAP_REQ_REGISTER, ask for (part of) the ptr field in the
* slots of the registered rings to be used as an offset field
* for the start of the packets inside the netmap buffer.
*/
NETMAP_REQ_OPT_OFFSETS,
/* This is a marker to count the number of available options.
* New options must be added above it. */
NETMAP_REQ_OPT_MAX,
@ -811,7 +832,16 @@ static inline void nm_ldld_barrier(void)
#define nm_ldld_barrier atomic_thread_fence_acq
#define nm_stld_barrier atomic_thread_fence_seq_cst
#else /* !_KERNEL */
#ifdef __cplusplus
#include <atomic>
using std::memory_order_release;
using std::memory_order_acquire;
#else /* __cplusplus */
#include <stdatomic.h>
#endif /* __cplusplus */
static inline void nm_stst_barrier(void)
{
atomic_thread_fence(memory_order_release);
@ -933,4 +963,29 @@ struct nmreq_opt_csb {
uint64_t csb_ktoa;
};
/* option NETMAP_REQ_OPT_OFFSETS */
struct nmreq_opt_offsets {
struct nmreq_option nro_opt;
/* the user must declare the maximum offset value that she is
* going to put into the offset slot-fields. Any larger value
* found at runtime will be cropped. On output the (possibly
* higher) effective max value is returned.
*/
uint64_t nro_max_offset;
/* optional initial offset value, to be set in all slots. */
uint64_t nro_initial_offset;
/* number of bits in the lower part of the 'ptr' field to be
* used as the offset field. On output the (possibly larger)
* effective number of bits is returned.
* 0 means: use the whole ptr field.
*/
uint32_t nro_offset_bits;
/* required alignment for the beginning of the packets
* (base of the buffer plus offset) in the TX slots.
*/
uint32_t nro_tx_align;
/* Reserved: set to zero. */
uint64_t nro_min_gap;
};
#endif /* _NET_NETMAP_H_ */
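To make the new option concrete, here is a hedged sketch of how an application might chain it in front of a NETMAP_REQ_REGISTER request; the particular values chosen (a 64-byte maximum offset, the whole ptr field, no TX alignment constraint) are illustrative assumptions, and the helper name is hypothetical:

#include <string.h>
#include <stdint.h>
#include <net/netmap.h>

/* Hypothetical helper: prepend a NETMAP_REQ_OPT_OFFSETS option to the
 * option list of an already-prepared register request header. */
static void
attach_offsets_option(struct nmreq_header *hdr, struct nmreq_opt_offsets *off)
{
	memset(off, 0, sizeof(*off));
	off->nro_opt.nro_reqtype = NETMAP_REQ_OPT_OFFSETS;
	off->nro_max_offset = 64;	/* largest offset we will ever store */
	off->nro_initial_offset = 0;	/* do not pre-fill the slots */
	off->nro_offset_bits = 0;	/* 0: use the whole ptr field */
	off->nro_tx_align = 0;		/* no TX alignment requirement */
	off->nro_min_gap = 0;		/* reserved, must be zero */

	/* chain in front of any options already present */
	off->nro_opt.nro_next = hdr->nr_options;
	hdr->nr_options = (uintptr_t)off;
}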

View File

@ -144,6 +144,7 @@
*
*/
/*
* struct nmreq overlays a struct ifreq (just the name)
*/
@ -215,6 +216,7 @@ struct nmreq {
#define NETMAP_SETSOCKOPT _IO('i', 140)
#define NETMAP_GETSOCKOPT _IO('i', 141)
/* These linknames are for the Netmap Core Driver */
#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP"
#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap"

View File

@ -123,12 +123,29 @@
( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
(ring)->nr_buf_size )
/* read the offset field in a ring's slot */
#define NETMAP_ROFFSET(ring, slot) \
((slot)->ptr & (ring)->offset_mask)
/* update the offset field in a ring's slot */
#define NETMAP_WOFFSET(ring, slot, offset) \
do { (slot)->ptr = ((slot)->ptr & ~(ring)->offset_mask) | \
((offset) & (ring)->offset_mask); } while (0)
/* obtain the start of the buffer pointed to by a ring's slot, taking the
* offset field into account
*/
#define NETMAP_BUF_OFFSET(ring, slot) \
(NETMAP_BUF(ring, (slot)->buf_idx) + NETMAP_ROFFSET(ring, slot))
static inline uint32_t
nm_ring_next(struct netmap_ring *r, uint32_t i)
{
return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1);
}
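A short sketch of how the NETMAP_ROFFSET/NETMAP_WOFFSET/NETMAP_BUF_OFFSET macros above might be used on the TX side, placing the payload behind some reserved header room; the ring is assumed to be already open with the offsets option enabled, and payload/payload_len/hdr_room are illustrative parameters:

#include <stdint.h>
#include <string.h>
#include <net/netmap_user.h>

/* Hedged sketch: write one packet at a user-chosen offset in the next TX
 * slot. Assumes buf_align is a power of two and that the buffer base is
 * itself suitably aligned, so aligning the offset aligns the packet. */
static void
tx_with_offset(struct netmap_ring *ring, const void *payload,
    uint16_t payload_len, uint64_t hdr_room)
{
	uint32_t i = ring->cur;
	struct netmap_slot *slot = &ring->slot[i];
	uint64_t off = hdr_room;
	char *buf;

	if (ring->buf_align)	/* honor the advertised TX alignment */
		off = (off + ring->buf_align - 1) & ~(ring->buf_align - 1);

	NETMAP_WOFFSET(ring, slot, off);	/* record the offset */
	buf = NETMAP_BUF_OFFSET(ring, slot);	/* buffer base + offset */
	memcpy(buf, payload, payload_len);
	slot->len = payload_len;

	ring->head = ring->cur = nm_ring_next(ring, i);
}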
/*
* Return 1 if we have pending transmissions in the tx ring.
* When everything is complete ring->head = ring->tail + 1 (modulo ring size)
@ -350,6 +367,7 @@ enum {
NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */
};
/*
* nm_close() closes and restores the port to its previous state
*/
@ -430,6 +448,7 @@ win_remove_fd_record(int fd)
}
}
HANDLE
win_get_netmap_handle(int fd)
{
@ -916,6 +935,7 @@ nm_open(const char *ifname, const struct nmreq *req,
goto fail;
}
#ifdef DEBUG_NETMAP_USER
{ /* debugging code */
int i;
@ -947,6 +967,7 @@ nm_open(const char *ifname, const struct nmreq *req,
return NULL;
}
static int
nm_close(struct nm_desc *d)
{
@ -1059,6 +1080,7 @@ nm_inject(struct nm_desc *d, const void *buf, size_t size)
return 0; /* fail */
}
/*
* Same prototype as pcap_dispatch(), only need to cast.
*/
@ -1108,7 +1130,7 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
slot = &ring->slot[i];
d->hdr.len += slot->len;
nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx);
if (oldbuf != NULL && nbuf - oldbuf == (int)ring->nr_buf_size &&
if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size &&
oldlen == ring->nr_buf_size) {
d->hdr.caplen += slot->len;
oldbuf = nbuf;

View File

@ -44,8 +44,8 @@
/* PCI identifiers and PCI BARs for ptnetmap-memdev and ptnet. */
#define PTNETMAP_MEMDEV_NAME "ptnetmap-memdev"
#define PTNETMAP_PCI_VENDOR_ID 0x1b36 /* QEMU virtual devices */
#define PTNETMAP_PCI_DEVICE_ID 0xcccc /* memory device */
#define PTNETMAP_PCI_NETIF_ID 0xcccd /* ptnet network interface */
#define PTNETMAP_PCI_DEVICE_ID 0x000c /* memory device */
#define PTNETMAP_PCI_NETIF_ID 0x000d /* ptnet network interface */
#define PTNETMAP_IO_PCI_BAR 0
#define PTNETMAP_MEM_PCI_BAR 1
#define PTNETMAP_MSIX_PCI_BAR 2