diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index f9698096b47a..18de5ef205eb 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -805,6 +805,14 @@ netmap_update_config(struct netmap_adapter *na)
 static int netmap_txsync_to_host(struct netmap_kring *kring, int flags);
 static int netmap_rxsync_from_host(struct netmap_kring *kring, int flags);
 
+static int
+netmap_default_bufcfg(struct netmap_kring *kring, uint64_t target)
+{
+	kring->hwbuf_len = target;
+	kring->buf_align = 0; /* no alignment */
+	return 0;
+}
+
 /* create the krings array and initialize the fields common to all adapters.
  * The array layout is this:
  *
@@ -885,12 +893,16 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
 		kring->nr_pending_mode = NKR_NETMAP_OFF;
 		if (i < nma_get_nrings(na, t)) {
 			kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync);
+			kring->nm_bufcfg = na->nm_bufcfg;
+			if (kring->nm_bufcfg == NULL)
+				kring->nm_bufcfg = netmap_default_bufcfg;
 		} else {
 			if (!(na->na_flags & NAF_HOST_RINGS))
 				kring->nr_kflags |= NKR_FAKERING;
 			kring->nm_sync = (t == NR_TX ?
 					netmap_txsync_to_host:
 					netmap_rxsync_from_host);
+			kring->nm_bufcfg = netmap_default_bufcfg;
 		}
 		kring->nm_notify = na->nm_notify;
 		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
@@ -969,20 +981,27 @@ netmap_hw_krings_delete(struct netmap_adapter *na)
 	netmap_krings_delete(na);
 }
 
-static void
-netmap_mem_drop(struct netmap_adapter *na)
+void
+netmap_mem_restore(struct netmap_adapter *na)
 {
-	int last = netmap_mem_deref(na->nm_mem, na);
-	/* if the native allocator had been overrided on regif,
-	 * restore it now and drop the temporary one
-	 */
-	if (last && na->nm_mem_prev) {
+	if (na->nm_mem_prev) {
 		netmap_mem_put(na->nm_mem);
 		na->nm_mem = na->nm_mem_prev;
 		na->nm_mem_prev = NULL;
 	}
 }
 
+static void
+netmap_mem_drop(struct netmap_adapter *na)
+{
+	/* if the native allocator had been overridden on regif,
+	 * restore it now and drop the temporary one
+	 */
+	if (netmap_mem_deref(na->nm_mem, na)) {
+		netmap_mem_restore(na);
+	}
+}
+
 /*
  * Undo everything that was done in netmap_do_regif(). In particular,
  * call nm_register(ifp,0) to stop netmap mode on the interface and
@@ -1571,7 +1590,7 @@ netmap_get_na(struct nmreq_header *hdr,
 	if (error || *na != NULL)
 		goto out;
 
-	/* try to see if this is a bridge port */
+	/* try to see if this is a vale port */
 	error = netmap_get_vale_na(hdr, na, nmd, create);
 	if (error)
 		goto out;
@@ -2232,6 +2251,198 @@ netmap_buf_size_validate(const struct netmap_adapter *na, unsigned mtu) {
 	return 0;
 }
 
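For context on how this option reaches netmap_offsets_init() (added just below), a userspace program chains a struct nmreq_opt_offsets to its register request roughly as follows. This is a minimal sketch using the structures introduced by this patch; the interface name and numeric values are illustrative and error handling is omitted:

	struct nmreq_header hdr;
	struct nmreq_register reg;
	struct nmreq_opt_offsets off;

	memset(&hdr, 0, sizeof(hdr));
	memset(&reg, 0, sizeof(reg));
	memset(&off, 0, sizeof(off));
	hdr.nr_version = NETMAP_API;
	hdr.nr_reqtype = NETMAP_REQ_REGISTER;
	strlcpy(hdr.nr_name, "em0", sizeof(hdr.nr_name));
	hdr.nr_body = (uintptr_t)&reg;
	reg.nr_mode = NR_REG_ALL_NIC;
	off.nro_opt.nro_reqtype = NETMAP_REQ_OPT_OFFSETS;
	off.nro_offset_bits = 12;	/* offsets live in the low 12 bits of slot->ptr */
	off.nro_max_offset = 512;	/* offsets never point more than 512 bytes in */
	off.nro_initial_offset = 128;	/* pre-load every slot with offset 128 */
	hdr.nr_options = (uintptr_t)&off.nro_opt;
	ioctl(fd, NIOCCTRL, &hdr);	/* off.nro_opt.nro_status reports the per-option outcome */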
+/* Handle the offset option, if present in the hdr.
+ * Returns 0 on success, or an error.
+ */
+static int
+netmap_offsets_init(struct netmap_priv_d *priv, struct nmreq_header *hdr)
+{
+	struct nmreq_opt_offsets *opt;
+	struct netmap_adapter *na = priv->np_na;
+	struct netmap_kring *kring;
+	uint64_t mask = 0, bits = 0, maxbits = sizeof(uint64_t) * 8,
+		 max_offset = 0, initial_offset = 0, min_gap = 0;
+	u_int i;
+	enum txrx t;
+	int error = 0;
+
+	opt = (struct nmreq_opt_offsets *)
+	      nmreq_getoption(hdr, NETMAP_REQ_OPT_OFFSETS);
+	if (opt == NULL)
+		return 0;
+
+	if (!(na->na_flags & NAF_OFFSETS)) {
+		if (netmap_verbose)
+			nm_prerr("%s does not support offsets",
+				 na->name);
+		error = EOPNOTSUPP;
+		goto out;
+	}
+
+	/* check sanity of the opt values */
+	max_offset = opt->nro_max_offset;
+	min_gap = opt->nro_min_gap;
+	initial_offset = opt->nro_initial_offset;
+	bits = opt->nro_offset_bits;
+
+	if (bits > maxbits) {
+		if (netmap_verbose)
+			nm_prerr("bits: %llu too large (max %llu)",
+				 (unsigned long long)bits,
+				 (unsigned long long)maxbits);
+		error = EINVAL;
+		goto out;
+	}
+	/* we take bits == 0 as a request to use the entire field */
+	if (bits == 0 || bits == maxbits) {
+		/* shifting a type by sizeof(type) is undefined */
+		bits = maxbits;
+		mask = 0xffffffffffffffff;
+	} else {
+		mask = (1ULL << bits) - 1;
+	}
+	if (max_offset > NETMAP_BUF_SIZE(na)) {
+		if (netmap_verbose)
+			nm_prerr("max offset %llu > buf size %u",
+				 (unsigned long long)max_offset, NETMAP_BUF_SIZE(na));
+		error = EINVAL;
+		goto out;
+	}
+	if ((max_offset & mask) != max_offset) {
+		if (netmap_verbose)
+			nm_prerr("max offset %llu too large for %llu bits",
+				 (unsigned long long)max_offset,
+				 (unsigned long long)bits);
+		error = EINVAL;
+		goto out;
+	}
+	if (initial_offset > max_offset) {
+		if (netmap_verbose)
+			nm_prerr("initial offset %llu > max offset %llu",
+				 (unsigned long long)initial_offset,
+				 (unsigned long long)max_offset);
+		error = EINVAL;
+		goto out;
+	}
+
+	/* initialize the kring and ring fields. */
+	foreach_selected_ring(priv, t, i, kring) {
+		struct netmap_kring *kring = NMR(na, t)[i];
+		struct netmap_ring *ring = kring->ring;
+		u_int j;
+
+		/* if the ring is already in use we check that the
+		 * new request is compatible with the existing one
+		 */
+		if (kring->offset_mask) {
+			if ((kring->offset_mask & mask) != mask ||
+			    kring->offset_max < max_offset) {
+				if (netmap_verbose)
+					nm_prinf("%s: cannot increase "
+						 "offset mask and/or max "
+						 "(current: mask=%llx,max=%llu)",
+						 kring->name,
+						 (unsigned long long)kring->offset_mask,
+						 (unsigned long long)kring->offset_max);
+				error = EBUSY;
+				goto out;
+			}
+			mask = kring->offset_mask;
+			max_offset = kring->offset_max;
+		} else {
+			kring->offset_mask = mask;
+			*(uint64_t *)(uintptr_t)&ring->offset_mask = mask;
+			kring->offset_max = max_offset;
+			kring->offset_gap = min_gap;
+		}
+
+		/* if there is an initial offset, put it into
+		 * all the slots
+		 *
+		 * Note: we cannot change the offsets if the
+		 * ring is already in use.
+		 */
+		if (!initial_offset || kring->users > 1)
+			continue;
+
+		for (j = 0; j < kring->nkr_num_slots; j++) {
+			struct netmap_slot *slot = ring->slot + j;
+
+			nm_write_offset(kring, slot, initial_offset);
+		}
+	}
+
+out:
+	opt->nro_opt.nro_status = error;
+	if (!error) {
+		opt->nro_max_offset = max_offset;
+	}
+	return error;
+
+}
+
+static int
+netmap_compute_buf_len(struct netmap_priv_d *priv)
+{
+	enum txrx t;
+	u_int i;
+	struct netmap_kring *kring;
+	int error = 0;
+	unsigned mtu = 0;
+	struct netmap_adapter *na = priv->np_na;
+	uint64_t target, maxframe;
+
+	if (na->ifp != NULL)
+		mtu = nm_os_ifnet_mtu(na->ifp);
+
+	foreach_selected_ring(priv, t, i, kring) {
+
+		if (kring->users > 1)
+			continue;
+
+		target = NETMAP_BUF_SIZE(kring->na) -
+			 kring->offset_max;
+		if (!kring->offset_gap)
+			kring->offset_gap =
+				NETMAP_BUF_SIZE(kring->na);
+		if (kring->offset_gap < target)
+			target = kring->offset_gap;
+
+		if (mtu) {
+			maxframe = mtu + ETH_HLEN +
+				   ETH_FCS_LEN + VLAN_HLEN;
+			if (maxframe < target) {
+				target = maxframe;
+			}
+		}
+
+		error = kring->nm_bufcfg(kring, target);
+		if (error)
+			goto out;
+
+		*(uint64_t *)(uintptr_t)&kring->ring->buf_align = kring->buf_align;
+
+		if (mtu && t == NR_RX && kring->hwbuf_len < mtu) {
+			if (!(na->na_flags & NAF_MOREFRAG)) {
+				nm_prerr("error: large MTU (%d) needed "
+					 "but %s does not support "
+					 "NS_MOREFRAG", mtu,
+					 na->name);
+				error = EINVAL;
+				goto out;
+			} else {
+				nm_prinf("info: netmap application on "
+					 "%s needs to support "
+					 "NS_MOREFRAG "
+					 "(MTU=%u,buf_size=%llu)",
+					 kring->name, mtu,
+					 (unsigned long long)kring->hwbuf_len);
+			}
+		}
+	}
+out:
+	return error;
+}
 
 /*
  * possibly move the interface to netmap-mode.
@@ -2381,6 +2592,16 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
 	if (error)
 		goto err_rel_excl;
 
+	/* initialize offsets if requested */
+	error = netmap_offsets_init(priv, hdr);
+	if (error)
+		goto err_rel_excl;
+
+	/* compute and validate the buf lengths */
+	error = netmap_compute_buf_len(priv);
+	if (error)
+		goto err_rel_excl;
+
 	/* in all cases, create a new netmap if */
 	nifp = netmap_mem_if_new(na, priv);
 	if (nifp == NULL) {
@@ -2713,17 +2934,12 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
 	}
 #ifdef WITH_VALE
 	case NETMAP_REQ_VALE_ATTACH: {
-		error = netmap_vale_attach(hdr, NULL /* userspace request */);
+		error = netmap_bdg_attach(hdr, NULL /* userspace request */);
 		break;
 	}
 
 	case NETMAP_REQ_VALE_DETACH: {
-		error = netmap_vale_detach(hdr, NULL /* userspace request */);
-		break;
-	}
-
-	case NETMAP_REQ_VALE_LIST: {
-		error = netmap_vale_list(hdr);
+		error = netmap_bdg_detach(hdr, NULL /* userspace request */);
 		break;
 	}
 
@@ -2795,6 +3011,11 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
 		break;
 	}
 
+	case NETMAP_REQ_VALE_LIST: {
+		error = netmap_vale_list(hdr);
+		break;
+	}
+
 	case NETMAP_REQ_VALE_NEWIF: {
 		error = nm_vi_create(hdr);
 		break;
@@ -2804,13 +3025,13 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
 		error = nm_vi_destroy(hdr->nr_name);
 		break;
 	}
+#endif /* WITH_VALE */
 
 	case NETMAP_REQ_VALE_POLLING_ENABLE:
 	case NETMAP_REQ_VALE_POLLING_DISABLE: {
 		error = nm_bdg_polling(hdr);
 		break;
 	}
-#endif /* WITH_VALE */
 	case NETMAP_REQ_POOLS_INFO_GET: {
 		/* Get information from the memory allocator used for
 		 * hdr->nr_name.
		 */
@@ -3029,6 +3250,9 @@ nmreq_opt_size_by_type(uint32_t nro_reqtype, uint64_t nro_size)
 	case NETMAP_REQ_OPT_SYNC_KLOOP_MODE:
 		rv = sizeof(struct nmreq_opt_sync_kloop_mode);
 		break;
+	case NETMAP_REQ_OPT_OFFSETS:
+		rv = sizeof(struct nmreq_opt_offsets);
+		break;
 	}
 	/* subtract the common header */
 	return rv - sizeof(struct nmreq_option);
@@ -3733,16 +3957,14 @@ netmap_attach_common(struct netmap_adapter *na)
 	na->active_fds = 0;
 
 	if (na->nm_mem == NULL) {
-		/* use the global allocator */
-		na->nm_mem = netmap_mem_get(&nm_mem);
+		/* use iommu or global allocator */
+		na->nm_mem = netmap_mem_get_iommu(na);
 	}
-#ifdef WITH_VALE
 	if (na->nm_bdg_attach == NULL)
 		/* no special nm_bdg_attach callback. On VALE
 		 * attach, we need to interpose a bwrap
 		 */
 		na->nm_bdg_attach = netmap_default_bdg_attach;
-#endif
 
 	return 0;
 }
diff --git a/sys/dev/netmap/netmap_bdg.c b/sys/dev/netmap/netmap_bdg.c
index 4d18859e2091..57659f3a7a6e 100644
--- a/sys/dev/netmap/netmap_bdg.c
+++ b/sys/dev/netmap/netmap_bdg.c
@@ -540,6 +540,85 @@ netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
 	return error;
 }
 
+/* Process NETMAP_REQ_VALE_ATTACH.
+ */
+int
+netmap_bdg_attach(struct nmreq_header *hdr, void *auth_token)
+{
+	struct nmreq_vale_attach *req =
+		(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+	struct netmap_vp_adapter * vpna;
+	struct netmap_adapter *na = NULL;
+	struct netmap_mem_d *nmd = NULL;
+	struct nm_bridge *b = NULL;
+	int error;
+
+	NMG_LOCK();
+	/* permission check for modified bridges */
+	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto unlock_exit;
+	}
+
+	if (req->reg.nr_mem_id) {
+		nmd = netmap_mem_find(req->reg.nr_mem_id);
+		if (nmd == NULL) {
+			error = EINVAL;
+			goto unlock_exit;
+		}
+	}
+
+	/* check for existing one */
+	error = netmap_get_vale_na(hdr, &na, nmd, 0);
+	if (na) {
+		error = EBUSY;
+		goto unref_exit;
+	}
+	error = netmap_get_vale_na(hdr, &na,
+			nmd, 1 /* create if not exists */);
+	if (error) { /* no device */
+		goto unlock_exit;
+	}
+
+	if (na == NULL) { /* VALE prefix missing */
+		error = EINVAL;
+		goto unlock_exit;
+	}
+
+	if (NETMAP_OWNED_BY_ANY(na)) {
+		error = EBUSY;
+		goto unref_exit;
+	}
+
+	if (na->nm_bdg_ctl) {
+		/* nop for VALE ports. The bwrap needs to put the hwna
+		 * in netmap mode (see netmap_bwrap_bdg_ctl)
+		 */
+		error = na->nm_bdg_ctl(hdr, na);
+		if (error)
+			goto unref_exit;
+		nm_prdis("registered %s to netmap-mode", na->name);
+	}
+	vpna = (struct netmap_vp_adapter *)na;
+	req->port_index = vpna->bdg_port;
+
+	if (nmd)
+		netmap_mem_put(nmd);
+
+	NMG_UNLOCK();
+	return 0;
+
+unref_exit:
+	netmap_adapter_put(na);
+unlock_exit:
+	if (nmd)
+		netmap_mem_put(nmd);
+
+	NMG_UNLOCK();
+	return error;
+}
+
 
 int
 nm_is_bwrap(struct netmap_adapter *na)
@@ -547,6 +626,74 @@ nm_is_bwrap(struct netmap_adapter *na)
 {
 	return na->nm_register == netmap_bwrap_reg;
 }
 
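The function above is what the kernel runs for NETMAP_REQ_VALE_ATTACH; from userspace the same request is normally issued by vale-ctl, or directly along these lines (a sketch; the switch/port name is illustrative and error handling is omitted):

	struct nmreq_header hdr;
	struct nmreq_vale_attach req;

	memset(&hdr, 0, sizeof(hdr));
	memset(&req, 0, sizeof(req));
	hdr.nr_version = NETMAP_API;
	hdr.nr_reqtype = NETMAP_REQ_VALE_ATTACH;
	strlcpy(hdr.nr_name, "vale0:em0", sizeof(hdr.nr_name));
	hdr.nr_body = (uintptr_t)&req;
	req.reg.nr_mode = NR_REG_ALL_NIC;	/* attach all hw rings to the switch */
	if (ioctl(fd, NIOCCTRL, &hdr) == 0)
		printf("attached as port %u\n", req.port_index);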
+/* Process NETMAP_REQ_VALE_DETACH.
+ */
+int
+netmap_bdg_detach(struct nmreq_header *hdr, void *auth_token)
+{
+	int error;
+
+	NMG_LOCK();
+	error = netmap_bdg_detach_locked(hdr, auth_token);
+	NMG_UNLOCK();
+	return error;
+}
+
+int
+netmap_bdg_detach_locked(struct nmreq_header *hdr, void *auth_token)
+{
+	struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
+	struct netmap_vp_adapter *vpna;
+	struct netmap_adapter *na;
+	struct nm_bridge *b = NULL;
+	int error;
+
+	/* permission check for modified bridges */
+	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto error_exit;
+	}
+
+	error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
+	if (error) { /* no device, or another bridge or user owns the device */
+		goto error_exit;
+	}
+
+	if (na == NULL) { /* VALE prefix missing */
+		error = EINVAL;
+		goto error_exit;
+	} else if (nm_is_bwrap(na) &&
+		   ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+		/* Don't detach a NIC with polling */
+		error = EBUSY;
+		goto unref_exit;
+	}
+
+	vpna = (struct netmap_vp_adapter *)na;
+	if (na->na_vp != vpna) {
+		/* trying to detach the first attach of a VALE persistent
+		 * port attached to 2 bridges
+		 */
+		error = EBUSY;
+		goto unref_exit;
+	}
+	nmreq_det->port_index = vpna->bdg_port;
+
+	if (na->nm_bdg_ctl) {
+		/* remove the port from bridge. The bwrap
+		 * also needs to put the hwna in normal mode
+		 */
+		error = na->nm_bdg_ctl(hdr, na);
+	}
+
+unref_exit:
+	netmap_adapter_put(na);
+error_exit:
+	return error;
+
+}
+
 
 struct nm_bdg_polling_state;
 struct
@@ -1092,7 +1239,7 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
  * hwna rx ring.
  * The bridge wrapper then sends the packets through the bridge.
  */
-static int
+int
 netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
 {
 	struct netmap_adapter *na = kring->na;
@@ -1217,7 +1364,7 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
 		/* intercept the hwna nm_nofify callback on the hw rings */
 		for (i = 0; i < hwna->num_rx_rings; i++) {
 			hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
-			hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
+			hwna->rx_rings[i]->nm_notify = bna->nm_intr_notify;
 		}
 		i = hwna->num_rx_rings; /* for safety */
 		/* save the host ring notify unconditionally */
@@ -1250,12 +1397,6 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
 			hwna->na_lut.objtotal = 0;
 			hwna->na_lut.objsize = 0;
 
-			/* pass ownership of the netmap rings to the hwna */
-			for_rx_tx(t) {
-				for (i = 0; i < netmap_all_rings(na, t); i++) {
-					NMR(na, t)[i]->ring = NULL;
-				}
-			}
 			/* reset the number of host rings to default */
 			for_rx_tx(t) {
 				nma_set_host_nrings(hwna, t, 1);
@@ -1275,6 +1416,11 @@ netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
 	struct netmap_adapter *hwna = bna->hwna;
 	int error;
 
+	/* cache the lut in the embedded host adapter */
+	error = netmap_mem_get_lut(hwna->nm_mem, &bna->host.up.na_lut);
+	if (error)
+		return error;
+
 	/* Forward the request to the hwna. It may happen that nobody
 	 * registered hwna yet, so netmap_mem_get_lut() may not have
 	 * been called yet.
	 */
@@ -1289,9 +1435,69 @@ netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
 	info->num_rx_descs = hwna->num_tx_desc;
 	info->rx_buf_maxsize = hwna->rx_buf_maxsize;
 
+	if (na->na_flags & NAF_HOST_RINGS) {
+		struct netmap_adapter *hostna = &bna->host.up;
+		enum txrx t;
+
+		/* limit the number of host rings to that of hw */
+		if (na->na_flags & NAF_HOST_ALL) {
+			hostna->num_tx_rings = nma_get_nrings(hwna, NR_RX);
+			hostna->num_rx_rings = nma_get_nrings(hwna, NR_TX);
+		} else {
+			nm_bound_var(&hostna->num_tx_rings, 1, 1,
+				nma_get_nrings(hwna, NR_TX), NULL);
+			nm_bound_var(&hostna->num_rx_rings, 1, 1,
+				nma_get_nrings(hwna, NR_RX), NULL);
+		}
+		for_rx_tx(t) {
+			enum txrx r = nm_txrx_swap(t);
+			u_int nr = nma_get_nrings(hostna, t);
+
+			nma_set_host_nrings(na, t, nr);
+			if (nma_get_host_nrings(hwna, t) < nr) {
+				nma_set_host_nrings(hwna, t, nr);
+			}
+			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
+		}
+	}
+
 	return 0;
 }
 
+/* nm_bufcfg callback for bwrap */
+static int
+netmap_bwrap_bufcfg(struct netmap_kring *kring, uint64_t target)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct netmap_kring *hwkring;
+	enum txrx r;
+	int error;
+
+	/* we need the hw kring that corresponds to the bwrap one:
+	 * remember that rx and tx are swapped
+	 */
+	r = nm_txrx_swap(kring->tx);
+	hwkring = NMR(hwna, r)[kring->ring_id];
+
+	/* copy down the offset information, forward the request
+	 * and copy up the results
+	 */
+	hwkring->offset_mask = kring->offset_mask;
+	hwkring->offset_max = kring->offset_max;
+	hwkring->offset_gap = kring->offset_gap;
+
+	error = hwkring->nm_bufcfg(hwkring, target);
+	if (error)
+		return error;
+
+	kring->hwbuf_len = hwkring->hwbuf_len;
+	kring->buf_align = hwkring->buf_align;
+
+	return 0;
+}
 
 /* nm_krings_create callback for bwrap */
 int
@@ -1314,6 +1520,9 @@ netmap_bwrap_krings_create_common(struct netmap_adapter *na)
 	for_rx_tx(t) {
 		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
 			NMR(hwna, t)[i]->users++;
+			/* this is to prevent deletion of the rings through
+			 * our krings, instead of through the hwna ones */
+			NMR(na, t)[i]->nr_kflags |= NKR_NEEDRING;
 		}
 	}
 
@@ -1355,6 +1564,7 @@ netmap_bwrap_krings_create_common(struct netmap_adapter *na)
 	for_rx_tx(t) {
 		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
 			NMR(hwna, t)[i]->users--;
+			NMR(na, t)[i]->users--;
 		}
 	}
 	hwna->nm_krings_delete(hwna);
@@ -1377,6 +1587,7 @@ netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
 	for_rx_tx(t) {
 		for (i = 0; i < netmap_all_rings(hwna, t); i++) {
 			NMR(hwna, t)[i]->users--;
+			NMR(na, t)[i]->users--;
 		}
 	}
 
@@ -1480,6 +1691,7 @@ netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
 		error = netmap_do_regif(npriv, na, hdr);
 		if (error) {
 			netmap_priv_delete(npriv);
+			netmap_mem_restore(bna->hwna);
 			return error;
 		}
 		bna->na_kpriv = npriv;
@@ -1490,6 +1702,7 @@ netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
 		netmap_priv_delete(bna->na_kpriv);
 		bna->na_kpriv = NULL;
 		na->na_flags &= ~NAF_BUSY;
+		netmap_mem_restore(bna->hwna);
 	}
 
 	return error;
@@ -1527,6 +1740,7 @@ netmap_bwrap_attach_common(struct netmap_adapter *na,
 	}
 	na->nm_dtor = netmap_bwrap_dtor;
 	na->nm_config = netmap_bwrap_config;
+	na->nm_bufcfg = netmap_bwrap_bufcfg;
 	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
 	na->pdev = hwna->pdev;
 	na->nm_mem = netmap_mem_get(hwna->nm_mem);
@@ -1546,25 +1760,8 @@ netmap_bwrap_attach_common(struct netmap_adapter *na,
 		na->na_flags |= NAF_HOST_RINGS;
 		hostna = &bna->host.up;
 
-		/* limit the number of host rings to that of hw */
-		nm_bound_var(&hostna->num_tx_rings, 1, 1,
-			nma_get_nrings(hwna, NR_TX), NULL);
-		nm_bound_var(&hostna->num_rx_rings, 1, 1,
-			nma_get_nrings(hwna, NR_RX), NULL);
-
 		snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
 		hostna->ifp = hwna->ifp;
-		for_rx_tx(t) {
-			enum txrx r = nm_txrx_swap(t);
-			u_int nr = nma_get_nrings(hostna, t);
-
-			nma_set_nrings(hostna, t, nr);
-			nma_set_host_nrings(na, t, nr);
-			if (nma_get_host_nrings(hwna, t) < nr) {
-				nma_set_host_nrings(hwna, t, nr);
-			}
-			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
-		}
 		// hostna->nm_txsync = netmap_bwrap_host_txsync;
 		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
 		hostna->nm_mem = netmap_mem_get(na->nm_mem);
@@ -1574,6 +1771,7 @@ netmap_bwrap_attach_common(struct netmap_adapter *na,
 		hostna->na_hostvp = &bna->host;
 		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
 		hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
+		/* bwrap_config() will determine the number of host rings */
 	}
 	if (hwna->na_flags & NAF_MOREFRAG)
 		na->na_flags |= NAF_MOREFRAG;
diff --git a/sys/dev/netmap/netmap_bdg.h b/sys/dev/netmap/netmap_bdg.h
index e4683885e66c..a88eaf11b07c 100644
--- a/sys/dev/netmap/netmap_bdg.h
+++ b/sys/dev/netmap/netmap_bdg.h
@@ -178,8 +178,10 @@ int netmap_bdg_free(struct nm_bridge *b);
 void netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw);
 int netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na);
 int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
+int netmap_bdg_detach_locked(struct nmreq_header *hdr, void *auth_token);
 int netmap_vp_reg(struct netmap_adapter *na, int onoff);
 int netmap_vp_rxsync(struct netmap_kring *kring, int flags);
+int netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags);
 int netmap_bwrap_notify(struct netmap_kring *kring, int flags);
 int netmap_bwrap_attach_common(struct netmap_adapter *na,
 	struct netmap_adapter *hwna);
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
index 2cedea4440fe..a47cb508de04 100644
--- a/sys/dev/netmap/netmap_freebsd.c
+++ b/sys/dev/netmap/netmap_freebsd.c
@@ -1057,7 +1057,7 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
 		vm_page_replace(page, object, (*mres)->pindex, *mres);
 		*mres = page;
 	}
-	vm_page_valid(page);
+	page->valid = VM_PAGE_BITS_ALL;
 	return (VM_PAGER_OK);
 }
 
diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
index 09ba550cae92..f999576736fb 100644
--- a/sys/dev/netmap/netmap_generic.c
+++ b/sys/dev/netmap/netmap_generic.c
@@ -106,7 +106,7 @@ __FBSDID("$FreeBSD$");
 static inline struct mbuf *
 nm_os_get_mbuf(struct ifnet *ifp, int len)
 {
-	return alloc_skb(ifp->needed_headroom + len +
+	return alloc_skb(LL_RESERVED_SPACE(ifp) + len +
 			 ifp->needed_tailroom, GFP_ATOMIC);
 }
 
diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
index fd9db5842df3..d9ae6a4f2054 100644
--- a/sys/dev/netmap/netmap_kern.h
+++ b/sys/dev/netmap/netmap_kern.h
@@ -459,8 +459,16 @@ struct netmap_kring {
 	 * On a NIC reset, the NIC ring indexes may be reset but the
	 * indexes in the netmap rings remain the same. nkr_hwofs
	 * keeps track of the offset between the two.
+	 *
+	 * Moreover, during reset, we can restore only the subset of
+	 * the NIC ring that corresponds to the kernel-owned part of
+	 * the netmap ring. The rest of the slots must be restored
+	 * by the *sync routines when the user releases more slots.
+	 * The nkr_to_refill field keeps track of the number of slots
+	 * that still need to be restored.
	 */
 	int32_t		nkr_hwofs;
+	int32_t		nkr_to_refill;
 
 	/* last_reclaim is opaque marker to help reduce the frequency
	 * of operations such as reclaiming tx buffers. A possible use
@@ -535,6 +543,36 @@ struct netmap_kring {
 	uint32_t	pipe_tail;	/* hwtail updated by the other end */
 #endif /* WITH_PIPES */
 
+	/* mask for the offset-related part of the ptr field in the slots */
+	uint64_t	offset_mask;
+	/* maximum user-specified offset, as stipulated at bind time.
+	 * Larger offset requests will be silently capped to offset_max.
+	 */
+	uint64_t	offset_max;
+	/* minimum gap between two consecutive offsets into the same
+	 * buffer, as stipulated at bind time. This is used to choose
+	 * the hwbuf_len, but is not otherwise checked for compliance
+	 * at runtime.
+	 */
+	uint64_t	offset_gap;
+
+	/* size of hardware buffer. This may be less than the size of
+	 * the netmap buffers because of non-zero offsets, or because
+	 * the netmap buffer size exceeds the capability of the hardware.
+	 */
+	uint64_t	hwbuf_len;
+
+	/* required alignment (in bytes) for the buffers used by this ring.
+	 * Netmap buffers are aligned to cachelines, which should suffice
+	 * for most NICs. If the user is passing offsets, though, we need
+	 * to check that the resulting buf address complies with any
+	 * alignment restriction.
+	 */
+	uint64_t	buf_align;
+
+	/* hardware-specific logic for the selection of the hwbuf_len */
+	int (*nm_bufcfg)(struct netmap_kring *kring, uint64_t target);
+
 	int (*save_notify)(struct netmap_kring *kring, int flags);
 
 #ifdef WITH_MONITOR
@@ -719,6 +757,8 @@ struct netmap_adapter {
 #define NAF_FORCE_NATIVE 128	/* the adapter is always NATIVE */
 /* free */
 #define NAF_MOREFRAG	512	/* the adapter supports NS_MOREFRAG */
+#define NAF_OFFSETS	1024	/* the adapter supports the slot offsets */
+#define NAF_HOST_ALL	2048	/* the adapter wants as many host rings as hw */
 #define NAF_ZOMBIE	(1U<<30) /* the nic driver has been unloaded */
 #define	NAF_BUSY	(1U<<31) /* the adapter is used internally and
				  * cannot be registered from userspace
@@ -782,6 +822,22 @@ struct netmap_adapter {
 	 * nm_config() returns configuration information from the OS
	 *	Called with NMG_LOCK held.
	 *
+	 * nm_bufcfg()
+	 *	the purpose of this callback is to fill the kring->hwbuf_len
+	 *	(l) and kring->buf_align fields. The l value is most important
+	 *	for RX rings, where we want to disallow writes outside of the
+	 *	netmap buffer. The l value must be computed taking into account
+	 *	the stipulated max_offset (o), possibly increased if there are
+	 *	alignment constraints, the maxframe (m), if known, and the
+	 *	current NETMAP_BUF_SIZE (b) of the memory region used by the
+	 *	adapter. We want the largest supported l such that o + l <= b.
+	 *	If m is known to be <= b - o, the callback may also choose the
+	 *	largest l <= b, ignoring the offset. The buf_align field is
+	 *	most important for TX rings when there are offsets. The user
+	 *	will see this value in the ring->buf_align field. Misaligned
+	 *	offsets will cause the corresponding packets to be silently
+	 *	dropped.
+	 *
 	 * nm_krings_create() create and init the tx_rings and
	 * 	rx_rings arrays of kring structures. In particular,
	 * 	set the nm_sync callbacks for each ring.
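To make the nm_bufcfg() contract concrete, here is a minimal sketch of the callback for a hypothetical NIC that can only program buffer lengths in multiples of 1 KiB (the driver name and constraint are illustrative, not part of this patch; the callback receives target already reduced to b - o by netmap_compute_buf_len()):

	static int
	mynic_bufcfg(struct netmap_kring *kring, uint64_t target)
	{
		const uint64_t align = 1024;	/* assumed hardware constraint */

		if (target < align)
			return EINVAL;		/* cannot satisfy the request */
		kring->buf_align = align;	/* user-visible via ring->buf_align */
		kring->hwbuf_len = rounddown(target, align);
		return 0;
	}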
@@ -811,6 +867,7 @@ struct netmap_adapter {
 	int (*nm_txsync)(struct netmap_kring *kring, int flags);
 	int (*nm_rxsync)(struct netmap_kring *kring, int flags);
 	int (*nm_notify)(struct netmap_kring *kring, int flags);
+	int (*nm_bufcfg)(struct netmap_kring *kring, uint64_t target);
 #define NAF_FORCE_READ      1
 #define NAF_FORCE_RECLAIM   2
 #define NAF_CAN_FORWARD_DOWN 4
@@ -1096,12 +1153,13 @@ struct netmap_bwrap_adapter {
 	 * here its original value, to be restored at detach
	 */
 	struct netmap_vp_adapter *saved_na_vp;
+	int (*nm_intr_notify)(struct netmap_kring *kring, int flags);
 };
 int nm_bdg_polling(struct nmreq_header *hdr);
 
+int netmap_bdg_attach(struct nmreq_header *hdr, void *auth_token);
+int netmap_bdg_detach(struct nmreq_header *hdr, void *auth_token);
 #ifdef WITH_VALE
-int netmap_vale_attach(struct nmreq_header *hdr, void *auth_token);
-int netmap_vale_detach(struct nmreq_header *hdr, void *auth_token);
 int netmap_vale_list(struct nmreq_header *hdr);
 int netmap_vi_create(struct nmreq_header *hdr, int);
 int nm_vi_create(struct nmreq_header *);
@@ -1431,6 +1489,12 @@ uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *);
 	} while (0)
 #endif
 
+#define NM_CHECK_ADDR_LEN_OFF(na_, l_, o_) do {				\
+	if ((l_) + (o_) < (l_) ||					\
+	    (l_) + (o_) > NETMAP_BUF_SIZE(na_)) {			\
+		(l_) = NETMAP_BUF_SIZE(na_) - (o_);			\
+	} } while (0)
+
 
 /*---------------------------------------------------------------*/
 /*
@@ -1493,6 +1557,7 @@ int netmap_get_na(struct nmreq_header *hdr, struct netmap_adapter **na,
 void netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp);
 int netmap_get_hw_na(struct ifnet *ifp,
		struct netmap_mem_d *nmd, struct netmap_adapter **na);
+void netmap_mem_restore(struct netmap_adapter *na);
 
 #ifdef WITH_VALE
 uint32_t netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
@@ -1680,7 +1745,7 @@ extern int netmap_generic_txqdisc;
 /* Assigns the device IOMMU domain to an allocator.
  * Returns -ENOMEM in case the domain is different */
-#define nm_iommu_group_id(dev) (0)
+#define nm_iommu_group_id(dev) (-1)
 
 /* Callback invoked by the dma machinery after a successful dmamap_load */
 static void netmap_dmamap_cb(__unused void *arg,
@@ -1890,6 +1955,9 @@ struct plut_entry {
 
 struct netmap_obj_pool;
 
+/* alignment for netmap buffers */
+#define NM_BUF_ALIGN 64
+
 /*
  * NMB return the virtual address of a buffer (buffer 0 on bad index)
  * PNMB also fills the physical address
@@ -1919,6 +1987,40 @@ PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
 	return ret;
 }
 
+static inline void
+nm_write_offset(struct netmap_kring *kring,
+		struct netmap_slot *slot, uint64_t offset)
+{
+	slot->ptr = (slot->ptr & ~kring->offset_mask) |
+		    (offset & kring->offset_mask);
+}
+
+static inline uint64_t
+nm_get_offset(struct netmap_kring *kring, struct netmap_slot *slot)
+{
+	uint64_t offset = (slot->ptr & kring->offset_mask);
+	if (unlikely(offset > kring->offset_max))
+		offset = kring->offset_max;
+	return offset;
+}
+
+static inline void *
+NMB_O(struct netmap_kring *kring, struct netmap_slot *slot)
+{
+	void *addr = NMB(kring->na, slot);
+	return (char *)addr + nm_get_offset(kring, slot);
+}
+
+static inline void *
+PNMB_O(struct netmap_kring *kring, struct netmap_slot *slot, uint64_t *pp)
+{
+	void *addr = PNMB(kring->na, slot, pp);
+	uint64_t offset = nm_get_offset(kring, slot);
+	addr = (char *)addr + offset;
+	*pp += offset;
+	return addr;
+}
+
 
 /*
  * Structure associated to each netmap file descriptor.
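Typical driver usage of the helpers above, inside a txsync loop (a hypothetical fragment, not from this patch): the offset travels in slot->ptr, PNMB_O() returns the payload address and adjusts the physical address accordingly, and NM_CHECK_ADDR_LEN_OFF() clamps the length so that offset + len can never overrun the buffer:

	struct netmap_slot *slot = &ring->slot[nm_i];
	uint64_t paddr;
	uint64_t offset = nm_get_offset(kring, slot);
	u_int len = slot->len;
	void *addr = PNMB_O(kring, slot, &paddr);	/* vaddr + offset; paddr adjusted */

	NM_CHECK_ADDR_LEN_OFF(na, len, offset);		/* clamp len to the buffer end */
	/* ... program the NIC descriptor with paddr and len ... */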
@@ -2418,4 +2520,15 @@ void netmap_uninit_bridges(void);
 #define CSB_WRITE(csb, field, v) (suword32(&csb->field, v))
 #endif /* ! linux */
 
+/* some macros that may not be defined */
+#ifndef ETH_HLEN
+#define ETH_HLEN 14
+#endif
+#ifndef ETH_FCS_LEN
+#define ETH_FCS_LEN 4
+#endif
+#ifndef VLAN_HLEN
+#define VLAN_HLEN 4
+#endif
+
 #endif /* _NET_NETMAP_KERN_H_ */
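These fallback constants feed the maxframe computation in netmap_compute_buf_len() above; for example, with a standard 1500-byte MTU the numbers work out as follows (values follow directly from the definitions):

	maxframe = mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
	/*       = 1500 + 14 + 4 + 4 = 1522 bytes; target is then
	 *         lowered to this value if it was larger. */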
diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
index 7d798ee5a7fa..069e0fa75b34 100644
--- a/sys/dev/netmap/netmap_mem2.c
+++ b/sys/dev/netmap/netmap_mem2.c
@@ -146,16 +146,19 @@ struct netmap_mem_ops {
 	vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t);
 	int (*nmd_config)(struct netmap_mem_d *);
-	int (*nmd_finalize)(struct netmap_mem_d *);
-	void (*nmd_deref)(struct netmap_mem_d *);
+	int (*nmd_finalize)(struct netmap_mem_d *, struct netmap_adapter *);
+	void (*nmd_deref)(struct netmap_mem_d *, struct netmap_adapter *);
 
 	ssize_t  (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr);
 	void (*nmd_delete)(struct netmap_mem_d *);
 
-	struct netmap_if * (*nmd_if_new)(struct netmap_adapter *,
-					 struct netmap_priv_d *);
-	void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *);
-	int (*nmd_rings_create)(struct netmap_adapter *);
-	void (*nmd_rings_delete)(struct netmap_adapter *);
+	struct netmap_if * (*nmd_if_new)(struct netmap_mem_d *,
+					 struct netmap_adapter *, struct netmap_priv_d *);
+	void (*nmd_if_delete)(struct netmap_mem_d *,
+			      struct netmap_adapter *, struct netmap_if *);
+	int (*nmd_rings_create)(struct netmap_mem_d *,
+				struct netmap_adapter *);
+	void (*nmd_rings_delete)(struct netmap_mem_d *,
+				 struct netmap_adapter *);
 };
 
 struct netmap_mem_d {
@@ -165,6 +168,7 @@ struct netmap_mem_d {
 	u_int flags;
 #define NETMAP_MEM_FINALIZED	0x1	/* preallocation done */
 #define NETMAP_MEM_HIDDEN	0x8	/* beeing prepared */
+#define NETMAP_MEM_NOMAP	0x10	/* do not map/unmap pdevs */
 	int lasterr;		/* last error for curr config */
 	int active;		/* active users */
 	int refcount;
@@ -267,7 +271,7 @@ netmap_mem_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
 	struct netmap_mem_d *nmd = na->nm_mem;
 
 	NMA_LOCK(nmd);
-	nifp = nmd->ops->nmd_if_new(na, priv);
+	nifp = nmd->ops->nmd_if_new(nmd, na, priv);
 	NMA_UNLOCK(nmd);
 
 	return nifp;
@@ -279,7 +283,7 @@ netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nif)
 	struct netmap_mem_d *nmd = na->nm_mem;
 
 	NMA_LOCK(nmd);
-	nmd->ops->nmd_if_delete(na, nif);
+	nmd->ops->nmd_if_delete(nmd, na, nif);
 	NMA_UNLOCK(nmd);
 }
 
@@ -290,7 +294,7 @@ netmap_mem_rings_create(struct netmap_adapter *na)
 	struct netmap_mem_d *nmd = na->nm_mem;
 
 	NMA_LOCK(nmd);
-	rv = nmd->ops->nmd_rings_create(na);
+	rv = nmd->ops->nmd_rings_create(nmd, na);
 	NMA_UNLOCK(nmd);
 
 	return rv;
@@ -302,13 +306,13 @@ netmap_mem_rings_delete(struct netmap_adapter *na)
 	struct netmap_mem_d *nmd = na->nm_mem;
 
 	NMA_LOCK(nmd);
-	nmd->ops->nmd_rings_delete(na);
+	nmd->ops->nmd_rings_delete(nmd, na);
 	NMA_UNLOCK(nmd);
 }
 
 static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
 static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
-static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);
+static int nm_mem_check_group(struct netmap_mem_d *, struct device *);
 static void nm_mem_release_id(struct netmap_mem_d *);
 
 nm_memid_t
@@ -319,14 +323,14 @@ netmap_mem_get_id(struct netmap_mem_d *nmd)
 
 #ifdef NM_DEBUG_MEM_PUTGET
 #define NM_DBG_REFC(nmd, func, line)	\
-	nm_prinf("%d mem[%d] -> %d", line, (nmd)->nm_id, (nmd)->refcount);
+	nm_prinf("%d mem[%d:%d] -> %d", line, (nmd)->nm_id, (nmd)->nm_grp, (nmd)->refcount);
 #else
 #define NM_DBG_REFC(nmd, func, line)
 #endif
 
 /* circular list of all existing allocators */
 static struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
-NM_MTX_T nm_mem_list_lock;
+static NM_MTX_T nm_mem_list_lock;
 
 struct netmap_mem_d *
 __netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line)
@@ -356,7 +360,7 @@ int
 netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
 {
 	int lasterr = 0;
-	if (nm_mem_assign_group(nmd, na->pdev) < 0) {
+	if (nm_mem_check_group(nmd, na->pdev) < 0) {
 		return ENOMEM;
 	}
 
@@ -367,9 +371,9 @@ netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
 
 	nmd->active++;
 
-	nmd->lasterr = nmd->ops->nmd_finalize(nmd);
+	nmd->lasterr = nmd->ops->nmd_finalize(nmd, na);
 
-	if (!nmd->lasterr && na->pdev) {
+	if (!nmd->lasterr && !(nmd->flags & NETMAP_MEM_NOMAP)) {
 		nmd->lasterr = netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
 	}
 
@@ -473,7 +477,7 @@ netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
 {
 	int last_user = 0;
 	NMA_LOCK(nmd);
-	if (na->active_fds <= 0)
+	if (na->active_fds <= 0 && !(nmd->flags & NETMAP_MEM_NOMAP))
 		netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
 	if (nmd->active == 1) {
 		last_user = 1;
@@ -484,11 +488,10 @@ netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
 		 */
 		netmap_mem_init_bitmaps(nmd);
 	}
-	nmd->ops->nmd_deref(nmd);
+	nmd->ops->nmd_deref(nmd, na);
 
 	nmd->active--;
 	if (last_user) {
-		nmd->nm_grp = -1;
 		nmd->lasterr = 0;
 	}
 
@@ -584,6 +587,7 @@ struct netmap_mem_d nm_mem = {	/* Our memory allocator. */
 	.name = "1"
 };
 
+static struct netmap_mem_d nm_mem_blueprint; /* blueprint for the private memory allocators */
 
 /* XXX clang is not happy about using name as a print format */
@@ -649,7 +653,7 @@ DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
 
 /* call with nm_mem_list_lock held */
 static int
-nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
+nm_mem_assign_id_locked(struct netmap_mem_d *nmd, int grp_id)
 {
 	nm_memid_t id;
 	struct netmap_mem_d *scan = netmap_last_mem_d;
@@ -663,6 +667,7 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
 		scan = scan->next;
 		if (id != scan->nm_id) {
 			nmd->nm_id = id;
+			nmd->nm_grp = grp_id;
 			nmd->prev = scan->prev;
 			nmd->next = scan;
 			scan->prev->next = nmd;
@@ -680,12 +685,12 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
 
 /* call with nm_mem_list_lock *not* held */
 static int
-nm_mem_assign_id(struct netmap_mem_d *nmd)
+nm_mem_assign_id(struct netmap_mem_d *nmd, int grp_id)
 {
 	int ret;
 
 	NM_MTX_LOCK(nm_mem_list_lock);
-	ret = nm_mem_assign_id_locked(nmd);
+	ret = nm_mem_assign_id_locked(nmd, grp_id);
 	NM_MTX_UNLOCK(nm_mem_list_lock);
 
 	return ret;
@@ -725,21 +730,24 @@ netmap_mem_find(nm_memid_t id)
 }
 
 static int
-nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
+nm_mem_check_group(struct netmap_mem_d *nmd, struct device *dev)
 {
 	int err = 0, id;
+
+	/* Skip non-hw adapters.
+	 * VALE ports can use a particular allocator through the
+	 * vale-ctl -m option.
+	 */
+	if (!dev)
+		return 0;
 	id = nm_iommu_group_id(dev);
 	if (netmap_debug & NM_DEBUG_MEM)
 		nm_prinf("iommu_group %d", id);
 
 	NMA_LOCK(nmd);
 
-	if (nmd->nm_grp < 0)
-		nmd->nm_grp = id;
-
 	if (nmd->nm_grp != id) {
 		if (netmap_verbose)
-			nm_prerr("iommu group mismatch: %u vs %u",
+			nm_prerr("iommu group mismatch: %d vs %d",
				 nmd->nm_grp, id);
 		nmd->lasterr = err = ENOMEM;
 	}
@@ -1327,7 +1335,7 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj
 	p->r_objsize = objsize;
 
 #define MAX_CLUSTSIZE	(1<<22)		// 4 MB
-#define LINE_ROUND	NM_CACHE_ALIGN	// 64
+#define LINE_ROUND	NM_BUF_ALIGN	// 64
 	if (objsize >= MAX_CLUSTSIZE) {
 		/* we could do it but there is no point */
 		nm_prerr("unsupported allocation for %d bytes", objsize);
@@ -1524,11 +1532,13 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
 {
 	int i, lim = p->objtotal;
 	struct netmap_lut *lut;
-
 	if (na == NULL || na->pdev == NULL)
 		return 0;
 
 	lut = &na->na_lut;
+
+
+
 #if defined(__FreeBSD__)
 	/* On FreeBSD mapping and unmapping is performed by the txsync
	 * and rxsync routine, packet by packet. */
@@ -1542,7 +1552,7 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
 	nm_prerr("unsupported on Windows");
 #else /* linux */
 	nm_prdis("unmapping and freeing plut for %s", na->name);
-	if (lut->plut == NULL)
+	if (lut->plut == NULL || na->pdev == NULL)
		return 0;
 	for (i = 0; i < lim; i += p->_clustentries) {
		if (lut->plut[i].paddr)
@@ -1634,6 +1644,7 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd)
			goto error;
		nmd->nm_totalsize += nmd->pools[i].memtotal;
	}
+	nmd->nm_totalsize = (nmd->nm_totalsize + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
	nmd->lasterr = netmap_mem_init_bitmaps(nmd);
	if (nmd->lasterr)
		goto error;
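In the next hunk, _netmap_mem_private_new() gains a memtotal argument: when wrapping a fixed-size (e.g. external) memory region, each pool request is checked against the remaining budget, and whatever is left over becomes extra buffers. A worked example with illustrative numbers:

	/* memtotal = 8 MiB, and the if/ring/buf pools request 2 MiB in
	 * total, with 2048-byte buffers:
	 *	surplus       = 8 MiB - 2 MiB = 6 MiB
	 *	extra buffers = (6 MiB + 2047) / 2048 = 3072
	 * so params[NETMAP_BUF_POOL].num grows by 3072.
	 */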
@@ -1660,11 +1671,17 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd)
  * allocator for private memory
  */
 static void *
-_netmap_mem_private_new(size_t size, struct netmap_obj_params *p,
-		struct netmap_mem_ops *ops, int *perr)
+_netmap_mem_private_new(size_t size, struct netmap_obj_params *p, int grp_id,
+		struct netmap_mem_ops *ops, uint64_t memtotal, int *perr)
 {
 	struct netmap_mem_d *d = NULL;
 	int i, err = 0;
+	int checksz = 0;
+
+	/* if memtotal is != 0 we check that the request fits the available
+	 * memory. Moreover, any surplus memory is assigned to buffers.
+	 */
+	checksz = (memtotal > 0);
 
 	d = nm_os_malloc(size);
 	if (d == NULL) {
@@ -1675,7 +1692,7 @@ _netmap_mem_private_new(size_t size, struct netmap_obj_params *p,
 
 	*d = nm_blueprint;
 	d->ops = ops;
 
-	err = nm_mem_assign_id(d);
+	err = nm_mem_assign_id(d, grp_id);
 	if (err)
 		goto error_free;
 	snprintf(d->name, NM_MEM_NAMESZ, "%d", d->nm_id);
@@ -1684,9 +1701,31 @@ _netmap_mem_private_new(size_t size, struct netmap_obj_params *p,
 		snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ,
			 nm_blueprint.pools[i].name, d->name);
+		if (checksz) {
+			uint64_t poolsz = p[i].num * p[i].size;
+			if (memtotal < poolsz) {
+				nm_prerr("%s: request too large", d->pools[i].name);
+				err = ENOMEM;
+				goto error;
+			}
+			memtotal -= poolsz;
+		}
 		d->params[i].num = p[i].num;
 		d->params[i].size = p[i].size;
 	}
+	if (checksz && memtotal > 0) {
+		uint64_t sz = d->params[NETMAP_BUF_POOL].size;
+		uint64_t n = (memtotal + sz - 1) / sz;
+
+		if (n) {
+			if (netmap_verbose) {
+				nm_prinf("%s: adding %llu more buffers",
+					 d->pools[NETMAP_BUF_POOL].name,
+					 (unsigned long long)n);
+			}
+			d->params[NETMAP_BUF_POOL].num += n;
+		}
+	}
 
 	NMA_LOCK_INIT(d);
@@ -1762,11 +1801,65 @@ netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd,
			p[NETMAP_BUF_POOL].num,
			p[NETMAP_BUF_POOL].size);
 
-	d = _netmap_mem_private_new(sizeof(*d), p, &netmap_mem_global_ops, perr);
+	d = _netmap_mem_private_new(sizeof(*d), p, -1, &netmap_mem_global_ops, 0, perr);
 
 	return d;
 }
 
+/* Get a reference to the IOMMU allocator: find an existing one
+ * or create a new one. Non-hw adapters fall back to the global
+ * allocator.
+ */
+struct netmap_mem_d *
+netmap_mem_get_iommu(struct netmap_adapter *na)
+{
+	int i, err, grp_id;
+	struct netmap_mem_d *nmd;
+
+	if (na == NULL || na->pdev == NULL)
+		return netmap_mem_get(&nm_mem);
+
+	grp_id = nm_iommu_group_id(na->pdev);
+
+	NM_MTX_LOCK(nm_mem_list_lock);
+	nmd = netmap_last_mem_d;
+	do {
+		if (!(nmd->flags & NETMAP_MEM_HIDDEN) && nmd->nm_grp == grp_id) {
+			nmd->refcount++;
+			NM_DBG_REFC(nmd, __FUNCTION__, __LINE__);
+			NM_MTX_UNLOCK(nm_mem_list_lock);
+			return nmd;
+		}
+		nmd = nmd->next;
+	} while (nmd != netmap_last_mem_d);
+
+	nmd = nm_os_malloc(sizeof(*nmd));
+	if (nmd == NULL)
+		goto error;
+
+	*nmd = nm_mem_blueprint;
+
+	err = nm_mem_assign_id_locked(nmd, grp_id);
+	if (err)
+		goto error_free;
+
+	snprintf(nmd->name, sizeof(nmd->name), "%d", nmd->nm_id);
+
+	for (i = 0; i < NETMAP_POOLS_NR; i++) {
+		snprintf(nmd->pools[i].name, NETMAP_POOL_MAX_NAMSZ, "%s-%s",
+			 nm_mem_blueprint.pools[i].name, nmd->name);
+	}
+
+	NMA_LOCK_INIT(nmd);
+
+	NM_MTX_UNLOCK(nm_mem_list_lock);
+	return nmd;
+
+error_free:
+	nm_os_free(nmd);
+error:
+	NM_MTX_UNLOCK(nm_mem_list_lock);
+	return NULL;
+}
 
 /* call with lock held */
 static int
@@ -1800,7 +1893,7 @@ netmap_mem2_config(struct netmap_mem_d *nmd)
 }
 
 static int
-netmap_mem2_finalize(struct netmap_mem_d *nmd)
+netmap_mem2_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
 {
 	if (nmd->flags & NETMAP_MEM_FINALIZED)
		goto out;
@@ -1837,6 +1930,7 @@ NM_MTX_T nm_mem_ext_list_lock;
 int
 netmap_mem_init(void)
 {
+	nm_mem_blueprint = nm_mem;
 	NM_MTX_INIT(nm_mem_list_lock);
 	NMA_LOCK_INIT(&nm_mem);
 	netmap_mem_get(&nm_mem);
@@ -1852,37 +1946,23 @@ netmap_mem_fini(void)
 	netmap_mem_put(&nm_mem);
 }
 
-static void
-netmap_free_rings(struct netmap_adapter *na)
+static int
+netmap_mem_ring_needed(struct netmap_kring *kring)
 {
-	enum txrx t;
-
-	for_rx_tx(t) {
-		u_int i;
-		for (i = 0; i < netmap_all_rings(na, t); i++) {
-			struct netmap_kring *kring = NMR(na, t)[i];
-			struct netmap_ring *ring = kring->ring;
-
-			if (ring == NULL ||
-			    kring->users > 0 ||
-			    (kring->nr_kflags & NKR_NEEDRING)) {
-				if (netmap_debug & NM_DEBUG_MEM)
-					nm_prinf("NOT deleting ring %s (ring %p, users %d neekring %d)",
-						kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
-				continue;
-			}
-			if (netmap_debug & NM_DEBUG_MEM)
-				nm_prinf("deleting ring %s", kring->name);
-			if (!(kring->nr_kflags & NKR_FAKERING)) {
-				nm_prdis("freeing bufs for %s", kring->name);
-				netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
-			} else {
-				nm_prdis("NOT freeing bufs for %s", kring->name);
-			}
-			netmap_ring_free(na->nm_mem, ring);
-			kring->ring = NULL;
-		}
-	}
+	return kring->ring == NULL &&
+	       (kring->users > 0 ||
+		(kring->nr_kflags & NKR_NEEDRING));
 }
 
+static int
+netmap_mem_ring_todelete(struct netmap_kring *kring)
+{
+	return kring->ring != NULL &&
+	       kring->users == 0 &&
+	       !(kring->nr_kflags & NKR_NEEDRING);
+}
+
+
 /* call with NMA_LOCK held
 *
 *
 * Allocate netmap rings and buffers for this card
@@ -1891,7 +1971,7 @@ netmap_free_rings(struct netmap_adapter *na)
 * in netmap_krings_create().
 */
 static int
-netmap_mem2_rings_create(struct netmap_adapter *na)
+netmap_mem2_rings_create(struct netmap_mem_d *nmd, struct netmap_adapter *na)
 {
 	enum txrx t;
@@ -1903,7 +1983,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
			struct netmap_ring *ring = kring->ring;
			u_int len, ndesc;
 
-			if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) {
+			if (!netmap_mem_ring_needed(kring)) {
				/* uneeded, or already created by somebody else */
				if (netmap_debug & NM_DEBUG_MEM)
					nm_prinf("NOT creating ring %s (ring %p, users %d neekring %d)",
						kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
				continue;
			}
@@ -1915,7 +1995,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
			ndesc = kring->nkr_num_slots;
			len = sizeof(struct netmap_ring) +
				  ndesc * sizeof(struct netmap_slot);
-			ring = netmap_ring_malloc(na->nm_mem, len);
+			ring = netmap_ring_malloc(nmd, len);
			if (ring == NULL) {
				nm_prerr("Cannot allocate %s_ring", nm_txrx2str(t));
				goto cleanup;
			}
@@ -1924,16 +2004,16 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
			kring->ring = ring;
			*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
			*(int64_t *)(uintptr_t)&ring->buf_ofs =
-			    (na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
-				na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
-				netmap_ring_offset(na->nm_mem, ring);
+			    (nmd->pools[NETMAP_IF_POOL].memtotal +
+				nmd->pools[NETMAP_RING_POOL].memtotal) -
+				netmap_ring_offset(nmd, ring);
 
			/* copy values from kring */
			ring->head = kring->rhead;
			ring->cur = kring->rcur;
			ring->tail = kring->rtail;
			*(uint32_t *)(uintptr_t)&ring->nr_buf_size =
-				netmap_mem_bufsize(na->nm_mem);
+				netmap_mem_bufsize(nmd);
			nm_prdis("%s h %d c %d t %d", kring->name,
				ring->head, ring->cur, ring->tail);
			nm_prdis("initializing slots for %s_ring", nm_txrx2str(t));
@@ -1941,7 +2021,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
				/* this is a real ring */
				if (netmap_debug & NM_DEBUG_MEM)
					nm_prinf("allocating buffers for %s", kring->name);
-				if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
+				if (netmap_new_bufs(nmd, ring->slot, ndesc)) {
					nm_prerr("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
					goto cleanup;
				}
@@ -1949,7 +2029,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
				/* this is a fake ring, set all indices to 0 */
				if (netmap_debug & NM_DEBUG_MEM)
					nm_prinf("NOT allocating buffers for %s", kring->name);
-				netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
+				netmap_mem_set_ring(nmd, ring->slot, ndesc, 0);
			}
			/* ring info */
			*(uint16_t *)(uintptr_t)&ring->ringid = kring->ring_id;
@@ -1970,12 +2050,35 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
 }
 
 static void
-netmap_mem2_rings_delete(struct netmap_adapter *na)
+netmap_mem2_rings_delete(struct netmap_mem_d *nmd, struct netmap_adapter *na)
 {
-	/* last instance, release bufs and rings */
-	netmap_free_rings(na);
-}
+	enum txrx t;
 
+	for_rx_tx(t) {
+		u_int i;
+		for (i = 0; i < netmap_all_rings(na, t); i++) {
+			struct netmap_kring *kring = NMR(na, t)[i];
+			struct netmap_ring *ring = kring->ring;
+
+			if (!netmap_mem_ring_todelete(kring)) {
+				if (netmap_debug & NM_DEBUG_MEM)
+					nm_prinf("NOT deleting ring %s (ring %p, users %d needring %d)",
+						kring->name, ring, kring->users,
+						kring->nr_kflags & NKR_NEEDRING);
+				continue;
+			}
+			if (netmap_debug & NM_DEBUG_MEM)
+				nm_prinf("deleting ring %s", kring->name);
+			if (!(kring->nr_kflags & NKR_FAKERING)) {
+				nm_prdis("freeing bufs for %s", kring->name);
+				netmap_free_bufs(nmd, ring->slot, kring->nkr_num_slots);
+			} else {
+				nm_prdis("NOT freeing bufs for %s", kring->name);
+			}
+			netmap_ring_free(nmd, ring);
+			kring->ring = NULL;
+		}
+	}
+}
 
 /* call with NMA_LOCK held */
 /*
@@ -1986,7 +2089,8 @@ netmap_mem2_rings_delete(struct netmap_adapter *na)
 * the interface is in netmap mode.
 */
 static struct netmap_if *
-netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
+netmap_mem2_if_new(struct netmap_mem_d *nmd,
+		struct netmap_adapter *na, struct netmap_priv_d *priv)
 {
 	struct netmap_if *nifp;
 	ssize_t base; /* handy for relative offsets between rings and nifp */
@@ -2005,7 +2109,7 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
	 */
	len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t));
-	nifp = netmap_if_malloc(na->nm_mem, len);
+	nifp = netmap_if_malloc(nmd, len);
	if (nifp == NULL) {
		return NULL;
	}
@@ -2024,7 +2128,7 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
	 * between the ring and nifp, so the information is usable in
	 * userspace to reach the ring from the nifp.
	 */
-	base = netmap_if_offset(na->nm_mem, nifp);
+	base = netmap_if_offset(nmd, nifp);
	for (i = 0; i < n[NR_TX]; i++) {
		/* XXX instead of ofs == 0 maybe use the offset of an error
		 * ring, like we do for buffers?
		 */
@@ -2032,7 +2136,7 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
		if (na->tx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_TX]
				&& i < priv->np_qlast[NR_TX]) {
-			ofs = netmap_ring_offset(na->nm_mem,
+			ofs = netmap_ring_offset(nmd,
					na->tx_rings[i]->ring) - base;
		}
		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = ofs;
@@ -2044,7 +2148,7 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
		if (na->rx_rings[i]->ring != NULL && i >= priv->np_qfirst[NR_RX]
				&& i < priv->np_qlast[NR_RX]) {
-			ofs = netmap_ring_offset(na->nm_mem,
+			ofs = netmap_ring_offset(nmd,
					na->rx_rings[i]->ring) - base;
		}
		*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = ofs;
@@ -2054,18 +2158,19 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
 }
 
 static void
-netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
+netmap_mem2_if_delete(struct netmap_mem_d *nmd,
+		struct netmap_adapter *na, struct netmap_if *nifp)
 {
	if (nifp == NULL)
		/* nothing to do */
		return;
	if (nifp->ni_bufs_head)
		netmap_extra_free(na, nifp->ni_bufs_head);
-	netmap_if_free(na->nm_mem, nifp);
+	netmap_if_free(nmd, nifp);
 }
 
 static void
-netmap_mem2_deref(struct netmap_mem_d *nmd)
+netmap_mem2_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
 {
 
	if (netmap_debug & NM_DEBUG_MEM)
@@ -2257,11 +2362,14 @@ netmap_mem_ext_create(uint64_t usrptr, struct nmreq_pools_info *pi, int *perror)
		nm_prinf("not found, creating new");
 
	nme = _netmap_mem_private_new(sizeof(*nme),
+
			(struct netmap_obj_params[]){
				{ pi->nr_if_pool_objsize, pi->nr_if_pool_objtotal },
				{ pi->nr_ring_pool_objsize, pi->nr_ring_pool_objtotal },
				{ pi->nr_buf_pool_objsize, pi->nr_buf_pool_objtotal }},
+			-1,
			&netmap_mem_ext_ops,
+			pi->nr_memsize,
			&error);
	if (nme == NULL)
		goto out_unmap;
@@ -2517,7 +2625,7 @@ netmap_mem_pt_guest_config(struct netmap_mem_d *nmd)
 }
 
 static int
-netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd)
+netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
 {
	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
	uint64_t mem_size;
@@ -2590,7 +2698,7 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd)
 }
 
 static void
-netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd)
+netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
 {
	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
 
@@ -2630,13 +2738,14 @@ netmap_mem_pt_guest_delete(struct netmap_mem_d *nmd)
 }
 
 static struct netmap_if *
-netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
+netmap_mem_pt_guest_if_new(struct netmap_mem_d *nmd,
+		struct netmap_adapter *na, struct netmap_priv_d *priv)
 {
-	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
+	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
	struct mem_pt_if *ptif;
	struct netmap_if *nifp = NULL;
 
-	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
+	ptif = netmap_mem_pt_guest_ifp_lookup(nmd, na->ifp);
	if (ptif == NULL) {
		nm_prerr("interface %s is not in passthrough", na->name);
		goto out;
@@ -2649,25 +2758,27 @@ netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
 }
 
 static void
-netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
+netmap_mem_pt_guest_if_delete(struct netmap_mem_d * nmd,
+		struct netmap_adapter *na, struct netmap_if *nifp)
 {
	struct mem_pt_if *ptif;
 
-	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
+	ptif = netmap_mem_pt_guest_ifp_lookup(nmd, na->ifp);
	if (ptif == NULL) {
		nm_prerr("interface %s is not in passthrough", na->name);
	}
 }
 
 static int
-netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
+netmap_mem_pt_guest_rings_create(struct netmap_mem_d *nmd,
+		struct netmap_adapter *na)
 {
-	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
+	struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
	struct mem_pt_if *ptif;
	struct netmap_if *nifp;
	int i, error = -1;
 
-	ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp);
+	ptif = netmap_mem_pt_guest_ifp_lookup(nmd, na->ifp);
	if (ptif == NULL) {
		nm_prerr("interface %s is not in passthrough", na->name);
		goto out;
@@ -2698,7 +2809,7 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
 }
 
 static void
-netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na)
+netmap_mem_pt_guest_rings_delete(struct netmap_mem_d *nmd, struct netmap_adapter *na)
 {
 #if 0
	enum txrx t;
@@ -2712,6 +2823,8 @@ netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na)
		}
	}
 #endif
+	(void)nmd;
+	(void)na;
 }
 
 static struct netmap_mem_ops netmap_mem_pt_guest_ops = {
@@ -2769,7 +2882,7 @@ netmap_mem_pt_guest_create(nm_memid_t mem_id)
	ptnmd->pt_ifs = NULL;
 
	/* Assign new id in the guest (We have the lock) */
-	err = nm_mem_assign_id_locked(&ptnmd->up);
+	err = nm_mem_assign_id_locked(&ptnmd->up, -1);
	if (err)
		goto error;
 
diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h
index 4f2075507651..c0e039b42128 100644
--- a/sys/dev/netmap/netmap_mem2.h
+++ b/sys/dev/netmap/netmap_mem2.h
@@ -147,6 +147,7 @@ struct netmap_mem_d* netmap_mem_private_new( u_int txr, u_int txd, u_int rxr, u_
 #define netmap_mem_get(d) __netmap_mem_get(d, __FUNCTION__, __LINE__)
 #define netmap_mem_put(d) __netmap_mem_put(d, __FUNCTION__, __LINE__)
 struct netmap_mem_d* __netmap_mem_get(struct netmap_mem_d *, const char *, int);
+struct netmap_mem_d* netmap_mem_get_iommu(struct netmap_adapter *);
 void __netmap_mem_put(struct netmap_mem_d *, const char *, int);
 struct netmap_mem_d* netmap_mem_find(nm_memid_t);
 unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd);
@@ -172,7 +173,6 @@ int netmap_mem_pools_info_get(struct nmreq_pools_info *,
 #define NETMAP_MEM_PRIVATE	0x2	/* allocator uses private address space */
 #define NETMAP_MEM_IO		0x4	/* the underlying memory is mmapped I/O */
-#define NETMAP_MEM_EXT		0x10	/* external memory (not remappable) */
 
 uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);
diff --git a/sys/dev/netmap/netmap_null.c b/sys/dev/netmap/netmap_null.c
index e880304e7379..c91afdb55e1d 100644
--- a/sys/dev/netmap/netmap_null.c
+++ b/sys/dev/netmap/netmap_null.c
@@ -151,6 +151,7 @@ netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na,
	nna->up.num_rx_rings = req->nr_rx_rings;
	nna->up.num_tx_desc = req->nr_tx_slots;
	nna->up.num_rx_desc = req->nr_rx_slots;
+	nna->up.na_flags = NAF_OFFSETS;
	error = netmap_attach_common(&nna->up);
	if (error)
		goto free_nna;
diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c
index 09261ca4c273..01fd79dedc26 100644
--- a/sys/dev/netmap/netmap_pipe.c
+++ b/sys/dev/netmap/netmap_pipe.c
@@ -211,8 +211,12 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
	     m--, k = nm_next(k, lim), nk = (complete ? k : nk)) {
		struct netmap_slot *rs = &rxring->slot[k];
		struct netmap_slot *ts = &txring->slot[k];
+		uint64_t off = nm_get_offset(rxkring, rs);
 
		*rs = *ts;
+		if (nm_get_offset(rxkring, rs) < off) {
+			nm_write_offset(rxkring, rs, off);
+		}
		if (ts->flags & NS_BUF_CHANGED) {
			ts->flags &= ~NS_BUF_CHANGED;
		}
@@ -263,9 +267,9 @@ netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
		struct netmap_slot *rs = &rxring->slot[k];
		struct netmap_slot *ts = &txring->slot[k];
 
+		/* copy the slot. This also propagates any offset */
+		*ts = *rs;
		if (rs->flags & NS_BUF_CHANGED) {
-			/* copy the slot and report the buffer change */
-			*ts = *rs;
			rs->flags &= ~NS_BUF_CHANGED;
		}
	}
@@ -414,7 +418,6 @@ netmap_pipe_reg_both(struct netmap_adapter *na, struct netmap_adapter *ona)
		for (i = 0; i < nma_get_nrings(na, t); i++) {
			struct netmap_kring *kring = NMR(na, t)[i];
 
			if (nm_kring_pending_on(kring)) {
-				struct netmap_kring *sring, *dring;
 
				kring->nr_mode = NKR_NETMAP_ON;
				if ((kring->nr_kflags & NKR_FAKERING) &&
@@ -426,27 +429,25 @@ netmap_pipe_reg_both(struct netmap_adapter *na, struct netmap_adapter *ona)
					continue;
				}
 
-				/* copy the buffers from the non-fake ring */
-				if (kring->nr_kflags & NKR_FAKERING) {
-					sring = kring->pipe;
-					dring = kring;
-				} else {
-					sring = kring;
-					dring = kring->pipe;
-				}
-				memcpy(dring->ring->slot,
-				       sring->ring->slot,
+				/* copy the buffers from the non-fake ring
+				 * (this also propagates any initial offset)
+				 */
+				memcpy(kring->pipe->ring->slot,
+				       kring->ring->slot,
				       sizeof(struct netmap_slot) *
-				       sring->nkr_num_slots);
+				       kring->nkr_num_slots);
+				/* copy the offset-related fields */
+				*(uint64_t *)(uintptr_t)&kring->pipe->ring->offset_mask =
+					kring->ring->offset_mask;
+				*(uint64_t *)(uintptr_t)&kring->pipe->ring->buf_align =
+					kring->ring->buf_align;
				/* mark both rings as fake and needed,
				 * so that buffers will not be
				 * deleted by the standard machinery
				 * (we will delete them by ourselves in
				 * netmap_pipe_krings_delete)
				 */
-				sring->nr_kflags |=
-					(NKR_FAKERING | NKR_NEEDRING);
-				dring->nr_kflags |=
+				kring->nr_kflags |=
					(NKR_FAKERING | NKR_NEEDRING);
				kring->nr_mode = NKR_NETMAP_ON;
			}
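In the txsync hunk above, the producer's slot (including the offset bits in ptr) is copied verbatim to the rx ring, but the pre-existing rx offset is re-applied whenever it is larger, apparently so that the offset stored on an rx slot never decreases below what the receiver set up. A worked example with illustrative values:

	/* rx slot previously carried offset 128, tx slot carries 64:
	 *	off = nm_get_offset(rxkring, rs);	-> 128 (saved)
	 *	*rs = *ts;				-> rs now says 64
	 *	64 < 128, so nm_write_offset(rxkring, rs, 128) restores it
	 */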
@@ -660,7 +661,7 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
	const char *pipe_id = NULL;
	int role = 0;
	int error, retries = 0;
-	char *cbra;
+	char *cbra, pipe_char;
 
	/* Try to parse the pipe syntax 'xx{yy' or 'xx}yy'. */
	cbra = strrchr(hdr->nr_name, '{');
@@ -675,6 +676,7 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
			return 0;
		}
	}
+	pipe_char = *cbra;
	pipe_id = cbra + 1;
	if (*pipe_id == '\0' || cbra == hdr->nr_name) {
		/* Bracket is the last character, so pipe name is missing;
@@ -690,15 +692,13 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
 
	/* first, try to find the parent adapter */
	for (;;) {
-		char nr_name_orig[NETMAP_REQ_IFNAMSIZ];
		int create_error;
 
		/* Temporarily remove the pipe suffix. */
-		strlcpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
		*cbra = '\0';
		error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
		/* Restore the pipe suffix. */
-		strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
+		*cbra = pipe_char;
		if (!error)
			break;
		if (error != ENXIO || retries++) {
@@ -711,7 +711,7 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
		NMG_UNLOCK();
		create_error = netmap_vi_create(hdr, 1 /* autodelete */);
		NMG_LOCK();
-		strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
+		*cbra = pipe_char;
		if (create_error && create_error != EEXIST) {
			if (create_error != EOPNOTSUPP) {
				nm_prerr("failed to create a persistent vale port: %d",
@@ -771,7 +771,7 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
	mna->up.nm_krings_create = netmap_pipe_krings_create;
	mna->up.nm_krings_delete = netmap_pipe_krings_delete;
	mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
-	mna->up.na_flags |= NAF_MEM_OWNER;
+	mna->up.na_flags |= NAF_MEM_OWNER | NAF_OFFSETS;
	mna->up.na_lut = pna->na_lut;
 
	mna->up.num_tx_rings = req->nr_tx_rings;
diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
index d32fc246bf57..db3321a4ff83 100644
--- a/sys/dev/netmap/netmap_vale.c
+++ b/sys/dev/netmap/netmap_vale.c
@@ -99,7 +99,7 @@ __FBSDID("$FreeBSD$");
 * In the tx loop, we aggregate traffic in batches to make all operations
 * faster. The batch size is bridge_batch.
 */
-#define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
+#define NM_BDG_MAXRINGS		16	/* XXX unclear how many (must be a pow of 2). */
 #define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
 #define NM_BRIDGE_RINGSIZE	1024	/* in the device */
 #define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
@@ -154,8 +154,9 @@ struct netmap_bdg_ops vale_bdg_ops = {
 * with other odd sizes. We assume there is enough room
 * in the source and destination buffers.
 *
- * XXX only for multiples of 64 bytes, non overlapped.
+ * XXX only for multiples of NM_BUF_ALIGN bytes, non overlapped.
 */
+
 static inline void
 pkt_copy(void *_src, void *_dst, int l)
 {
@@ -165,7 +166,8 @@ pkt_copy(void *_src, void *_dst, int l)
		memcpy(dst, src, l);
		return;
	}
-	for (; likely(l > 0); l-=64) {
+	for (; likely(l > 0); l -= NM_BUF_ALIGN) {
+		/* XXX NM_BUF_ALIGN/sizeof(uint64_t) statements */
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
@@ -387,144 +389,6 @@ netmap_vale_list(struct nmreq_header *hdr)
	return error;
 }
 
-/* Process NETMAP_REQ_VALE_ATTACH.
- */
-int
-netmap_vale_attach(struct nmreq_header *hdr, void *auth_token)
-{
-	struct nmreq_vale_attach *req =
-		(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
-	struct netmap_vp_adapter * vpna;
-	struct netmap_adapter *na = NULL;
-	struct netmap_mem_d *nmd = NULL;
-	struct nm_bridge *b = NULL;
-	int error;
-
-	NMG_LOCK();
-	/* permission check for modified bridges */
-	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
-	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
-		error = EACCES;
-		goto unlock_exit;
-	}
-
-	if (req->reg.nr_mem_id) {
-		nmd = netmap_mem_find(req->reg.nr_mem_id);
-		if (nmd == NULL) {
-			error = EINVAL;
-			goto unlock_exit;
-		}
-	}
-
-	/* check for existing one */
-	error = netmap_get_vale_na(hdr, &na, nmd, 0);
-	if (na) {
-		error = EBUSY;
-		goto unref_exit;
-	}
-	error = netmap_get_vale_na(hdr, &na,
-			nmd, 1 /* create if not exists */);
-	if (error) { /* no device */
-		goto unlock_exit;
-	}
-
-	if (na == NULL) { /* VALE prefix missing */
-		error = EINVAL;
-		goto unlock_exit;
-	}
-
-	if (NETMAP_OWNED_BY_ANY(na)) {
-		error = EBUSY;
-		goto unref_exit;
-	}
-
-	if (na->nm_bdg_ctl) {
-		/* nop for VALE ports. The bwrap needs to put the hwna
The bwrap needs to put the hwna - * in netmap mode (see netmap_bwrap_bdg_ctl) - */ - error = na->nm_bdg_ctl(hdr, na); - if (error) - goto unref_exit; - nm_prdis("registered %s to netmap-mode", na->name); - } - vpna = (struct netmap_vp_adapter *)na; - req->port_index = vpna->bdg_port; - - if (nmd) - netmap_mem_put(nmd); - - NMG_UNLOCK(); - return 0; - -unref_exit: - netmap_adapter_put(na); -unlock_exit: - if (nmd) - netmap_mem_put(nmd); - - NMG_UNLOCK(); - return error; -} - -/* Process NETMAP_REQ_VALE_DETACH. - */ -int -netmap_vale_detach(struct nmreq_header *hdr, void *auth_token) -{ - struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body; - struct netmap_vp_adapter *vpna; - struct netmap_adapter *na; - struct nm_bridge *b = NULL; - int error; - - NMG_LOCK(); - /* permission check for modified bridges */ - b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); - if (b && !nm_bdg_valid_auth_token(b, auth_token)) { - error = EACCES; - goto unlock_exit; - } - - error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */); - if (error) { /* no device, or another bridge or user owns the device */ - goto unlock_exit; - } - - if (na == NULL) { /* VALE prefix missing */ - error = EINVAL; - goto unlock_exit; - } else if (nm_is_bwrap(na) && - ((struct netmap_bwrap_adapter *)na)->na_polling_state) { - /* Don't detach a NIC with polling */ - error = EBUSY; - goto unref_exit; - } - - vpna = (struct netmap_vp_adapter *)na; - if (na->na_vp != vpna) { - /* trying to detach first attach of VALE persistent port attached - * to 2 bridges - */ - error = EBUSY; - goto unref_exit; - } - nmreq_det->port_index = vpna->bdg_port; - - if (na->nm_bdg_ctl) { - /* remove the port from bridge. The bwrap - * also needs to put the hwna in normal mode - */ - error = na->nm_bdg_ctl(hdr, na); - } - -unref_exit: - netmap_adapter_put(na); -unlock_exit: - NMG_UNLOCK(); - return error; - -} - /* nm_dtor callback for ephemeral VALE ports */ static void @@ -651,8 +515,9 @@ nm_vale_preflush(struct netmap_kring *kring, u_int end) /* this slot goes into a list so initialize the link field */ ft[ft_i].ft_next = NM_FT_NULL; buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? - (void *)(uintptr_t)slot->ptr : NMB(&na->up, slot); - if (unlikely(buf == NULL)) { + (void *)(uintptr_t)slot->ptr : NMB_O(kring, slot); + if (unlikely(buf == NULL || + slot->len > NETMAP_BUF_SIZE(&na->up) - nm_get_offset(kring, slot))) { nm_prlim(5, "NULL %s buffer pointer from %s slot %d len %d", (slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT", kring->name, j, ft[ft_i].ft_len); @@ -939,9 +804,6 @@ nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, /* * Broadcast traffic goes to ring 0 on all destinations. * So we need to add these rings to the list of ports to scan. - * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is - * expensive. We should keep a compact list of active destinations - * so we could shorten this loop. */ brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS; if (brddst->bq_head != NM_FT_NULL) { @@ -998,7 +860,7 @@ nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, next = d->bq_head; /* we need to reserve this many slots. If fewer are * available, some packets will be dropped. - * Packets may have multiple fragments, so we may not use + * Packets may have multiple fragments, so * there is a chance that we may not use all of the slots * we have claimed, so we will need to handle the leftover * ones when we regain the lock.
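The new check in nm_vale_preflush above rejects a slot whose length no longer fits in the buffer once the slot's offset is taken into account. The shape of that test is sketched below; the helper name is hypothetical, and the subtraction order is the point: comparing len against (size - off) avoids the unsigned overflow that computing (off + len) could incur. A minimal sketch, assuming off has already been capped by the offsets machinery so it never exceeds the buffer size:

static inline int
slot_len_fits(uint64_t buf_size, uint64_t off, uint64_t len)
{
	if (off > buf_size)
		return 0;		/* offset alone is out of range */
	return len <= buf_size - off;	/* subtraction cannot underflow here */
}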
@@ -1108,21 +970,36 @@ nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, do { char *dst, *src = ft_p->ft_buf; size_t copy_len = ft_p->ft_len, dst_len = copy_len; + uintptr_t src_cb; + uint64_t dstoff, dstoff_cb; + int src_co, dst_co; + const uintptr_t mask = NM_BUF_ALIGN - 1; slot = &ring->slot[j]; dst = NMB(&dst_na->up, slot); + dstoff = nm_get_offset(kring, slot); + dstoff_cb = dstoff & ~mask; + src_cb = ((uintptr_t)src) & ~mask; + src_co = ((uintptr_t)src) & mask; + dst_co = ((uintptr_t)(dst + dstoff)) & mask; + if (dst_co < src_co) { + dstoff_cb += NM_BUF_ALIGN; + } + dstoff = dstoff_cb + src_co; + copy_len += src_co; nm_prdis("send [%d] %d(%d) bytes at %s:%d", i, (int)copy_len, (int)dst_len, - dst_na->up.name, j); - /* round to a multiple of 64 */ - copy_len = (copy_len + 63) & ~63; + NM_IFPNAME(dst_ifp), j); - if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) || - copy_len > NETMAP_BUF_SIZE(&na->up))) { - nm_prlim(5, "invalid len %d, down to 64", (int)copy_len); - copy_len = dst_len = 64; // XXX + if (unlikely(dstoff > NETMAP_BUF_SIZE(&dst_na->up) || + dst_len > NETMAP_BUF_SIZE(&dst_na->up) - dstoff)) { + nm_prlim(5, "dropping packet/fragment of len %zu, dest offset %llu", + dst_len, (unsigned long long)dstoff); + copy_len = dst_len = 0; + dstoff = nm_get_offset(kring, slot); } + if (ft_p->ft_flags & NS_INDIRECT) { if (copyin(src, dst, copy_len)) { // invalid user pointer, pretend len is 0 @@ -1130,10 +1007,11 @@ nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, } } else { //memcpy(dst, src, copy_len); - pkt_copy(src, dst, (int)copy_len); + pkt_copy((char *)src_cb, dst + dstoff_cb, (int)copy_len); } slot->len = dst_len; slot->flags = (cnt << 8)| NS_MOREFRAG; + nm_write_offset(kring, slot, dstoff); j = nm_next(j, lim); needed--; ft_p++; @@ -1312,7 +1190,7 @@ netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *ifp, if (netmap_verbose) nm_prinf("max frame size %u", vpna->mfs); - na->na_flags |= NAF_BDG_MAYSLEEP; + na->na_flags |= (NAF_BDG_MAYSLEEP | NAF_OFFSETS); /* persistent VALE ports look like hw devices * with a native netmap adapter */ @@ -1409,6 +1287,7 @@ netmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) na->nm_krings_create = netmap_vale_bwrap_krings_create; na->nm_krings_delete = netmap_vale_bwrap_krings_delete; na->nm_notify = netmap_bwrap_notify; + bna->nm_intr_notify = netmap_bwrap_intr_notify; bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ /* Set the mfs, needed on the VALE mismatch datapath. */ bna->up.mfs = NM_BDG_MFS_DEFAULT; diff --git a/sys/net/netmap.h b/sys/net/netmap.h index c040683651d0..7da40d6869f1 100644 --- a/sys/net/netmap.h +++ b/sys/net/netmap.h @@ -235,6 +235,7 @@ struct netmap_slot { #define NETMAP_MAX_FRAGS 64 /* max number of fragments */ + /* * struct netmap_ring * @@ -296,6 +297,19 @@ struct netmap_ring { struct timeval ts; /* (k) time of last *sync() */ + /* offset_mask is used to isolate the part of the ptr field + * in the slots used to contain an offset in the buffer. + * It is zero if the ring has not been opened using the + * NETMAP_REQ_OPT_OFFSETS option. + */ + const uint64_t offset_mask; + /* the alignment requirement, in bytes, for the start + * of the packets inside the buffers. + * User programs should take this alignment into + * account when specifying buffer-offsets in TX slots. + */ + const uint64_t buf_align; + /* opaque room for a mutex or similar object */ #if !defined(_WIN32) || defined(__CYGWIN__) uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128];
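The two read-only ring fields just added are what user programs consult when writing offsets: offset_mask bounds the value that fits in a slot, and buf_align constrains where a packet may start. A minimal sketch of honoring buf_align follows; the helper name is hypothetical, and it assumes that a zero buf_align means no constraint, that nonzero values are powers of two, and that the buffer base itself is suitably aligned:

static inline uint64_t
tx_offset_round(const struct netmap_ring *r, uint64_t want)
{
	uint64_t a = r->buf_align;

	if (a == 0)
		return want;			/* no alignment constraint */
	return (want + a - 1) & ~(a - 1);	/* round up; a is a power of two */
}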
@@ -307,6 +321,7 @@ struct netmap_ring { struct netmap_slot slot[0]; /* array of slots. */ }; + /* * RING FLAGS */ @@ -561,6 +576,12 @@ enum { */ NETMAP_REQ_OPT_SYNC_KLOOP_MODE, + /* On NETMAP_REQ_REGISTER, ask for (part of) the ptr field in the + * slots of the registered rings to be used as an offset field + * for the start of the packets inside the netmap buffer. + */ + NETMAP_REQ_OPT_OFFSETS, + /* This is a marker to count the number of available options. * New options must be added above it. */ NETMAP_REQ_OPT_MAX, @@ -811,7 +832,16 @@ static inline void nm_ldld_barrier(void) #define nm_ldld_barrier atomic_thread_fence_acq #define nm_stld_barrier atomic_thread_fence_seq_cst #else /* !_KERNEL */ + +#ifdef __cplusplus +#include <atomic> +using std::memory_order_release; +using std::memory_order_acquire; + +#else /* __cplusplus */ #include <stdatomic.h> +#endif /* __cplusplus */ + static inline void nm_stst_barrier(void) { atomic_thread_fence(memory_order_release); @@ -933,4 +963,29 @@ struct nmreq_opt_csb { uint64_t csb_ktoa; }; +/* option NETMAP_REQ_OPT_OFFSETS */ +struct nmreq_opt_offsets { + struct nmreq_option nro_opt; + /* the user must declare the maximum offset value that she is + * going to put into the offset slot-fields. Any larger value + * found at runtime will be cropped. On output the (possibly + * higher) effective max value is returned. + */ + uint64_t nro_max_offset; + /* optional initial offset value, to be set in all slots. */ + uint64_t nro_initial_offset; + /* number of bits in the lower part of the 'ptr' field to be + * used as the offset field. On output the (possibly larger) + * effective number of bits is returned. + * 0 means: use the whole ptr field. + */ + uint32_t nro_offset_bits; + /* required alignment for the beginning of the packets + * (base of the buffer plus offset) in the TX slots. + */ + uint32_t nro_tx_align; + /* Reserved: set to zero. */ + uint64_t nro_min_gap; +}; + #endif /* _NET_NETMAP_H_ */
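To tie the net/netmap.h pieces together, here is a minimal user-space sketch of registering a port with the offsets feature enabled, by attaching an nmreq_opt_offsets option to a NETMAP_REQ_REGISTER request. The function name, mode, and numeric values are illustrative only; error handling is omitted, and fd is assumed to come from open("/dev/netmap", O_RDWR):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <net/netmap.h>

static int
register_with_offsets(int fd, const char *ifname)
{
	struct nmreq_header hdr;
	struct nmreq_register reg;
	struct nmreq_opt_offsets off;

	memset(&hdr, 0, sizeof(hdr));
	memset(&reg, 0, sizeof(reg));
	memset(&off, 0, sizeof(off));	/* also zeroes the reserved nro_min_gap */

	hdr.nr_version = NETMAP_API;
	hdr.nr_reqtype = NETMAP_REQ_REGISTER;
	strncpy(hdr.nr_name, ifname, sizeof(hdr.nr_name) - 1);
	hdr.nr_body = (uintptr_t)&reg;

	off.nro_opt.nro_reqtype = NETMAP_REQ_OPT_OFFSETS;
	off.nro_max_offset = 128;	/* we promise offsets <= 128 */
	off.nro_initial_offset = 64;	/* pre-written into every slot */
	off.nro_offset_bits = 0;	/* 0: use the whole ptr field */
	off.nro_tx_align = 0;		/* no extra TX alignment request */
	hdr.nr_options = (uintptr_t)&off.nro_opt;

	reg.nr_mode = NR_REG_ALL_NIC;

	return ioctl(fd, NIOCCTRL, &hdr);	/* 0 on success */
}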
diff --git a/sys/net/netmap_legacy.h b/sys/net/netmap_legacy.h index ba0ffa980285..115a09e5fbaf 100644 --- a/sys/net/netmap_legacy.h +++ b/sys/net/netmap_legacy.h @@ -144,6 +144,7 @@ * */ + /* * struct nmreq overlays a struct ifreq (just the name) */ @@ -215,6 +216,7 @@ struct nmreq { #define NETMAP_SETSOCKOPT _IO('i', 140) #define NETMAP_GETSOCKOPT _IO('i', 141) + /* These linknames are for the Netmap Core Driver */ #define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP" #define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap" diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h index de9b1ed3077e..eb1a7057972d 100644 --- a/sys/net/netmap_user.h +++ b/sys/net/netmap_user.h @@ -123,12 +123,29 @@ ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ (ring)->nr_buf_size ) +/* read the offset field in a ring's slot */ +#define NETMAP_ROFFSET(ring, slot) \ + ((slot)->ptr & (ring)->offset_mask) + +/* update the offset field in a ring's slot */ +#define NETMAP_WOFFSET(ring, slot, offset) \ + do { (slot)->ptr = ((slot)->ptr & ~(ring)->offset_mask) | \ + ((offset) & (ring)->offset_mask); } while (0) + +/* obtain the start of the buffer pointed to by a ring's slot, taking the + * offset field into account + */ +#define NETMAP_BUF_OFFSET(ring, slot) \ + (NETMAP_BUF(ring, (slot)->buf_idx) + NETMAP_ROFFSET(ring, slot)) + + static inline uint32_t nm_ring_next(struct netmap_ring *r, uint32_t i) { return ( unlikely(i + 1 == r->num_slots) ? 0 : i + 1); } + /* * Return 1 if we have pending transmissions in the tx ring. * When everything is complete ring->head = ring->tail + 1 (modulo ring size) @@ -350,6 +367,7 @@ enum { NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ }; + /* * nm_close() closes and restores the port to its previous state */ @@ -430,6 +448,7 @@ win_remove_fd_record(int fd) } } + HANDLE win_get_netmap_handle(int fd) { @@ -916,6 +935,7 @@ nm_open(const char *ifname, const struct nmreq *req, goto fail; } + #ifdef DEBUG_NETMAP_USER { /* debugging code */ int i; @@ -947,6 +967,7 @@ nm_open(const char *ifname, const struct nmreq *req, return NULL; } + static int nm_close(struct nm_desc *d) { @@ -1059,6 +1080,7 @@ nm_inject(struct nm_desc *d, const void *buf, size_t size) return 0; /* fail */ } + /* * Same prototype as pcap_dispatch(), only need to cast. */ @@ -1108,7 +1130,7 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg) slot = &ring->slot[i]; d->hdr.len += slot->len; nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx); - if (oldbuf != NULL && nbuf - oldbuf == (int)ring->nr_buf_size && + if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size && oldlen == ring->nr_buf_size) { d->hdr.caplen += slot->len; oldbuf = nbuf; diff --git a/sys/net/netmap_virt.h b/sys/net/netmap_virt.h index 12c2565a734a..07e551aff009 100644 --- a/sys/net/netmap_virt.h +++ b/sys/net/netmap_virt.h @@ -44,8 +44,8 @@ /* PCI identifiers and PCI BARs for ptnetmap-memdev and ptnet. */ #define PTNETMAP_MEMDEV_NAME "ptnetmap-memdev" #define PTNETMAP_PCI_VENDOR_ID 0x1b36 /* QEMU virtual devices */ -#define PTNETMAP_PCI_DEVICE_ID 0xcccc /* memory device */ -#define PTNETMAP_PCI_NETIF_ID 0xcccd /* ptnet network interface */ +#define PTNETMAP_PCI_DEVICE_ID 0x000c /* memory device */ +#define PTNETMAP_PCI_NETIF_ID 0x000d /* ptnet network interface */ #define PTNETMAP_IO_PCI_BAR 0 #define PTNETMAP_MEM_PCI_BAR 1 #define PTNETMAP_MSIX_PCI_BAR 2
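Finally, a short sketch of the accessors added to net/netmap_user.h in use on the transmit side. The ring is assumed to have been opened with NETMAP_REQ_OPT_OFFSETS as above; the function name and the 64-byte offset are examples only, and the offset must stay within the negotiated maximum and respect the ring's buf_align:

#include <string.h>
#include <net/netmap_user.h>

static void
tx_one(struct netmap_ring *ring, const void *pkt, uint16_t len)
{
	uint32_t i = ring->cur;
	struct netmap_slot *slot = &ring->slot[i];

	NETMAP_WOFFSET(ring, slot, 64);		/* store the offset in slot->ptr */
	memcpy(NETMAP_BUF_OFFSET(ring, slot),	/* copy past the offset */
	    pkt, len);
	slot->len = len;
	ring->head = ring->cur = nm_ring_next(ring, i);	/* hand the slot to the kernel */
}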