This is an import of code, mostly from Giuseppe Lettieri, that revises
the netmap memory allocator so that the various parameters
(number and size of buffers, rings, descriptors)
can be modified at runtime through sysctl variables.
The changes become effective when no netmap clients are active.
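
For illustration, the knobs can also be driven programmatically. A minimal
userspace sketch, assuming the dev.netmap.* names generated by the
DECLARE_SYSCTLS block in netmap_mem2.c below (the *_curr_* variants are
read-only and report the actual allocation):

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		int want = 100000, cur;
		size_t len = sizeof(cur);

		/* request a different number of packet buffers; the change
		 * takes effect only when no netmap clients are active */
		if (sysctlbyname("dev.netmap.buf_num", NULL, NULL,
		    &want, sizeof(want)) == -1)
			perror("dev.netmap.buf_num");
		/* read back how many buffers are actually allocated */
		if (sysctlbyname("dev.netmap.buf_curr_num", &cur, &len,
		    NULL, 0) == 0)
			printf("current buffers: %d\n", cur);
		return (0);
	}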

The API is mostly unchanged, although the NIOCUNREGIF ioctl no longer
brings the interface back to normal mode; you need to close the
file descriptor for that instead.
This change was necessary to track who is using the mapped region,
and since it simplifies the API there was no incentive
to preserve NIOCUNREGIF.
We will remove the ioctl from the kernel the next time we need
a real API change (and version bump).
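
For reference, a minimal userspace sketch of the resulting lifecycle
(error handling omitted; this is not code from the commit, only the
nmreq fields actually used by the ioctl are shown):

	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <net/netmap.h>
	#include <net/netmap_user.h>

	static void
	netmap_session(const char *ifname)
	{
		struct nmreq req;
		int fd = open("/dev/netmap", O_RDWR);
		void *mem;

		memset(&req, 0, sizeof(req));
		req.nr_version = NETMAP_API;
		strncpy(req.nr_name, ifname, sizeof(req.nr_name));
		ioctl(fd, NIOCREGIF, &req);	/* put ifname in netmap mode */
		mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, 0);		/* map the shared region */
		/* ... move packets through the rings ... */
		(void)mem;
		close(fd);	/* this, not NIOCUNREGIF, reverts the interface */
	}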

Among other things, buffer allocation when opening devices is
now much faster: it used to take O(N^2) time, and is now linear.
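
The speedup comes from letting the allocator resume its bitmap scan from
the point where the previous allocation succeeded. A sketch of the idea
(hypothetical helper, not the committed netmap_obj_malloc(), which takes
the hint through its new start/index arguments):

	#include <strings.h>	/* ffs() */
	#include <stdint.h>

	/* return the object index, or -1 when the pool is exhausted;
	 * *start remembers the bitmap word to resume from next time */
	static int
	bitmap_alloc_hint(uint32_t *bitmap, uint32_t nwords, uint32_t *start)
	{
		uint32_t i;

		for (i = *start; i < nwords; i++) {
			uint32_t cur = bitmap[i];
			int j;

			if (cur == 0)	/* word fully allocated */
				continue;
			j = ffs(cur) - 1;		/* first free bit */
			bitmap[i] &= ~(1U << j);	/* mark it busy */
			*start = i;	/* next call resumes here, not at 0 */
			return (i * 32 + j);
		}
		return (-1);
	}

Restarting from word 0 on every call makes N allocations cost O(N^2) bit
tests in the worst case; carrying the hint makes the whole pass linear.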

Submitted by:	Giuseppe Lettieri
Luigi Rizzo 2012-10-19 04:13:12 +00:00
parent 48f219c0da
commit 8241616dc5
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=241719
3 changed files with 799 additions and 309 deletions


@ -98,15 +98,8 @@ MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
/*
* lock and unlock for the netmap memory allocator
*/
#define NMA_LOCK() mtx_lock(&nm_mem->nm_mtx);
#define NMA_UNLOCK() mtx_unlock(&nm_mem->nm_mtx);
struct netmap_mem_d;
static struct netmap_mem_d *nm_mem; /* Our memory allocator. */
u_int netmap_total_buffers;
u_int netmap_buf_size;
char *netmap_buffer_base; /* address of an invalid buffer */
/* user-controlled variables */
@ -119,10 +112,6 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
u_int netmap_buf_size = 2048;
TUNABLE_INT("hw.netmap.buf_size", (u_int *)&netmap_buf_size);
SYSCTL_INT(_dev_netmap, OID_AUTO, buf_size,
CTLFLAG_RD, &netmap_buf_size, 0, "Size of packet buffers");
int netmap_mitigate = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
int netmap_no_pendintr = 1;
@ -294,23 +283,62 @@ nm_find_bridge(const char *name)
#endif /* !NETMAP_MEM2 */
/*------------ end of memory allocator ----------*/
/* Structure associated to each thread which registered an interface. */
/* Structure associated to each thread which registered an interface.
*
* The first 4 fields of this structure are written by NIOCREGIF and
* read by poll() and NIOC?XSYNC.
* There is low contention among writers (actually, a correct user program
* should have no contention among writers) and among writers and readers,
* so we use a single global lock to protect the structure initialization.
* Since initialization involves the allocation of memory, we reuse the memory
* allocator lock.
* Read access to the structure is lock free. Readers must check that
* np_nifp is not NULL before using the other fields.
* If np_nifp is NULL initialization has not been performed, so they should
* return an error to userlevel.
*
* The ref_done field is used to regulate access to the refcount in the
* memory allocator. The refcount must be incremented at most once for
* each open("/dev/netmap"). The increment is performed by the first
* function that calls netmap_get_memory() (currently called by
* mmap(), NIOCGINFO and NIOCREGIF).
* If the refcount is incremented, it is then decremented when the
* private structure is destroyed.
*/
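/*
 * Illustrative reader-side sketch (not part of this diff): how poll()
 * and the *SYNC paths are expected to validate np_nifp before touching
 * the other fields, per the protocol described above.
 */
static int
netmap_reader_check(struct netmap_priv_d *priv)
{
	struct netmap_if *nifp = priv->np_nifp;

	if (nifp == NULL)	/* NIOCREGIF not completed yet */
		return (ENXIO);
	rmb();	/* make sure the reads below see initialized fields */
	return (0);
}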
struct netmap_priv_d {
struct netmap_if *np_nifp; /* netmap interface descriptor. */
struct netmap_if * volatile np_nifp; /* netmap interface descriptor. */
struct ifnet *np_ifp; /* device for which we hold a reference */
int np_ringid; /* from the ioctl */
u_int np_qfirst, np_qlast; /* range of rings to scan */
uint16_t np_txpoll;
unsigned long ref_done; /* use with NMA_LOCK held */
};
static int
netmap_get_memory(struct netmap_priv_d* p)
{
int error = 0;
NMA_LOCK();
if (!p->ref_done) {
error = netmap_memory_finalize();
if (!error)
p->ref_done = 1;
}
NMA_UNLOCK();
return error;
}
/*
* File descriptor's private data destructor.
*
* Call nm_register(ifp,0) to stop netmap mode on the interface and
* revert to normal operation. We expect that np_ifp has not gone.
*/
/* call with NMA_LOCK held */
static void
netmap_dtor_locked(void *data)
{
@ -350,7 +378,6 @@ netmap_dtor_locked(void *data)
selwakeuppri(&na->tx_si, PI_NET);
selwakeuppri(&na->rx_si, PI_NET);
/* release all buffers */
NMA_LOCK();
for (i = 0; i < na->num_tx_rings + 1; i++) {
struct netmap_ring *ring = na->tx_rings[i].ring;
lim = na->tx_rings[i].nkr_num_slots;
@ -370,7 +397,6 @@ netmap_dtor_locked(void *data)
/* XXX kqueue(9) needed; these will mirror knlist_init. */
/* knlist_destroy(&na->tx_si.si_note); */
/* knlist_destroy(&na->rx_si.si_note); */
NMA_UNLOCK();
netmap_free_rings(na);
wakeup(na);
}
@ -403,7 +429,7 @@ nm_if_rele(struct ifnet *ifp)
bzero(ifp, sizeof(*ifp));
free(ifp, M_DEVBUF);
break;
}
}
else if (b->bdg_ports[i] != NULL)
full = 1;
}
@ -423,17 +449,83 @@ netmap_dtor(void *data)
{
struct netmap_priv_d *priv = data;
struct ifnet *ifp = priv->np_ifp;
struct netmap_adapter *na = NA(ifp);
struct netmap_adapter *na;
na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
netmap_dtor_locked(data);
na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
NMA_LOCK();
if (ifp) {
na = NA(ifp);
na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
netmap_dtor_locked(data);
na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
nm_if_rele(ifp);
nm_if_rele(ifp);
}
if (priv->ref_done) {
netmap_memory_deref();
}
NMA_UNLOCK();
bzero(priv, sizeof(*priv)); /* XXX for safety */
free(priv, M_DEVBUF);
}
#ifdef __FreeBSD__
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>
static struct cdev_pager_ops saved_cdev_pager_ops;
static int
netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
D("first mmap for %p", handle);
return saved_cdev_pager_ops.cdev_pg_ctor(handle,
size, prot, foff, cred, color);
}
static void
netmap_dev_pager_dtor(void *handle)
{
saved_cdev_pager_ops.cdev_pg_dtor(handle);
D("ready to release memory for %p", handle);
}
static struct cdev_pager_ops netmap_cdev_pager_ops = {
.cdev_pg_ctor = netmap_dev_pager_ctor,
.cdev_pg_dtor = netmap_dev_pager_dtor,
.cdev_pg_fault = NULL,
};
static int
netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
vm_size_t objsize, vm_object_t *objp, int prot)
{
vm_object_t obj;
D("cdev %p foff %d size %d objp %p prot %d", cdev, *foff,
objsize, objp, prot);
obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
curthread->td_ucred);
ND("returns obj %p", obj);
if (obj == NULL)
return EINVAL;
if (saved_cdev_pager_ops.cdev_pg_fault == NULL) {
D("initialize cdev_pager_ops");
saved_cdev_pager_ops = *(obj->un_pager.devp.ops);
netmap_cdev_pager_ops.cdev_pg_fault =
saved_cdev_pager_ops.cdev_pg_fault;
};
obj->un_pager.devp.ops = &netmap_cdev_pager_ops;
*objp = obj;
return 0;
}
#endif /* __FreeBSD__ */
/*
* mmap(2) support for the "netmap" device.
@ -456,13 +548,50 @@ netmap_mmap(__unused struct cdev *dev,
#endif
)
{
int error = 0;
struct netmap_priv_d *priv;
if (nprot & PROT_EXEC)
return (-1); // XXX -1 or EINVAL ?
error = devfs_get_cdevpriv((void **)&priv);
if (error == EBADF) { /* called on fault, memory is initialized */
ND(5, "handling fault at ofs 0x%x", offset);
error = 0;
} else if (error == 0) /* make sure memory is set */
error = netmap_get_memory(priv);
if (error)
return (error);
ND("request for offset 0x%x", (uint32_t)offset);
*paddr = netmap_ofstophys(offset);
return (0);
return (*paddr ? 0 : ENOMEM);
}
static int
netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
D("dev %p fflag 0x%x devtype %d td %p", dev, fflag, devtype, td);
return 0;
}
static int
netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
struct netmap_priv_d *priv;
int error;
priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
M_NOWAIT | M_ZERO);
if (priv == NULL)
return ENOMEM;
error = devfs_set_cdevpriv(priv, netmap_dtor);
if (error)
return error;
return 0;
}
#endif /* __FreeBSD__ */
@ -650,7 +779,7 @@ get_ifp(const char *name, struct ifnet **ifp)
/* can do this if the capability exists and if_pspare[0]
* points to the netmap descriptor.
*/
if ((*ifp)->if_capabilities & IFCAP_NETMAP && NA(*ifp))
if (NETMAP_CAPABLE(*ifp))
return 0; /* valid pointer, we hold the refcount */
nm_if_rele(*ifp);
return EINVAL; // not NETMAP capable
@ -676,7 +805,7 @@ netmap_ring_reinit(struct netmap_kring *kring)
u_int i, lim = kring->nkr_num_slots - 1;
int errors = 0;
D("called for %s", kring->na->ifp->if_xname);
RD(10, "called for %s", kring->na->ifp->if_xname);
if (ring->cur > lim)
errors++;
for (i = 0; i <= lim; i++) {
@ -698,9 +827,9 @@ netmap_ring_reinit(struct netmap_kring *kring)
int pos = kring - kring->na->tx_rings;
int n = kring->na->num_tx_rings + 1;
D("total %d errors", errors);
RD(10, "total %d errors", errors);
errors++;
D("%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
kring->na->ifp->if_xname,
pos < n ? "TX" : "RX", pos < n ? pos : pos - n,
ring->cur, kring->nr_hwcur,
@ -803,20 +932,16 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
CURVNET_SET(TD_TO_VNET(td));
error = devfs_get_cdevpriv((void **)&priv);
if (error != ENOENT && error != 0) {
if (error) {
CURVNET_RESTORE();
return (error);
/* XXX ENOENT should be impossible, since the priv
* is now created in the open */
return (error == ENOENT ? ENXIO : error);
}
error = 0; /* Could be ENOENT */
nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0'; /* truncate name */
switch (cmd) {
case NIOCGINFO: /* return capabilities etc */
/* memsize is always valid */
nmr->nr_memsize = nm_mem->nm_totalsize;
nmr->nr_offset = 0;
nmr->nr_rx_rings = nmr->nr_tx_rings = 0;
nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
if (nmr->nr_version != NETMAP_API) {
D("API mismatch got %d have %d",
nmr->nr_version, NETMAP_API);
@ -824,6 +949,16 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
error = EINVAL;
break;
}
/* update configuration */
error = netmap_get_memory(priv);
ND("get_memory returned %d", error);
if (error)
break;
/* memsize is always valid */
nmr->nr_memsize = nm_mem.nm_totalsize;
nmr->nr_offset = 0;
nmr->nr_rx_rings = nmr->nr_tx_rings = 0;
nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
if (nmr->nr_name[0] == '\0') /* just get memory info */
break;
error = get_ifp(nmr->nr_name, &ifp); /* get a refcount */
@ -843,26 +978,26 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
error = EINVAL;
break;
}
if (priv != NULL) { /* thread already registered */
/* ensure allocators are ready */
error = netmap_get_memory(priv);
ND("get_memory returned %d", error);
if (error)
break;
/* protect access to priv from concurrent NIOCREGIF */
NMA_LOCK();
if (priv->np_ifp != NULL) { /* thread already registered */
error = netmap_set_ringid(priv, nmr->nr_ringid);
NMA_UNLOCK();
break;
}
/* find the interface and a reference */
error = get_ifp(nmr->nr_name, &ifp); /* keep reference */
if (error)
break;
na = NA(ifp); /* retrieve netmap adapter */
/*
* Allocate the private per-thread structure.
* XXX perhaps we can use a blocking malloc ?
*/
priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
M_NOWAIT | M_ZERO);
if (priv == NULL) {
error = ENOMEM;
nm_if_rele(ifp); /* return the refcount */
if (error) {
NMA_UNLOCK();
break;
}
na = NA(ifp); /* retrieve netmap adapter */
for (i = 10; i > 0; i--) {
na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
@ -874,8 +1009,8 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
if (i == 0) {
D("too many NIOCREGIF attempts, give up");
error = EINVAL;
free(priv, M_DEVBUF);
nm_if_rele(ifp); /* return the refcount */
NMA_UNLOCK();
break;
}
@ -883,7 +1018,7 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
error = netmap_set_ringid(priv, nmr->nr_ringid);
if (error)
goto error;
priv->np_nifp = nifp = netmap_if_new(nmr->nr_name, na);
nifp = netmap_if_new(nmr->nr_name, na);
if (nifp == NULL) { /* allocation failed */
error = ENOMEM;
} else if (ifp->if_capenable & IFCAP_NETMAP) {
@ -898,57 +1033,66 @@ netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
mtx_init(&na->rx_rings[i].q_lock, "nm_rxq_lock", MTX_NETWORK_LOCK, MTX_DEF);
}
error = na->nm_register(ifp, 1); /* mode on */
if (error)
if (error) {
netmap_dtor_locked(priv);
netmap_if_free(nifp);
}
}
if (error) { /* reg. failed, release priv and ref */
error:
na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
nm_if_rele(ifp); /* return the refcount */
bzero(priv, sizeof(*priv));
free(priv, M_DEVBUF);
priv->np_ifp = NULL;
priv->np_nifp = NULL;
NMA_UNLOCK();
break;
}
na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
error = devfs_set_cdevpriv(priv, netmap_dtor);
if (error != 0) {
/* could not assign the private storage for the
* thread, call the destructor explicitly.
*/
netmap_dtor(priv);
break;
}
/* the following assignment is a commitment.
* Readers (i.e., poll and *SYNC) check for
* np_nifp != NULL without locking
*/
wmb(); /* make sure previous writes are visible to all CPUs */
priv->np_nifp = nifp;
NMA_UNLOCK();
/* return the offset of the netmap_if object */
nmr->nr_rx_rings = na->num_rx_rings;
nmr->nr_tx_rings = na->num_tx_rings;
nmr->nr_rx_slots = na->num_rx_desc;
nmr->nr_tx_slots = na->num_tx_desc;
nmr->nr_memsize = nm_mem->nm_totalsize;
nmr->nr_memsize = nm_mem.nm_totalsize;
nmr->nr_offset = netmap_if_offset(nifp);
break;
case NIOCUNREGIF:
if (priv == NULL) {
error = ENXIO;
break;
}
/* the interface is unregistered inside the
destructor of the private data. */
devfs_clear_cdevpriv();
// XXX we have no data here ?
D("deprecated, data is %p", nmr);
error = EINVAL;
break;
case NIOCTXSYNC:
case NIOCRXSYNC:
if (priv == NULL) {
case NIOCRXSYNC:
nifp = priv->np_nifp;
if (nifp == NULL) {
error = ENXIO;
break;
}
rmb(); /* make sure following reads are not from cache */
ifp = priv->np_ifp; /* we have a reference */
if (ifp == NULL) {
D("Internal error: nifp != NULL && ifp == NULL");
error = ENXIO;
break;
}
na = NA(ifp); /* retrieve netmap adapter */
if (priv->np_qfirst == NETMAP_SW_RING) { /* host rings */
if (cmd == NIOCTXSYNC)
@ -1047,6 +1191,12 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
if (devfs_get_cdevpriv((void **)&priv) != 0 || priv == NULL)
return POLLERR;
if (priv->np_nifp == NULL) {
D("No if registered");
return POLLERR;
}
rmb(); /* make sure following reads are not from cache */
ifp = priv->np_ifp;
// XXX check for deleting() ?
if ( (ifp->if_capenable & IFCAP_NETMAP) == 0)
@ -1322,7 +1472,7 @@ netmap_attach(struct netmap_adapter *na, int num_queues)
na->tx_rings = (void *)((char *)buf + sizeof(*na));
na->rx_rings = na->tx_rings + na->num_tx_rings + 1;
bcopy(na, buf, sizeof(*na));
ifp->if_capabilities |= IFCAP_NETMAP;
NETMAP_SET_CAPABLE(ifp);
na = buf;
/* Core lock initialized here. Others are initialized after
@ -1337,7 +1487,7 @@ netmap_attach(struct netmap_adapter *na, int num_queues)
}
#ifdef linux
if (ifp->netdev_ops) {
D("netdev_ops %p", ifp->netdev_ops);
ND("netdev_ops %p", ifp->netdev_ops);
/* prepare a clone of the netdev ops */
na->nm_ndo = *ifp->netdev_ops;
}
@ -1440,9 +1590,13 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
return NULL; /* nothing to reinitialize */
if (tx == NR_TX) {
if (n >= na->num_tx_rings)
return NULL;
kring = na->tx_rings + n;
new_hwofs = kring->nr_hwcur - new_cur;
} else {
if (n >= na->num_rx_rings)
return NULL;
kring = na->rx_rings + n;
new_hwofs = kring->nr_hwcur + kring->nr_hwavail - new_cur;
}
@ -1454,7 +1608,7 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, int n,
kring->nkr_hwofs = new_hwofs;
if (tx == NR_TX)
kring->nr_hwavail = kring->nkr_num_slots - 1;
D("new hwofs %d on %s %s[%d]",
ND(10, "new hwofs %d on %s %s[%d]",
kring->nkr_hwofs, na->ifp->if_xname,
tx == NR_TX ? "TX" : "RX", n);
@ -1501,12 +1655,22 @@ netmap_rx_irq(struct ifnet *ifp, int q, int *work_done)
if (!(ifp->if_capenable & IFCAP_NETMAP))
return 0;
ND(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
na = NA(ifp);
if (na->na_flags & NAF_SKIP_INTR) {
ND("use regular interrupt");
return 0;
}
if (work_done) { /* RX path */
if (q >= na->num_rx_rings)
return 0; // regular queue
r = na->rx_rings + q;
r->nr_kflags |= NKR_PENDINTR;
main_wq = (na->num_rx_rings > 1) ? &na->rx_si : NULL;
} else { /* tx path */
if (q >= na->num_tx_rings)
return 0; // regular queue
r = na->tx_rings + q;
main_wq = (na->num_tx_rings > 1) ? &na->tx_si : NULL;
work_done = &q; /* dummy */
@ -1560,38 +1724,65 @@ linux_netmap_mmap(struct file *f, struct vm_area_struct *vma)
int lut_skip, i, j;
int user_skip = 0;
struct lut_entry *l_entry;
const struct netmap_obj_pool *p[] = {
nm_mem->nm_if_pool,
nm_mem->nm_ring_pool,
nm_mem->nm_buf_pool };
int error = 0;
unsigned long off, tomap;
/*
* vma->vm_start: start of mapping user address space
* vma->vm_end: end of the mapping user address space
* vma->vm_pfoff: offset of first page in the device
*/
(void)f; /* UNUSED */
// XXX security checks
for (i = 0; i < 3; i++) { /* loop through obj_pools */
error = netmap_get_memory(f->private_data);
ND("get_memory returned %d", error);
if (error)
return -error;
off = vma->vm_pgoff << PAGE_SHIFT; /* offset in bytes */
tomap = vma->vm_end - vma->vm_start;
for (i = 0; i < NETMAP_POOLS_NR; i++) { /* loop through obj_pools */
const struct netmap_obj_pool *p = &nm_mem.pools[i];
/*
* In each pool memory is allocated in clusters
* of size _clustsize , each containing clustentries
* of size _clustsize, each containing clustentries
* entries. For each object k we already store the
* vtophys malling in lut[k] so we use that, scanning
* vtophys mapping in lut[k] so we use that, scanning
* the lut[] array in steps of clustentries,
* and we map each cluster (not individual pages,
* it would be overkill).
*/
for (lut_skip = 0, j = 0; j < p[i]->_numclusters; j++) {
l_entry = &p[i]->lut[lut_skip];
/*
* We interpret vm_pgoff as an offset into the whole
* netmap memory, as if all clusters were contiguous.
*/
for (lut_skip = 0, j = 0; j < p->_numclusters; j++, lut_skip += p->clustentries) {
unsigned long paddr, mapsize;
if (p->_clustsize <= off) {
off -= p->_clustsize;
continue;
}
l_entry = &p->lut[lut_skip]; /* first obj in the cluster */
paddr = l_entry->paddr + off;
mapsize = p->_clustsize - off;
off = 0;
if (mapsize > tomap)
mapsize = tomap;
ND("remap_pfn_range(%lx, %lx, %lx)",
vma->vm_start + user_skip,
paddr >> PAGE_SHIFT, mapsize);
if (remap_pfn_range(vma, vma->vm_start + user_skip,
l_entry->paddr >> PAGE_SHIFT, p[i]->_clustsize,
paddr >> PAGE_SHIFT, mapsize,
vma->vm_page_prot))
return -EAGAIN; // XXX check return value
lut_skip += p[i]->clustentries;
user_skip += p[i]->_clustsize;
user_skip += mapsize;
tomap -= mapsize;
if (tomap == 0)
goto done;
}
}
done:
return 0;
}
@ -1636,8 +1827,24 @@ netmap_release(struct inode *inode, struct file *file)
return (0);
}
static int
linux_netmap_open(struct inode *inode, struct file *file)
{
struct netmap_priv_d *priv;
(void)inode; /* UNUSED */
priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
M_NOWAIT | M_ZERO);
if (priv == NULL)
return -ENOMEM;
file->private_data = priv;
return (0);
}
static struct file_operations netmap_fops = {
.open = linux_netmap_open,
.mmap = linux_netmap_mmap,
LIN_IOCTL_NAME = linux_netmap_ioctl,
.poll = linux_netmap_poll,
@ -1683,9 +1890,12 @@ MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. */
static struct cdevsw netmap_cdevsw = {
.d_version = D_VERSION,
.d_name = "netmap",
.d_open = netmap_open,
.d_mmap = netmap_mmap,
.d_mmap_single = netmap_mmap_single,
.d_ioctl = netmap_ioctl,
.d_poll = netmap_poll,
.d_close = netmap_close,
};
#endif /* __FreeBSD__ */
@ -2048,8 +2258,7 @@ netmap_init(void)
printf("netmap: unable to initialize the memory allocator.\n");
return (error);
}
printf("netmap: loaded module with %d Mbytes\n",
(int)(nm_mem->nm_totalsize >> 20));
printf("netmap: loaded module\n");
netmap_dev = make_dev(&netmap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0660,
"netmap");


@ -1,6 +1,6 @@
/*
* Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -9,7 +9,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@ -25,7 +25,7 @@
/*
* $FreeBSD$
* $Id: netmap_kern.h 11343 2012-07-03 09:08:38Z luigi $
* $Id: netmap_kern.h 11829 2012-09-26 04:06:34Z luigi $
*
* The header contains the definitions of constants and function
* prototypes used only in kernelspace.
@ -55,11 +55,10 @@
#endif
/*
* IFCAP_NETMAP goes into net_device's flags (if_capabilities)
* and priv_flags (if_capenable). The latter used to be 16 bits
* up to linux 2.6.36, so we need to use a 16 bit value on older
* IFCAP_NETMAP goes into net_device's priv_flags (if_capenable).
* This was 16 bits up to linux 2.6.36, so we need a 16 bit value on older
* platforms and tolerate the clash with IFF_DYNAMIC and IFF_BRIDGE_PORT.
* For the 32-bit value, 0x100000 (bit 20) has no clashes up to 3.3.1
* For the 32-bit value, 0x100000 has no clashes until at least 3.5.1
*/
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
#define IFCAP_NETMAP 0x8000
@ -68,7 +67,7 @@
#endif
#elif defined (__APPLE__)
#warning apple support is experimental
#warning apple support is incomplete.
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#define NM_LOCK_T IOLock *
@ -89,7 +88,19 @@
(int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
__FUNCTION__, __LINE__, ##__VA_ARGS__); \
} while (0)
/* rate limited, lps indicates how many per second */
#define RD(lps, format, ...) \
do { \
static int t0, __cnt; \
if (t0 != time_second) { \
t0 = time_second; \
__cnt = 0; \
} \
if (__cnt++ < lps) \
D(format, ##__VA_ARGS__); \
} while (0)
struct netmap_adapter;
/*
@ -129,6 +140,18 @@ struct netmap_kring {
* support netmap operation.
*/
struct netmap_adapter {
/*
* On linux we do not have a good way to tell if an interface
* is netmap-capable. So we use the following trick:
* NA(ifp) points here, and the first entry (which hopefully
* always exists and is at least 32 bits) contains a magic
* value which we can use to detect that the interface is good.
*/
uint32_t magic;
uint32_t na_flags; /* future place for IFCAP_NETMAP */
#define NAF_SKIP_INTR 1 /* use the regular interrupt handler.
* useful during initialization
*/
int refcount; /* number of user-space descriptors using this
interface, which is equal to the number of
struct netmap_if objs in the mapped region. */
@ -149,7 +172,6 @@ struct netmap_adapter {
u_int num_tx_desc; /* number of descriptor in each queue */
u_int num_rx_desc;
//u_int buff_size; // XXX deprecate, use NETMAP_BUF_SIZE
/* tx_rings and rx_rings are private but allocated
* as a contiguous chunk of memory. Each array has
@ -185,7 +207,7 @@ struct netmap_adapter {
};
/*
* The combination of "enable" (ifp->if_capabilities &IFCAP_NETMAP)
* The combination of "enable" (ifp->if_capenable & IFCAP_NETMAP)
* and refcount gives the status of the interface, namely:
*
* enable refcount Status
@ -268,6 +290,36 @@ enum { /* verbose flags */
#endif
#define NA(_ifp) ((struct netmap_adapter *)WNA(_ifp))
/*
* Macros to determine if an interface is netmap capable or netmap enabled.
* See the magic field in struct netmap_adapter.
*/
#ifdef __FreeBSD__
/*
* on FreeBSD just use if_capabilities and if_capenable.
*/
#define NETMAP_CAPABLE(ifp) (NA(ifp) && \
(ifp)->if_capabilities & IFCAP_NETMAP )
#define NETMAP_SET_CAPABLE(ifp) \
(ifp)->if_capabilities |= IFCAP_NETMAP
#else /* linux */
/*
* on linux:
* we check if NA(ifp) is set and its first element has a related
* magic value. The capenable is within the struct netmap_adapter.
*/
#define NETMAP_MAGIC 0x52697a7a
#define NETMAP_CAPABLE(ifp) (NA(ifp) && \
((uint32_t)(uintptr_t)NA(ifp) ^ NA(ifp)->magic) == NETMAP_MAGIC )
#define NETMAP_SET_CAPABLE(ifp) \
NA(ifp)->magic = ((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC
#endif /* linux */
#ifdef __FreeBSD__
/* Callback invoked by the dma machinery after a successful dmamap_load */


@ -1,5 +1,5 @@
/*
* Copyright (C) 2012 Matteo Landi, Luigi Rizzo. All rights reserved.
* Copyright (C) 2012 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -25,19 +25,19 @@
/*
* $FreeBSD$
* $Id: netmap_mem2.c 11445 2012-07-30 10:49:07Z luigi $
* $Id: netmap_mem2.c 11881 2012-10-18 23:24:15Z luigi $
*
* New memory allocator for netmap
* (New) memory allocator for netmap
*/
/*
* The new version allocates three regions:
* nm_if_pool for the struct netmap_if
* nm_ring_pool for the struct netmap_ring
* nm_buf_pool for the packet buffers.
* This allocator creates three memory regions:
* nm_if_pool for the struct netmap_if
* nm_ring_pool for the struct netmap_ring
* nm_buf_pool for the packet buffers.
*
* All regions need to be page-sized as we export them to
* userspace through mmap. Only the latter need to be dma-able,
* All regions need to be multiple of a page size as we export them to
* userspace through mmap. Only the latter needs to be dma-able,
* but for convenience use the same type of allocator for all.
*
* Once mapped, the three regions are exported to userspace
@ -51,58 +51,97 @@
* of the object, and from there locate the offset from the beginning
* of the region.
*
* Allocator for a pool of memory objects of the same size.
* The invididual allocators manage a pool of memory for objects of
* the same size.
* The pool is split into smaller clusters, whose size is a
* multiple of the page size. The cluster size is chosen
* to minimize the waste for a given max cluster size
* (we do it by brute force, as we have relatively few objects
* per cluster).
*
* To be polite with the cache, objects are aligned to
* the cache line, or 64 bytes. Sizes are rounded to multiple of 64.
* For each object we have
* one entry in the bitmap to signal the state. Allocation scans
* the bitmap, but since this is done only on attach, we are not
* Objects are aligned to the cache line (64 bytes) rounding up object
* sizes when needed. A bitmap contains the state of each object.
* Allocation scans the bitmap; this is done only on attach, so we are not
* too worried about performance
*/
/*
* MEMORY SIZES:
*
* (all the parameters below will become tunables)
* For each allocator we can define (through sysctl) the size and
* number of each object. Memory is allocated at the first use of a
* netmap file descriptor, and can be freed when all such descriptors
* have been released (including unmapping the memory).
* If memory is scarce, the system tries to get as much as possible
* and the sysctl values reflect the actual allocation.
* Together with desired values, the sysctl export also absolute
* min and maximum values that cannot be overridden.
*
* struct netmap_if is variable size but small.
* Assuming each NIC has 8+2 rings, (4+1 tx, 4+1 rx) the netmap_if
* uses 120 bytes on a 64-bit machine.
* We allocate NETMAP_IF_MAX_SIZE (1024) which should work even for
* cards with 48 ring pairs.
* The total number of 'struct netmap_if' could be slightly larger
* than the total number of rings on all interfaces on the system.
* struct netmap_if:
* variable size, max 16 bytes per ring pair plus some fixed amount.
* 1024 bytes should be large enough in practice.
*
* In the worst case we have one netmap_if per ring in the system.
*
* struct netmap_ring
* variable too, 8 byte per slot plus some fixed amount.
* Rings can be large (e.g. 4k slots, or >32Kbytes).
* We default to 36 KB (9 pages), and a few hundred rings.
*
* struct netmap_buffer
* The more the better, both because fast interfaces tend to have
* many slots, and because we may want to use buffers to store
* packets in userspace avoiding copies.
* Must contain a full frame (eg 1518, or more for vlans, jumbo
* frames etc.) plus be nicely aligned, plus some NICs restrict
* the size to multiple of 1K or so. Default to 2K
*/
#define NETMAP_IF_MAX_SIZE 1024
#define NETMAP_IF_MAX_NUM 512
/*
* netmap rings are up to 2..4k descriptors, 8 bytes each,
* plus some glue at the beginning (32 bytes).
* We set the default ring size to 9 pages (36K) and enable
* a few hundreds of them.
*/
#define NETMAP_RING_MAX_SIZE (9*PAGE_SIZE)
#define NETMAP_RING_MAX_NUM 200 /* approx 8MB */
/*
* Buffers: the more the better. Buffer size is NETMAP_BUF_SIZE,
* 2k or slightly less, aligned to 64 bytes.
* A large 10G interface can have 2k*18 = 36k buffers per interface,
* or about 72MB of memory. Up to us to use more.
*/
#ifndef CONSERVATIVE
#define NETMAP_BUF_MAX_NUM 100000 /* 200MB */
#define NETMAP_BUF_MAX_NUM 20*4096*2 /* large machine */
#else /* CONSERVATIVE */
#define NETMAP_BUF_MAX_NUM 20000 /* 40MB */
#endif
#ifdef linux
#define NMA_LOCK_T struct semaphore
#define NMA_LOCK_INIT() sema_init(&nm_mem.nm_mtx, 1)
#define NMA_LOCK_DESTROY()
#define NMA_LOCK() down(&nm_mem.nm_mtx)
#define NMA_UNLOCK() up(&nm_mem.nm_mtx)
#else /* !linux */
#define NMA_LOCK_T struct mtx
#define NMA_LOCK_INIT() mtx_init(&nm_mem.nm_mtx, "netmap memory allocator lock", NULL, MTX_DEF)
#define NMA_LOCK_DESTROY() mtx_destroy(&nm_mem.nm_mtx)
#define NMA_LOCK() mtx_lock(&nm_mem.nm_mtx)
#define NMA_UNLOCK() mtx_unlock(&nm_mem.nm_mtx)
#endif /* linux */
enum {
NETMAP_IF_POOL = 0,
NETMAP_RING_POOL,
NETMAP_BUF_POOL,
NETMAP_POOLS_NR
};
struct netmap_obj_params {
u_int size;
u_int num;
};
struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
[NETMAP_IF_POOL] = {
.size = 1024,
.num = 100,
},
[NETMAP_RING_POOL] = {
.size = 9*PAGE_SIZE,
.num = 200,
},
[NETMAP_BUF_POOL] = {
.size = 2048,
.num = NETMAP_BUF_MAX_NUM,
},
};
struct netmap_obj_pool {
char name[16]; /* name of the allocator */
@ -110,6 +149,12 @@ struct netmap_obj_pool {
u_int objfree; /* number of free objects. */
u_int clustentries; /* actual objects per cluster */
/* limits */
u_int objminsize; /* minimum object size */
u_int objmaxsize; /* maximum object size */
u_int nummin; /* minimum number of objects */
u_int nummax; /* maximum number of objects */
/* the total memory space is _numclusters*_clustsize */
u_int _numclusters; /* how many clusters */
u_int _clustsize; /* cluster size */
@ -118,20 +163,69 @@ struct netmap_obj_pool {
u_int _memtotal; /* _numclusters*_clustsize */
struct lut_entry *lut; /* virt,phys addresses, objtotal entries */
uint32_t *bitmap; /* one bit per buffer, 1 means free */
uint32_t bitmap_slots; /* number of uint32 entries in bitmap */
};
struct netmap_mem_d {
NM_LOCK_T nm_mtx; /* protect the allocator ? */
NMA_LOCK_T nm_mtx; /* protect the allocator */
u_int nm_totalsize; /* shorthand */
/* pointers to the three allocators */
struct netmap_obj_pool *nm_if_pool;
struct netmap_obj_pool *nm_ring_pool;
struct netmap_obj_pool *nm_buf_pool;
int finalized; /* !=0 iff preallocation done */
int lasterr; /* last error for curr config */
int refcount; /* existing priv structures */
/* the three allocators */
struct netmap_obj_pool pools[NETMAP_POOLS_NR];
};
static struct netmap_mem_d nm_mem = { /* Our memory allocator. */
.pools = {
[NETMAP_IF_POOL] = {
.name = "netmap_if",
.objminsize = sizeof(struct netmap_if),
.objmaxsize = 4096,
.nummin = 10, /* don't be stingy */
.nummax = 10000, /* XXX very large */
},
[NETMAP_RING_POOL] = {
.name = "netmap_ring",
.objminsize = sizeof(struct netmap_ring),
.objmaxsize = 32*PAGE_SIZE,
.nummin = 2,
.nummax = 1024,
},
[NETMAP_BUF_POOL] = {
.name = "netmap_buf",
.objminsize = 64,
.objmaxsize = 65536,
.nummin = 4,
.nummax = 1000000, /* one million! */
},
},
};
struct lut_entry *netmap_buffer_lut; /* exported */
/* memory allocator related sysctls */
#define STRINGIFY(x) #x
#define DECLARE_SYSCTLS(id, name) \
/* TUNABLE_INT("hw.netmap." STRINGIFY(name) "_size", &netmap_params[id].size); */ \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
/* TUNABLE_INT("hw.netmap." STRINGIFY(name) "_num", &netmap_params[id].num); */ \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s")
DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
DECLARE_SYSCTLS(NETMAP_RING_POOL, ring);
DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
/*
* Convert a userspace offset to a physical address.
@ -146,24 +240,25 @@ struct lut_entry *netmap_buffer_lut; /* exported */
static inline vm_paddr_t
netmap_ofstophys(vm_offset_t offset)
{
const struct netmap_obj_pool *p[] = {
nm_mem->nm_if_pool,
nm_mem->nm_ring_pool,
nm_mem->nm_buf_pool };
int i;
vm_offset_t o = offset;
struct netmap_obj_pool *p = nm_mem.pools;
for (i = 0; i < 3; offset -= p[i]->_memtotal, i++) {
if (offset >= p[i]->_memtotal)
for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i]._memtotal, i++) {
if (offset >= p[i]._memtotal)
continue;
// XXX now scan the clusters
return p[i]->lut[offset / p[i]->_objsize].paddr +
offset % p[i]->_objsize;
return p[i].lut[offset / p[i]._objsize].paddr +
offset % p[i]._objsize;
}
/* this is only in case of errors */
D("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o,
p[0]->_memtotal, p[0]->_memtotal + p[1]->_memtotal,
p[0]->_memtotal + p[1]->_memtotal + p[2]->_memtotal);
p[NETMAP_IF_POOL]._memtotal,
p[NETMAP_IF_POOL]._memtotal
+ p[NETMAP_RING_POOL]._memtotal,
p[NETMAP_IF_POOL]._memtotal
+ p[NETMAP_RING_POOL]._memtotal
+ p[NETMAP_BUF_POOL]._memtotal);
return 0; // XXX bad address
}
@ -198,20 +293,24 @@ netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr)
/* Helper functions which convert virtual addresses to offsets */
#define netmap_if_offset(v) \
netmap_obj_offset(nm_mem->nm_if_pool, (v))
netmap_obj_offset(&nm_mem.pools[NETMAP_IF_POOL], (v))
#define netmap_ring_offset(v) \
(nm_mem->nm_if_pool->_memtotal + \
netmap_obj_offset(nm_mem->nm_ring_pool, (v)))
(nm_mem.pools[NETMAP_IF_POOL]._memtotal + \
netmap_obj_offset(&nm_mem.pools[NETMAP_RING_POOL], (v)))
#define netmap_buf_offset(v) \
(nm_mem->nm_if_pool->_memtotal + \
nm_mem->nm_ring_pool->_memtotal + \
netmap_obj_offset(nm_mem->nm_buf_pool, (v)))
(nm_mem.pools[NETMAP_IF_POOL]._memtotal + \
nm_mem.pools[NETMAP_RING_POOL]._memtotal + \
netmap_obj_offset(&nm_mem.pools[NETMAP_BUF_POOL], (v)))
/*
* report the index, and use start position as a hint,
* otherwise buffer allocation becomes terribly expensive.
*/
static void *
netmap_obj_malloc(struct netmap_obj_pool *p, int len)
netmap_obj_malloc(struct netmap_obj_pool *p, int len, uint32_t *start, uint32_t *index)
{
uint32_t i = 0; /* index in the bitmap */
uint32_t mask, j; /* slot counter */
@ -227,9 +326,11 @@ netmap_obj_malloc(struct netmap_obj_pool *p, int len)
D("%s allocator: run out of memory", p->name);
return NULL;
}
if (start)
i = *start;
/* termination is guaranteed by p->free */
while (vaddr == NULL) {
/* termination is guaranteed by p->free, but better check bounds on i */
while (vaddr == NULL && i < p->bitmap_slots) {
uint32_t cur = p->bitmap[i];
if (cur == 0) { /* bitmask is fully used */
i++;
@ -243,9 +344,13 @@ netmap_obj_malloc(struct netmap_obj_pool *p, int len)
p->objfree--;
vaddr = p->lut[i * 32 + j].vaddr;
if (index)
*index = i * 32 + j;
}
ND("%s allocator: allocated object @ [%d][%d]: vaddr %p", i, j, vaddr);
if (start)
*start = i;
return vaddr;
}
@ -287,62 +392,93 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
vaddr, p->name);
}
#define netmap_if_malloc(len) netmap_obj_malloc(nm_mem->nm_if_pool, len)
#define netmap_if_free(v) netmap_obj_free_va(nm_mem->nm_if_pool, (v))
#define netmap_ring_malloc(len) netmap_obj_malloc(nm_mem->nm_ring_pool, len)
#define netmap_buf_malloc() \
netmap_obj_malloc(nm_mem->nm_buf_pool, NETMAP_BUF_SIZE)
#define netmap_if_malloc(len) netmap_obj_malloc(&nm_mem.pools[NETMAP_IF_POOL], len, NULL, NULL)
#define netmap_if_free(v) netmap_obj_free_va(&nm_mem.pools[NETMAP_IF_POOL], (v))
#define netmap_ring_malloc(len) netmap_obj_malloc(&nm_mem.pools[NETMAP_RING_POOL], len, NULL, NULL)
#define netmap_ring_free(v) netmap_obj_free_va(&nm_mem.pools[NETMAP_RING_POOL], (v))
#define netmap_buf_malloc(_pos, _index) \
netmap_obj_malloc(&nm_mem.pools[NETMAP_BUF_POOL], NETMAP_BUF_SIZE, _pos, _index)
/* Return the index associated to the given packet buffer */
#define netmap_buf_index(v) \
(netmap_obj_offset(nm_mem->nm_buf_pool, (v)) / nm_mem->nm_buf_pool->_objsize)
(netmap_obj_offset(&nm_mem.pools[NETMAP_BUF_POOL], (v)) / nm_mem.pools[NETMAP_BUF_POOL]._objsize)
static void
/* Return nonzero on error */
static int
netmap_new_bufs(struct netmap_if *nifp,
struct netmap_slot *slot, u_int n)
{
struct netmap_obj_pool *p = nm_mem->nm_buf_pool;
uint32_t i = 0; /* slot counter */
struct netmap_obj_pool *p = &nm_mem.pools[NETMAP_BUF_POOL];
int i = 0; /* slot counter */
uint32_t pos = 0; /* slot in p->bitmap */
uint32_t index = 0; /* buffer index */
(void)nifp; /* UNUSED */
for (i = 0; i < n; i++) {
void *vaddr = netmap_buf_malloc();
void *vaddr = netmap_buf_malloc(&pos, &index);
if (vaddr == NULL) {
D("unable to locate empty packet buffer");
goto cleanup;
}
slot[i].buf_idx = netmap_buf_index(vaddr);
KASSERT(slot[i].buf_idx != 0,
("Assigning buf_idx=0 to just created slot"));
slot[i].buf_idx = index;
slot[i].len = p->_objsize;
slot[i].flags = NS_BUF_CHANGED; // XXX GAETANO hack
/* XXX setting flags=NS_BUF_CHANGED forces a pointer reload
* in the NIC ring. This is a hack that hides missing
* initializations in the drivers, and should go away.
*/
slot[i].flags = NS_BUF_CHANGED;
}
ND("allocated %d buffers, %d available", n, p->objfree);
return;
ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos);
return (0);
cleanup:
while (i > 0) {
i--;
netmap_obj_free(nm_mem->nm_buf_pool, slot[i].buf_idx);
netmap_obj_free(p, slot[i].buf_idx);
}
bzero(slot, n * sizeof(slot[0]));
return (ENOMEM);
}
static void
netmap_free_buf(struct netmap_if *nifp, uint32_t i)
{
struct netmap_obj_pool *p = nm_mem->nm_buf_pool;
struct netmap_obj_pool *p = &nm_mem.pools[NETMAP_BUF_POOL];
if (i < 2 || i >= p->objtotal) {
D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
return;
}
netmap_obj_free(nm_mem->nm_buf_pool, i);
netmap_obj_free(p, i);
}
static void
netmap_reset_obj_allocator(struct netmap_obj_pool *p)
{
if (p == NULL)
return;
if (p->bitmap)
free(p->bitmap, M_NETMAP);
p->bitmap = NULL;
if (p->lut) {
int i;
for (i = 0; i < p->objtotal; i += p->clustentries) {
if (p->lut[i].vaddr)
contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP);
}
bzero(p->lut, sizeof(struct lut_entry) * p->objtotal);
#ifdef linux
vfree(p->lut);
#else
free(p->lut, M_NETMAP);
#endif
}
p->lut = NULL;
}
/*
* Free all resources related to an allocator.
@ -352,19 +488,7 @@ netmap_destroy_obj_allocator(struct netmap_obj_pool *p)
{
if (p == NULL)
return;
if (p->bitmap)
free(p->bitmap, M_NETMAP);
if (p->lut) {
int i;
for (i = 0; i < p->objtotal; i += p->clustentries) {
if (p->lut[i].vaddr)
contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP);
}
bzero(p->lut, sizeof(struct lut_entry) * p->objtotal);
free(p->lut, M_NETMAP);
}
bzero(p, sizeof(*p));
free(p, M_NETMAP);
netmap_reset_obj_allocator(p);
}
/*
@ -378,10 +502,12 @@ netmap_destroy_obj_allocator(struct netmap_obj_pool *p)
* XXX note -- userspace needs the buffers to be contiguous,
* so we cannot afford gaps at the end of a cluster.
*/
static struct netmap_obj_pool *
netmap_new_obj_allocator(const char *name, u_int objtotal, u_int objsize)
/* call with NMA_LOCK held */
static int
netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int objsize)
{
struct netmap_obj_pool *p;
int i, n;
u_int clustsize; /* the cluster size, multiple of page size */
u_int clustentries; /* how many objects per entry */
@ -391,7 +517,7 @@ netmap_new_obj_allocator(const char *name, u_int objtotal, u_int objsize)
if (objsize >= MAX_CLUSTSIZE) {
/* we could do it but there is no point */
D("unsupported allocation for %d bytes", objsize);
return NULL;
goto error;
}
/* make sure objsize is a multiple of LINE_ROUND */
i = (objsize & (LINE_ROUND - 1));
@ -399,6 +525,16 @@ netmap_new_obj_allocator(const char *name, u_int objtotal, u_int objsize)
D("XXX aligning object by %d bytes", LINE_ROUND - i);
objsize += LINE_ROUND - i;
}
if (objsize < p->objminsize || objsize > p->objmaxsize) {
D("requested objsize %d out of range [%d, %d]",
objsize, p->objminsize, p->objmaxsize);
goto error;
}
if (objtotal < p->nummin || objtotal > p->nummax) {
D("requested objtotal %d out of range [%d, %d]",
objtotal, p->nummin, p->nummax);
goto error;
}
/*
* Compute number of objects using a brute-force approach:
* given a max cluster size,
@ -426,32 +562,43 @@ netmap_new_obj_allocator(const char *name, u_int objtotal, u_int objsize)
D("objsize %d clustsize %d objects %d",
objsize, clustsize, clustentries);
p = malloc(sizeof(struct netmap_obj_pool), M_NETMAP,
M_WAITOK | M_ZERO);
if (p == NULL) {
D("Unable to create '%s' allocator", name);
return NULL;
}
/*
* Allocate and initialize the lookup table.
*
* The number of clusters is n = ceil(objtotal/clustentries)
* objtotal' = n * clustentries
*/
strncpy(p->name, name, sizeof(p->name));
p->clustentries = clustentries;
p->_clustsize = clustsize;
n = (objtotal + clustentries - 1) / clustentries;
p->_numclusters = n;
p->objtotal = n * clustentries;
p->objfree = p->objtotal - 2; /* obj 0 and 1 are reserved */
p->_objsize = objsize;
p->_memtotal = p->_numclusters * p->_clustsize;
p->_objsize = objsize;
p->lut = malloc(sizeof(struct lut_entry) * p->objtotal,
M_NETMAP, M_WAITOK | M_ZERO);
return 0;
error:
p->_objsize = objsize;
p->objtotal = objtotal;
return EINVAL;
}
/* call with NMA_LOCK held */
static int
netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
{
int i, n;
n = sizeof(struct lut_entry) * p->objtotal;
#ifdef linux
p->lut = vmalloc(n);
#else
p->lut = malloc(n, M_NETMAP, M_WAITOK | M_ZERO);
#endif
if (p->lut == NULL) {
D("Unable to create lookup table for '%s' allocator", name);
D("Unable to create lookup table (%d bytes) for '%s'", n, p->name);
goto clean;
}
@ -460,40 +607,42 @@ netmap_new_obj_allocator(const char *name, u_int objtotal, u_int objsize)
p->bitmap = malloc(sizeof(uint32_t) * n, M_NETMAP, M_WAITOK | M_ZERO);
if (p->bitmap == NULL) {
D("Unable to create bitmap (%d entries) for allocator '%s'", n,
name);
p->name);
goto clean;
}
p->bitmap_slots = n;
/*
* Allocate clusters, init pointers and bitmap
*/
for (i = 0; i < p->objtotal;) {
int lim = i + clustentries;
int lim = i + p->clustentries;
char *clust;
clust = contigmalloc(clustsize, M_NETMAP, M_WAITOK | M_ZERO,
clust = contigmalloc(p->_clustsize, M_NETMAP, M_NOWAIT | M_ZERO,
0, -1UL, PAGE_SIZE, 0);
if (clust == NULL) {
/*
* If we get here, there is a severe memory shortage,
* so halve the allocated memory to reclaim some.
* XXX check boundaries
*/
D("Unable to create cluster at %d for '%s' allocator",
i, name);
i, p->name);
lim = i / 2;
for (; i >= lim; i--) {
for (i--; i >= lim; i--) {
p->bitmap[ (i>>5) ] &= ~( 1 << (i & 31) );
if (i % clustentries == 0 && p->lut[i].vaddr)
if (i % p->clustentries == 0 && p->lut[i].vaddr)
contigfree(p->lut[i].vaddr,
p->_clustsize, M_NETMAP);
}
p->objtotal = i;
p->objfree = p->objtotal - 2;
p->_numclusters = i / clustentries;
p->_numclusters = i / p->clustentries;
p->_memtotal = p->_numclusters * p->_clustsize;
break;
}
for (; i < lim; i++, clust += objsize) {
for (; i < lim; i++, clust += p->_objsize) {
p->bitmap[ (i>>5) ] |= ( 1 << (i & 31) );
p->lut[i].vaddr = clust;
p->lut[i].paddr = vtophys(clust);
@ -502,83 +651,164 @@ netmap_new_obj_allocator(const char *name, u_int objtotal, u_int objsize)
p->bitmap[0] = ~3; /* objs 0 and 1 are always busy */
D("Pre-allocated %d clusters (%d/%dKB) for '%s'",
p->_numclusters, p->_clustsize >> 10,
p->_memtotal >> 10, name);
p->_memtotal >> 10, p->name);
return p;
return 0;
clean:
netmap_destroy_obj_allocator(p);
return NULL;
netmap_reset_obj_allocator(p);
return ENOMEM;
}
/* call with lock held */
static int
netmap_memory_config_changed(void)
{
int i;
for (i = 0; i < NETMAP_POOLS_NR; i++) {
if (nm_mem.pools[i]._objsize != netmap_params[i].size ||
nm_mem.pools[i].objtotal != netmap_params[i].num)
return 1;
}
return 0;
}
/* call with lock held */
static int
netmap_memory_config(void)
{
int i;
if (!netmap_memory_config_changed())
goto out;
D("reconfiguring");
if (nm_mem.finalized) {
/* reset previous allocation */
for (i = 0; i < NETMAP_POOLS_NR; i++) {
netmap_reset_obj_allocator(&nm_mem.pools[i]);
}
nm_mem.finalized = 0;
}
for (i = 0; i < NETMAP_POOLS_NR; i++) {
nm_mem.lasterr = netmap_config_obj_allocator(&nm_mem.pools[i],
netmap_params[i].num, netmap_params[i].size);
if (nm_mem.lasterr)
goto out;
}
D("Have %d KB for interfaces, %d KB for rings and %d MB for buffers",
nm_mem.pools[NETMAP_IF_POOL]._memtotal >> 10,
nm_mem.pools[NETMAP_RING_POOL]._memtotal >> 10,
nm_mem.pools[NETMAP_BUF_POOL]._memtotal >> 20);
out:
return nm_mem.lasterr;
}
/* call with lock held */
static int
netmap_memory_finalize(void)
{
int i;
u_int totalsize = 0;
nm_mem.refcount++;
if (nm_mem.refcount > 1) {
D("busy (refcount %d)", nm_mem.refcount);
goto out;
}
/* update configuration if changed */
if (netmap_memory_config())
goto out;
if (nm_mem.finalized) {
/* may happen if config is not changed */
ND("nothing to do");
goto out;
}
for (i = 0; i < NETMAP_POOLS_NR; i++) {
nm_mem.lasterr = netmap_finalize_obj_allocator(&nm_mem.pools[i]);
if (nm_mem.lasterr)
goto cleanup;
totalsize += nm_mem.pools[i]._memtotal;
}
nm_mem.nm_totalsize = totalsize;
/* backward compatibility */
netmap_buf_size = nm_mem.pools[NETMAP_BUF_POOL]._objsize;
netmap_total_buffers = nm_mem.pools[NETMAP_BUF_POOL].objtotal;
netmap_buffer_lut = nm_mem.pools[NETMAP_BUF_POOL].lut;
netmap_buffer_base = nm_mem.pools[NETMAP_BUF_POOL].lut[0].vaddr;
nm_mem.finalized = 1;
nm_mem.lasterr = 0;
/* make sysctl values match actual values in the pools */
for (i = 0; i < NETMAP_POOLS_NR; i++) {
netmap_params[i].size = nm_mem.pools[i]._objsize;
netmap_params[i].num = nm_mem.pools[i].objtotal;
}
out:
if (nm_mem.lasterr)
nm_mem.refcount--;
return nm_mem.lasterr;
cleanup:
for (i = 0; i < NETMAP_POOLS_NR; i++) {
netmap_reset_obj_allocator(&nm_mem.pools[i]);
}
nm_mem.refcount--;
return nm_mem.lasterr;
}
static int
netmap_memory_init(void)
{
struct netmap_obj_pool *p;
nm_mem = malloc(sizeof(struct netmap_mem_d), M_NETMAP,
M_WAITOK | M_ZERO);
if (nm_mem == NULL)
goto clean;
p = netmap_new_obj_allocator("netmap_if",
NETMAP_IF_MAX_NUM, NETMAP_IF_MAX_SIZE);
if (p == NULL)
goto clean;
nm_mem->nm_if_pool = p;
p = netmap_new_obj_allocator("netmap_ring",
NETMAP_RING_MAX_NUM, NETMAP_RING_MAX_SIZE);
if (p == NULL)
goto clean;
nm_mem->nm_ring_pool = p;
p = netmap_new_obj_allocator("netmap_buf",
NETMAP_BUF_MAX_NUM, NETMAP_BUF_SIZE);
if (p == NULL)
goto clean;
netmap_total_buffers = p->objtotal;
netmap_buffer_lut = p->lut;
nm_mem->nm_buf_pool = p;
netmap_buffer_base = p->lut[0].vaddr;
mtx_init(&nm_mem->nm_mtx, "netmap memory allocator lock", NULL,
MTX_DEF);
nm_mem->nm_totalsize =
nm_mem->nm_if_pool->_memtotal +
nm_mem->nm_ring_pool->_memtotal +
nm_mem->nm_buf_pool->_memtotal;
D("Have %d KB for interfaces, %d KB for rings and %d MB for buffers",
nm_mem->nm_if_pool->_memtotal >> 10,
nm_mem->nm_ring_pool->_memtotal >> 10,
nm_mem->nm_buf_pool->_memtotal >> 20);
return 0;
clean:
if (nm_mem) {
netmap_destroy_obj_allocator(nm_mem->nm_ring_pool);
netmap_destroy_obj_allocator(nm_mem->nm_if_pool);
free(nm_mem, M_NETMAP);
}
return ENOMEM;
NMA_LOCK_INIT();
return (0);
}
static void
netmap_memory_fini(void)
{
if (!nm_mem)
return;
netmap_destroy_obj_allocator(nm_mem->nm_if_pool);
netmap_destroy_obj_allocator(nm_mem->nm_ring_pool);
netmap_destroy_obj_allocator(nm_mem->nm_buf_pool);
mtx_destroy(&nm_mem->nm_mtx);
free(nm_mem, M_NETMAP);
int i;
for (i = 0; i < NETMAP_POOLS_NR; i++) {
netmap_destroy_obj_allocator(&nm_mem.pools[i]);
}
NMA_LOCK_DESTROY();
}
static void
netmap_free_rings(struct netmap_adapter *na)
{
int i;
for (i = 0; i < na->num_tx_rings + 1; i++) {
netmap_ring_free(na->tx_rings[i].ring);
na->tx_rings[i].ring = NULL;
}
for (i = 0; i < na->num_rx_rings + 1; i++) {
netmap_ring_free(na->rx_rings[i].ring);
na->rx_rings[i].ring = NULL;
}
}
/* call with NMA_LOCK held */
static void *
netmap_if_new(const char *ifname, struct netmap_adapter *na)
{
@ -590,7 +820,6 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na)
u_int nrx = na->num_rx_rings + 1; /* shorthand, include stack ring */
struct netmap_kring *kring;
NMA_LOCK();
/*
* the descriptor is followed inline by an array of offsets
* to the tx and rx rings in the shared memory region.
@ -598,7 +827,6 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na)
len = sizeof(struct netmap_if) + (nrx + ntx) * sizeof(ssize_t);
nifp = netmap_if_malloc(len);
if (nifp == NULL) {
NMA_UNLOCK();
return NULL;
}
@ -609,7 +837,6 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na)
(na->refcount)++; /* XXX atomic ? we are under lock */
if (na->refcount > 1) { /* already setup, we are done */
NMA_UNLOCK();
goto final;
}
@ -633,8 +860,8 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na)
kring->ring = ring;
*(int *)(uintptr_t)&ring->num_slots = kring->nkr_num_slots = ndesc;
*(ssize_t *)(uintptr_t)&ring->buf_ofs =
(nm_mem->nm_if_pool->_memtotal +
nm_mem->nm_ring_pool->_memtotal) -
(nm_mem.pools[NETMAP_IF_POOL]._memtotal +
nm_mem.pools[NETMAP_RING_POOL]._memtotal) -
netmap_ring_offset(ring);
/*
@ -647,7 +874,10 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na)
ring->cur = kring->nr_hwcur = 0;
*(int *)(uintptr_t)&ring->nr_buf_size = NETMAP_BUF_SIZE;
ND("initializing slots for txring[%d]", i);
netmap_new_bufs(nifp, ring->slot, ndesc);
if (netmap_new_bufs(nifp, ring->slot, ndesc)) {
D("Cannot allocate buffers for tx_ring[%d] for %s", i, ifname);
goto cleanup;
}
}
for (i = 0; i < nrx; i++) { /* Receive rings */
@ -667,17 +897,19 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na)
kring->ring = ring;
*(int *)(uintptr_t)&ring->num_slots = kring->nkr_num_slots = ndesc;
*(ssize_t *)(uintptr_t)&ring->buf_ofs =
(nm_mem->nm_if_pool->_memtotal +
nm_mem->nm_ring_pool->_memtotal) -
(nm_mem.pools[NETMAP_IF_POOL]._memtotal +
nm_mem.pools[NETMAP_RING_POOL]._memtotal) -
netmap_ring_offset(ring);
ring->cur = kring->nr_hwcur = 0;
ring->avail = kring->nr_hwavail = 0; /* empty */
*(int *)(uintptr_t)&ring->nr_buf_size = NETMAP_BUF_SIZE;
ND("initializing slots for rxring[%d]", i);
netmap_new_bufs(nifp, ring->slot, ndesc);
if (netmap_new_bufs(nifp, ring->slot, ndesc)) {
D("Cannot allocate buffers for rx_ring[%d] for %s", i, ifname);
goto cleanup;
}
}
NMA_UNLOCK();
#ifdef linux
// XXX initialize the selrecord structs.
for (i = 0; i < ntx; i++)
@ -704,19 +936,16 @@ netmap_if_new(const char *ifname, struct netmap_adapter *na)
}
return (nifp);
cleanup:
// XXX missing
NMA_UNLOCK();
netmap_free_rings(na);
netmap_if_free(nifp);
(na->refcount)--;
return NULL;
}
/* call with NMA_LOCK held */
static void
netmap_free_rings(struct netmap_adapter *na)
netmap_memory_deref(void)
{
int i;
for (i = 0; i < na->num_tx_rings + 1; i++)
netmap_obj_free_va(nm_mem->nm_ring_pool,
na->tx_rings[i].ring);
for (i = 0; i < na->num_rx_rings + 1; i++)
netmap_obj_free_va(nm_mem->nm_ring_pool,
na->rx_rings[i].ring);
nm_mem.refcount--;
D("refcount = %d", nm_mem.refcount);
}