Import the 'iflib' API library for network drivers. From the author:

"iflib is a library to eliminate the need for frequently duplicated device
independent logic propagated (poorly) across many network drivers."

Participation is purely optional.  The IFLIB kernel config option is
provided for drivers that want to transition between legacy and iflib
modes of operation.  ixl and ixgbe driver conversions will be committed
shortly.  We hope to see participation from the Broadcom and maybe
Chelsio drivers in the near future.
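As a hedged illustration of the transition mechanism (the driver file and symbols below are hypothetical): putting "options IFLIB" in a kernel configuration causes the sys/conf/options entry added by this commit to generate opt_iflib.h with IFLIB defined, which a transitioning driver can test at build time to choose between its legacy and iflib code paths:

/* hypothetical driver source file */
#include "opt_iflib.h"

#ifdef IFLIB
/* compile the iflib-based attach/teardown path */
#else
/* compile the legacy ifnet attach/teardown path */
#endif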

Submitted by:   mmacy@nextbsd.org
Reviewed by:    gallatin
Differential Revision:  D5211
Scott Long  2016-05-18 04:35:58 +00:00
commit 4c7070db25 (parent 679afe0d78)
Notes: svn2git 2020-12-20 02:59:44 +00:00
       svn path=/head/; revision=300113
15 changed files with 6571 additions and 13 deletions

sys/conf/files

@ -3523,6 +3523,9 @@ net/if_tun.c optional tun
net/if_tap.c optional tap
net/if_vlan.c optional vlan
net/if_vxlan.c optional vxlan inet | vxlan inet6
net/ifdi_if.m optional ether pci
net/iflib.c optional ether pci
net/mp_ring.c optional ether
net/mppcc.c optional netgraph_mppc_compression
net/mppcd.c optional netgraph_mppc_compression
net/netisr.c standard

sys/conf/options

@ -139,6 +139,7 @@ GEOM_VINUM opt_geom.h
GEOM_VIRSTOR opt_geom.h
GEOM_VOL opt_geom.h
GEOM_ZERO opt_geom.h
IFLIB opt_iflib.h
KDTRACE_HOOKS opt_global.h
KDTRACE_FRAME opt_kdtrace.h
KN_HASHSIZE opt_kqueue.h

sys/kern/device_if.m

@ -62,6 +62,11 @@ CODE {
{
return 0;
}
static void * null_register(device_t dev)
{
return NULL;
}
};
/**
@ -316,3 +321,24 @@ METHOD int resume {
METHOD int quiesce {
device_t dev;
} DEFAULT null_quiesce;
/**
* @brief This is called when the driver is asked to register handlers.
*
*
* To include this method in a device driver, use a line like this
* in the driver's method list:
*
* @code
* KOBJMETHOD(device_register, foo_register)
* @endcode
*
* @param dev the device for which handlers are being registered
*
* @retval NULL method not implemented
* @retval non-NULL a pointer to implementation specific static driver state
*
*/
METHOD void * register {
device_t dev;
} DEFAULT null_register;
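A hedged sketch of the driver side (the foo_* names are hypothetical; drivers conventionally spell KOBJMETHOD as DEVMETHOD in their device method tables). An iflib-style driver returns its statically initialized if_shared_ctx here so the framework can consult it before attach:

static void *
foo_register(device_t dev)
{
    /* implementation-specific static driver state, e.g. a shared ctx */
    return (&foo_sctx_init);
}

static device_method_t foo_methods[] = {
    DEVMETHOD(device_register, foo_register),
    DEVMETHOD(device_probe,    foo_probe),
    DEVMETHOD(device_attach,   foo_attach),
    DEVMETHOD(device_detach,   foo_detach),
    DEVMETHOD_END
};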

sys/kern/kern_mbuf.c

@ -444,7 +444,7 @@ mb_dtor_mbuf(void *mem, int size, void *arg)
flags = (unsigned long)arg;
KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
if (!(flags & MB_DTOR_SKIP) && (m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
m_tag_delete_chain(m, NULL);
#ifdef INVARIANTS
trash_dtor(mem, size, arg);

sys/kern/subr_taskqueue.c

@ -34,12 +34,14 @@ __FBSDID("$FreeBSD$");
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/taskqueue.h>
#include <sys/unistd.h>
#include <machine/stdarg.h>
@ -62,9 +64,11 @@ struct taskqueue {
STAILQ_HEAD(, task) tq_queue;
taskqueue_enqueue_fn tq_enqueue;
void *tq_context;
char *tq_name;
TAILQ_HEAD(, taskqueue_busy) tq_active;
struct mtx tq_mutex;
struct thread **tq_threads;
struct thread *tq_curthread;
int tq_tcount;
int tq_spin;
int tq_flags;
@ -119,11 +123,17 @@ TQ_SLEEP(struct taskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
}
static struct taskqueue *
_taskqueue_create(const char *name __unused, int mflags,
_taskqueue_create(const char *name, int mflags,
taskqueue_enqueue_fn enqueue, void *context,
int mtxflags, const char *mtxname)
int mtxflags, const char *mtxname __unused)
{
struct taskqueue *queue;
char *tq_name = NULL;
if (name != NULL)
tq_name = strndup(name, 32, M_TASKQUEUE);
if (tq_name == NULL)
tq_name = "taskqueue";
queue = malloc(sizeof(struct taskqueue), M_TASKQUEUE, mflags | M_ZERO);
if (!queue)
@ -133,6 +143,7 @@ _taskqueue_create(const char *name __unused, int mflags,
TAILQ_INIT(&queue->tq_active);
queue->tq_enqueue = enqueue;
queue->tq_context = context;
queue->tq_name = tq_name;
queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
queue->tq_flags |= TQ_FLAGS_ACTIVE;
if (enqueue == taskqueue_fast_enqueue ||
@ -140,7 +151,7 @@ _taskqueue_create(const char *name __unused, int mflags,
enqueue == taskqueue_swi_giant_enqueue ||
enqueue == taskqueue_thread_enqueue)
queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
mtx_init(&queue->tq_mutex, mtxname, NULL, mtxflags);
mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
return queue;
}
@ -149,8 +160,9 @@ struct taskqueue *
taskqueue_create(const char *name, int mflags,
taskqueue_enqueue_fn enqueue, void *context)
{
return _taskqueue_create(name, mflags, enqueue, context,
MTX_DEF, "taskqueue");
MTX_DEF, name);
}
void
@ -194,6 +206,7 @@ taskqueue_free(struct taskqueue *queue)
KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
mtx_destroy(&queue->tq_mutex);
free(queue->tq_threads, M_TASKQUEUE);
free(queue->tq_name, M_TASKQUEUE);
free(queue, M_TASKQUEUE);
}
@ -203,11 +216,12 @@ taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task)
struct task *ins;
struct task *prev;
KASSERT(task->ta_func != NULL, ("enqueueing task with NULL func"));
/*
* Count multiple enqueues.
*/
if (task->ta_pending) {
if (task->ta_pending < USHRT_MAX)
if (task->ta_pending < UCHAR_MAX)
task->ta_pending++;
TQ_UNLOCK(queue);
return (0);
@ -244,6 +258,22 @@ taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task)
return (0);
}
int
grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task)
{
TQ_LOCK(queue);
if (task->ta_pending) {
TQ_UNLOCK(queue);
return (0);
}
STAILQ_INSERT_TAIL(&queue->tq_queue, task, ta_link);
task->ta_pending = 1;
TQ_UNLOCK(queue);
if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
queue->tq_enqueue(queue->tq_context);
return (0);
}
int
taskqueue_enqueue(struct taskqueue *queue, struct task *task)
{
@ -410,6 +440,7 @@ taskqueue_run_locked(struct taskqueue *queue)
struct task *task;
int pending;
KASSERT(queue != NULL, ("tq is NULL"));
TQ_ASSERT_LOCKED(queue);
tb.tb_running = NULL;
@ -421,17 +452,20 @@ taskqueue_run_locked(struct taskqueue *queue)
* zero its pending count.
*/
task = STAILQ_FIRST(&queue->tq_queue);
KASSERT(task != NULL, ("task is NULL"));
STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
pending = task->ta_pending;
task->ta_pending = 0;
tb.tb_running = task;
TQ_UNLOCK(queue);
KASSERT(task->ta_func != NULL, ("task->ta_func is NULL"));
task->ta_func(task->ta_context, pending);
TQ_LOCK(queue);
tb.tb_running = NULL;
wakeup(task);
if ((task->ta_flags & TASK_SKIP_WAKEUP) == 0)
wakeup(task);
TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
tb_first = TAILQ_FIRST(&queue->tq_active);
@ -446,7 +480,9 @@ taskqueue_run(struct taskqueue *queue)
{
TQ_LOCK(queue);
queue->tq_curthread = curthread;
taskqueue_run_locked(queue);
queue->tq_curthread = NULL;
TQ_UNLOCK(queue);
}
@ -679,7 +715,9 @@ taskqueue_thread_loop(void *arg)
tq = *tqp;
taskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
TQ_LOCK(tq);
tq->tq_curthread = curthread;
while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
/* XXX ? */
taskqueue_run_locked(tq);
/*
* Because taskqueue_run() can drop tq_mutex, we need to
@ -691,7 +729,7 @@ taskqueue_thread_loop(void *arg)
TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
}
taskqueue_run_locked(tq);
tq->tq_curthread = NULL;
/*
* This thread is on its way out, so just drop the lock temporarily
* in order to call the shutdown callback. This allows the callback
@ -715,8 +753,8 @@ taskqueue_thread_enqueue(void *context)
tqp = context;
tq = *tqp;
wakeup_one(tq);
if (tq->tq_curthread != curthread)
wakeup_one(tq);
}
TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL,
@ -772,3 +810,334 @@ taskqueue_member(struct taskqueue *queue, struct thread *td)
}
return (ret);
}
struct taskqgroup_cpu {
LIST_HEAD(, grouptask) tgc_tasks;
struct taskqueue *tgc_taskq;
int tgc_cnt;
int tgc_cpu;
};
struct taskqgroup {
struct taskqgroup_cpu tqg_queue[MAXCPU];
struct mtx tqg_lock;
char * tqg_name;
int tqg_adjusting;
int tqg_stride;
int tqg_cnt;
};
struct taskq_bind_task {
struct task bt_task;
int bt_cpuid;
};
static void
taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx)
{
struct taskqgroup_cpu *qcpu;
qcpu = &qgroup->tqg_queue[idx];
LIST_INIT(&qcpu->tgc_tasks);
qcpu->tgc_taskq = taskqueue_create_fast(NULL, M_WAITOK,
taskqueue_thread_enqueue, &qcpu->tgc_taskq);
taskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
"%s_%d", qgroup->tqg_name, idx);
qcpu->tgc_cpu = idx * qgroup->tqg_stride;
}
static void
taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
{
taskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
}
/*
* Find the taskq with least # of tasks that doesn't currently have any
* other queues from the uniq identifier.
*/
static int
taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
{
struct grouptask *n;
int i, idx, mincnt;
int strict;
mtx_assert(&qgroup->tqg_lock, MA_OWNED);
if (qgroup->tqg_cnt == 0)
return (0);
idx = -1;
mincnt = INT_MAX;
/*
* Two passes; First scan for a queue with the least tasks that
* does not already service this uniq id. If that fails simply find
* the queue with the least total tasks;
*/
for (strict = 1; mincnt == INT_MAX; strict = 0) {
for (i = 0; i < qgroup->tqg_cnt; i++) {
if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
continue;
if (strict) {
LIST_FOREACH(n,
&qgroup->tqg_queue[i].tgc_tasks, gt_list)
if (n->gt_uniq == uniq)
break;
if (n != NULL)
continue;
}
mincnt = qgroup->tqg_queue[i].tgc_cnt;
idx = i;
}
}
if (idx == -1)
panic("taskqgroup_find: Failed to pick a qid.");
return (idx);
}
void
taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
void *uniq, int irq, char *name)
{
cpuset_t mask;
int qid;
gtask->gt_uniq = uniq;
gtask->gt_name = name;
gtask->gt_irq = irq;
gtask->gt_cpu = -1;
mtx_lock(&qgroup->tqg_lock);
qid = taskqgroup_find(qgroup, uniq);
qgroup->tqg_queue[qid].tgc_cnt++;
LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
if (irq != -1 && smp_started) {
CPU_ZERO(&mask);
CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
mtx_unlock(&qgroup->tqg_lock);
intr_setaffinity(irq, &mask);
} else
mtx_unlock(&qgroup->tqg_lock);
}
int
taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
void *uniq, int cpu, int irq, char *name)
{
cpuset_t mask;
int i, qid;
qid = -1;
gtask->gt_uniq = uniq;
gtask->gt_name = name;
gtask->gt_irq = irq;
gtask->gt_cpu = cpu;
mtx_lock(&qgroup->tqg_lock);
if (smp_started) {
for (i = 0; i < qgroup->tqg_cnt; i++)
if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
qid = i;
break;
}
if (qid == -1) {
mtx_unlock(&qgroup->tqg_lock);
return (EINVAL);
}
} else
qid = 0;
qgroup->tqg_queue[qid].tgc_cnt++;
LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
if (irq != -1 && smp_started) {
CPU_ZERO(&mask);
CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
mtx_unlock(&qgroup->tqg_lock);
intr_setaffinity(irq, &mask);
} else
mtx_unlock(&qgroup->tqg_lock);
return (0);
}
void
taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
{
int i;
mtx_lock(&qgroup->tqg_lock);
for (i = 0; i < qgroup->tqg_cnt; i++)
if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
break;
if (i == qgroup->tqg_cnt)
panic("taskqgroup_detach: task not in group\n");
qgroup->tqg_queue[i].tgc_cnt--;
LIST_REMOVE(gtask, gt_list);
mtx_unlock(&qgroup->tqg_lock);
gtask->gt_taskqueue = NULL;
}
static void
taskqgroup_binder(void *ctx, int pending)
{
struct taskq_bind_task *task = (struct taskq_bind_task *)ctx;
cpuset_t mask;
int error;
CPU_ZERO(&mask);
CPU_SET(task->bt_cpuid, &mask);
error = cpuset_setthread(curthread->td_tid, &mask);
thread_lock(curthread);
sched_bind(curthread, task->bt_cpuid);
thread_unlock(curthread);
if (error)
printf("taskqgroup_binder: setaffinity failed: %d\n",
error);
free(task, M_DEVBUF);
}
static void
taskqgroup_bind(struct taskqgroup *qgroup)
{
struct taskq_bind_task *task;
int i;
/*
* Bind taskqueue threads to specific CPUs, if they have been assigned
* one.
*/
for (i = 0; i < qgroup->tqg_cnt; i++) {
task = malloc(sizeof (*task), M_DEVBUF, M_NOWAIT);
TASK_INIT(&task->bt_task, 0, taskqgroup_binder, task);
task->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
taskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
&task->bt_task);
}
}
static int
_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
cpuset_t mask;
struct grouptask *gtask;
int i, old_cnt, qid;
mtx_assert(&qgroup->tqg_lock, MA_OWNED);
if (cnt < 1 || cnt * stride > mp_ncpus || !smp_started) {
printf("taskqgroup_adjust failed cnt: %d stride: %d mp_ncpus: %d smp_started: %d\n",
cnt, stride, mp_ncpus, smp_started);
return (EINVAL);
}
if (qgroup->tqg_adjusting) {
printf("taskqgroup_adjust failed: adjusting\n");
return (EBUSY);
}
qgroup->tqg_adjusting = 1;
old_cnt = qgroup->tqg_cnt;
mtx_unlock(&qgroup->tqg_lock);
/*
* Set up queue for tasks added before boot.
*/
if (old_cnt == 0) {
LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
grouptask, gt_list);
qgroup->tqg_queue[0].tgc_cnt = 0;
}
/*
* If new taskq threads have been added.
*/
for (i = old_cnt; i < cnt; i++)
taskqgroup_cpu_create(qgroup, i);
mtx_lock(&qgroup->tqg_lock);
qgroup->tqg_cnt = cnt;
qgroup->tqg_stride = stride;
/*
* Adjust drivers to use new taskqs.
*/
for (i = 0; i < old_cnt; i++) {
while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
LIST_REMOVE(gtask, gt_list);
qgroup->tqg_queue[i].tgc_cnt--;
LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
}
}
while ((gtask = LIST_FIRST(&gtask_head))) {
LIST_REMOVE(gtask, gt_list);
if (gtask->gt_cpu == -1)
qid = taskqgroup_find(qgroup, gtask->gt_uniq);
else {
for (i = 0; i < qgroup->tqg_cnt; i++)
if (qgroup->tqg_queue[i].tgc_cpu == gtask->gt_cpu) {
qid = i;
break;
}
}
qgroup->tqg_queue[qid].tgc_cnt++;
LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
gt_list);
gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
}
/*
* Set new CPU and IRQ affinity
*/
for (i = 0; i < cnt; i++) {
qgroup->tqg_queue[i].tgc_cpu = i * qgroup->tqg_stride;
CPU_ZERO(&mask);
CPU_SET(qgroup->tqg_queue[i].tgc_cpu, &mask);
LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list) {
if (gtask->gt_irq == -1)
continue;
intr_setaffinity(gtask->gt_irq, &mask);
}
}
mtx_unlock(&qgroup->tqg_lock);
/*
* If taskq thread count has been reduced.
*/
for (i = cnt; i < old_cnt; i++)
taskqgroup_cpu_remove(qgroup, i);
mtx_lock(&qgroup->tqg_lock);
qgroup->tqg_adjusting = 0;
taskqgroup_bind(qgroup);
return (0);
}
int
taskqgroup_adjust(struct taskqgroup *qgroup, int cpu, int stride)
{
int error;
mtx_lock(&qgroup->tqg_lock);
error = _taskqgroup_adjust(qgroup, cpu, stride);
mtx_unlock(&qgroup->tqg_lock);
return (error);
}
struct taskqgroup *
taskqgroup_create(char *name)
{
struct taskqgroup *qgroup;
qgroup = malloc(sizeof(*qgroup), M_TASKQUEUE, M_WAITOK | M_ZERO);
mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
qgroup->tqg_name = name;
LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
return (qgroup);
}
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{
}
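A hedged sketch of creating and sizing a group by hand (names are illustrative; the TASKQGROUP_DEFINE() macro added to sys/sys/taskqueue.h later in this commit wraps this same create-then-adjust pattern in SYSINITs, and mp_ncpus comes from <sys/smp.h>):

static struct taskqgroup *foo_tqg;

static void
foo_tqg_init(void *arg __unused)
{
    foo_tqg = taskqgroup_create("foo");
    /* once SMP is up (e.g. from an SI_SUB_SMP SYSINIT): one taskqueue
     * thread per CPU, stride 1, each bound to its CPU by taskqgroup_bind() */
    taskqgroup_adjust(foo_tqg, mp_ncpus, 1);
}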

sys/net/if.c

@ -3900,6 +3900,19 @@ if_multiaddr_count(if_t ifp, int max)
return (count);
}
int
if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg)
{
struct ifmultiaddr *ifma;
int cnt = 0;
if_maddr_rlock(ifp);
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
cnt += filter(arg, ifma, cnt);
if_maddr_runlock(ifp);
return (cnt);
}
struct mbuf *
if_dequeue(if_t ifp)
{

sys/net/if_var.h

@ -628,6 +628,7 @@ int if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max);
int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max);
int if_multiaddr_count(if_t ifp, int max);
int if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg);
int if_getamcount(if_t ifp);
struct ifaddr * if_getifaddr(if_t ifp);
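A hedged usage sketch for if_multi_apply() (the foo_* names and the destination array are hypothetical; needs <net/if_dl.h> and <net/ethernet.h>). The callback returns how many entries it consumed, and if_multi_apply() feeds the running total back in as 'cnt':

static int
foo_mc_filter(void *arg, struct ifmultiaddr *ifma, int cnt)
{
    uint8_t *mta = arg;    /* array of ETHER_ADDR_LEN-sized slots */

    if (ifma->ifma_addr->sa_family != AF_LINK)
        return (0);        /* not copied, does not advance cnt */
    bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
        &mta[cnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN);
    return (1);            /* one entry copied */
}

static int
foo_set_multi(struct ifnet *ifp, uint8_t *mta)
{
    return (if_multi_apply(ifp, foo_mc_filter, mta));
}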

sys/net/ifdi_if.m (new file, 334 lines)

@ -0,0 +1,334 @@
#
# Copyright (c) 2014, Matthew Macy (kmacy@freebsd.org)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Neither the name of Matthew Macy nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# $FreeBSD$
#
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <machine/bus.h>
#include <sys/bus.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_media.h>
#include <net/iflib.h>
INTERFACE ifdi;
CODE {
static void
null_void_op(if_ctx_t _ctx __unused)
{
}
static void
null_timer_op(if_ctx_t _ctx __unused, uint16_t _qsidx __unused)
{
}
static int
null_int_op(if_ctx_t _ctx __unused)
{
return (0);
}
static void
null_queue_intr_enable(if_ctx_t _ctx __unused, uint16_t _qid __unused)
{
}
static void
null_led_func(if_ctx_t _ctx __unused, int _onoff __unused)
{
}
static void
null_vlan_register_op(if_ctx_t _ctx __unused, uint16_t vtag __unused)
{
}
static int
null_q_setup(if_ctx_t _ctx __unused, uint32_t _qid __unused)
{
return (0);
}
static int
null_i2c_req(if_ctx_t _sctx __unused, struct ifi2creq *_i2c __unused)
{
return (ENOTSUP);
}
static int
null_sysctl_int_delay(if_ctx_t _sctx __unused, if_int_delay_info_t _iidi __unused)
{
return (0);
}
static int
null_iov_init(if_ctx_t _ctx __unused, uint16_t num_vfs __unused, const nvlist_t *params __unused)
{
return (ENOTSUP);
}
static int
null_vf_add(if_ctx_t _ctx __unused, uint16_t num_vfs __unused, const nvlist_t *params __unused)
{
return (ENOTSUP);
}
static int
null_priv_ioctl(if_ctx_t _ctx __unused, u_long command, caddr_t *data __unused)
{
return (ENOTSUP);
}
};
#
# bus interfaces
#
METHOD int attach_pre {
if_ctx_t _ctx;
};
METHOD int attach_post {
if_ctx_t _ctx;
};
METHOD int detach {
if_ctx_t _ctx;
};
METHOD int suspend {
if_ctx_t _ctx;
} DEFAULT null_int_op;
METHOD int shutdown {
if_ctx_t _ctx;
} DEFAULT null_int_op;
METHOD int resume {
if_ctx_t _ctx;
} DEFAULT null_int_op;
#
# downcall to driver to allocate its
# own queue state and tie it to the parent
#
METHOD int tx_queues_alloc {
if_ctx_t _ctx;
caddr_t *_vaddrs;
uint64_t *_paddrs;
int ntxqs;
int ntxqsets;
};
METHOD int rx_queues_alloc {
if_ctx_t _ctx;
caddr_t *_vaddrs;
uint64_t *_paddrs;
int nrxqs;
int nrxqsets;
};
METHOD void queues_free {
if_ctx_t _ctx;
};
#
# interface reset / stop
#
METHOD void init {
if_ctx_t _ctx;
};
METHOD void stop {
if_ctx_t _ctx;
};
#
# interrupt setup and manipulation
#
METHOD int msix_intr_assign {
if_ctx_t _sctx;
int msix;
};
METHOD void intr_enable {
if_ctx_t _ctx;
};
METHOD void intr_disable {
if_ctx_t _ctx;
};
METHOD void queue_intr_enable {
if_ctx_t _ctx;
uint16_t _qid;
} DEFAULT null_queue_intr_enable;
METHOD void link_intr_enable {
if_ctx_t _ctx;
} DEFAULT null_void_op;
#
# interface configuration
#
METHOD void multi_set {
if_ctx_t _ctx;
};
METHOD int mtu_set {
if_ctx_t _ctx;
uint32_t _mtu;
};
METHOD void media_set{
if_ctx_t _ctx;
} DEFAULT null_void_op;
METHOD int promisc_set {
if_ctx_t _ctx;
int _flags;
};
METHOD void crcstrip_set {
if_ctx_t _ctx;
int _onoff;
};
#
# IOV handling
#
METHOD void vflr_handle {
if_ctx_t _ctx;
} DEFAULT null_void_op;
METHOD int iov_init {
if_ctx_t _ctx;
uint16_t num_vfs;
const nvlist_t * params;
} DEFAULT null_iov_init;
METHOD void iov_uninit {
if_ctx_t _ctx;
} DEFAULT null_void_op;
METHOD int iov_vf_add {
if_ctx_t _ctx;
uint16_t num_vfs;
const nvlist_t * params;
} DEFAULT null_vf_add;
#
# Device status
#
METHOD void update_admin_status {
if_ctx_t _ctx;
};
METHOD void media_status {
if_ctx_t _ctx;
struct ifmediareq *_ifm;
};
METHOD int media_change {
if_ctx_t _ctx;
};
METHOD uint64_t get_counter {
if_ctx_t _ctx;
ift_counter cnt;
};
METHOD int priv_ioctl {
if_ctx_t _ctx;
u_long _cmd;
caddr_t _data;
} DEFAULT null_priv_ioctl;
#
# optional methods
#
METHOD int i2c_req {
if_ctx_t _ctx;
struct ifi2creq *_req;
} DEFAULT null_i2c_req;
METHOD int txq_setup {
if_ctx_t _ctx;
uint32_t _txqid;
} DEFAULT null_q_setup;
METHOD int rxq_setup {
if_ctx_t _ctx;
uint32_t _txqid;
} DEFAULT null_q_setup;
METHOD void timer {
if_ctx_t _ctx;
uint16_t _txqid;
} DEFAULT null_timer_op;
METHOD void watchdog_reset {
if_ctx_t _ctx;
} DEFAULT null_void_op;
METHOD void led_func {
if_ctx_t _ctx;
int _onoff;
} DEFAULT null_led_func;
METHOD void vlan_register {
if_ctx_t _ctx;
uint16_t _vtag;
} DEFAULT null_vlan_register_op;
METHOD void vlan_unregister {
if_ctx_t _ctx;
uint16_t _vtag;
} DEFAULT null_vlan_register_op;
METHOD int sysctl_int_delay {
if_ctx_t _sctx;
if_int_delay_info_t _iidi;
} DEFAULT null_sysctl_int_delay;
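A hedged sketch of how a driver binds its implementations to this interface (foo_* names hypothetical): the IFDI methods go into an ordinary kobj method table, the resulting driver_t is what the driver points to from isc_driver in its if_shared_ctx, and any method left out falls back to the null_* defaults above:

static device_method_t foo_if_methods[] = {
    DEVMETHOD(ifdi_attach_pre,          foo_if_attach_pre),
    DEVMETHOD(ifdi_attach_post,         foo_if_attach_post),
    DEVMETHOD(ifdi_detach,              foo_if_detach),
    DEVMETHOD(ifdi_tx_queues_alloc,     foo_if_tx_queues_alloc),
    DEVMETHOD(ifdi_rx_queues_alloc,     foo_if_rx_queues_alloc),
    DEVMETHOD(ifdi_queues_free,         foo_if_queues_free),
    DEVMETHOD(ifdi_init,                foo_if_init),
    DEVMETHOD(ifdi_stop,                foo_if_stop),
    DEVMETHOD(ifdi_msix_intr_assign,    foo_if_msix_intr_assign),
    DEVMETHOD(ifdi_intr_enable,         foo_if_intr_enable),
    DEVMETHOD(ifdi_intr_disable,        foo_if_intr_disable),
    DEVMETHOD(ifdi_multi_set,           foo_if_multi_set),
    DEVMETHOD(ifdi_mtu_set,             foo_if_mtu_set),
    DEVMETHOD(ifdi_media_status,        foo_if_media_status),
    DEVMETHOD(ifdi_media_change,        foo_if_media_change),
    DEVMETHOD(ifdi_promisc_set,         foo_if_promisc_set),
    DEVMETHOD(ifdi_update_admin_status, foo_if_update_admin_status),
    DEVMETHOD_END
};

static driver_t foo_if_driver = {
    "foo_if", foo_if_methods, sizeof(struct foo_softc)
};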

sys/net/iflib.c (new file, 4786 lines; diff suppressed because it is too large)

sys/net/iflib.h (new file, 338 lines)

@ -0,0 +1,338 @@
/*-
* Copyright (c) 2014-2015, Matthew Macy (mmacy@nextbsd.org)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Neither the name of Matthew Macy nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef __IFLIB_H_
#define __IFLIB_H_
#include <sys/kobj.h>
#include <sys/bus.h>
#include <sys/cpuset.h>
#include <machine/bus.h>
#include <sys/bus_dma.h>
#include <sys/nv.h>
/*
* Most cards can handle much larger TSO requests
* but the FreeBSD TCP stack will break on larger
* values
*/
#define FREEBSD_TSO_SIZE_MAX 65518
struct iflib_ctx;
typedef struct iflib_ctx *if_ctx_t;
struct if_shared_ctx;
typedef struct if_shared_ctx *if_shared_ctx_t;
struct if_int_delay_info;
typedef struct if_int_delay_info *if_int_delay_info_t;
/*
* File organization:
* - public structures
* - iflib accessors
* - iflib utility functions
* - iflib core functions
*/
typedef struct if_rxd_frag {
uint8_t irf_flid;
uint16_t irf_idx;
} *if_rxd_frag_t;
typedef struct if_rxd_info {
/* set by iflib */
uint16_t iri_qsidx; /* qset index */
uint16_t iri_vtag; /* vlan tag - if flag set */
uint16_t iri_len; /* packet length */
uint16_t iri_cidx; /* consumer index of cq */
struct ifnet *iri_ifp; /* some drivers >1 interface per softc */
/* updated by driver */
uint16_t iri_flags; /* mbuf flags for packet */
uint32_t iri_flowid; /* RSS hash for packet */
uint32_t iri_csum_flags; /* m_pkthdr csum flags */
uint32_t iri_csum_data; /* m_pkthdr csum data */
uint8_t iri_nfrags; /* number of fragments in packet */
uint8_t iri_rsstype; /* RSS hash type */
uint8_t iri_pad; /* any padding in the received data */
if_rxd_frag_t iri_frags;
} *if_rxd_info_t;
#define IPI_TX_INTR 0x1 /* send an interrupt when this packet is sent */
#define IPI_TX_IPV4 0x2 /* ethertype IPv4 */
#define IPI_TX_IPV6 0x4 /* ethertype IPv6 */
typedef struct if_pkt_info {
uint32_t ipi_len; /* packet length */
bus_dma_segment_t *ipi_segs; /* physical addresses */
uint16_t ipi_qsidx; /* queue set index */
uint16_t ipi_nsegs; /* number of segments */
uint16_t ipi_ndescs; /* number of descriptors used by encap */
uint16_t ipi_flags; /* iflib per-packet flags */
uint32_t ipi_pidx; /* start pidx for encap */
uint32_t ipi_new_pidx; /* next available pidx post-encap */
/* offload handling */
uint64_t ipi_csum_flags; /* packet checksum flags */
uint16_t ipi_tso_segsz; /* tso segment size */
uint16_t ipi_mflags; /* packet mbuf flags */
uint16_t ipi_vtag; /* VLAN tag */
uint16_t ipi_etype; /* ether header type */
uint8_t ipi_ehdrlen; /* ether header length */
uint8_t ipi_ip_hlen; /* ip header length */
uint8_t ipi_tcp_hlen; /* tcp header length */
uint8_t ipi_tcp_hflags; /* tcp header flags */
uint8_t ipi_ipproto; /* ip protocol */
/* implied padding */
uint32_t ipi_tcp_seq; /* tcp seqno */
uint32_t ipi_tcp_sum; /* tcp csum */
} *if_pkt_info_t;
typedef struct if_irq {
struct resource *ii_res;
int ii_rid;
void *ii_tag;
} *if_irq_t;
struct if_int_delay_info {
if_ctx_t iidi_ctx; /* Back-pointer to the iflib ctx (softc) */
int iidi_offset; /* Register offset to read/write */
int iidi_value; /* Current value in usecs */
struct sysctl_oid *iidi_oidp;
struct sysctl_req *iidi_req;
};
typedef enum {
IFLIB_INTR_LEGACY,
IFLIB_INTR_MSI,
IFLIB_INTR_MSIX
} iflib_intr_mode_t;
/*
* This really belongs in pciio.h or some place more general
* but this is the only consumer for now.
*/
typedef struct pci_vendor_info {
uint32_t pvi_vendor_id;
uint32_t pvi_device_id;
uint32_t pvi_subvendor_id;
uint32_t pvi_subdevice_id;
uint32_t pvi_rev_id;
uint32_t pvi_class_mask;
caddr_t pvi_name;
} pci_vendor_info_t;
#define PVID(vendor, devid, name) {vendor, devid, 0, 0, 0, 0, name}
#define PVID_OEM(vendor, devid, svid, sdevid, revid, name) {vendor, devid, svid, sdevid, revid, 0, name}
#define PVID_END {0, 0, 0, 0, 0, 0, NULL}
typedef struct if_txrx {
int (*ift_txd_encap) (void *, if_pkt_info_t);
void (*ift_txd_flush) (void *, uint16_t, uint32_t);
int (*ift_txd_credits_update) (void *, uint16_t, uint32_t, bool);
int (*ift_rxd_available) (void *, uint16_t qsidx, uint32_t pidx);
int (*ift_rxd_pkt_get) (void *, if_rxd_info_t ri);
void (*ift_rxd_refill) (void * , uint16_t qsidx, uint8_t flidx, uint32_t pidx,
uint64_t *paddrs, caddr_t *vaddrs, uint16_t count);
void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, uint32_t pidx);
int (*ift_legacy_intr) (void *);
} *if_txrx_t;
typedef struct if_softc_ctx {
int isc_vectors;
int isc_nrxqsets;
int isc_ntxqsets;
int isc_msix_bar; /* can be model specific - initialize in attach_pre */
int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */
int isc_tx_tso_segments_max;
int isc_tx_tso_size_max;
int isc_tx_tso_segsize_max;
int isc_rss_table_size;
int isc_rss_table_mask;
iflib_intr_mode_t isc_intr;
uint16_t isc_max_frame_size; /* set at init time by driver */
pci_vendor_info_t isc_vendor_info; /* set by iflib prior to attach_pre */
} *if_softc_ctx_t;
/*
* Initialization values for device
*/
struct if_shared_ctx {
int isc_magic;
if_txrx_t isc_txrx;
driver_t *isc_driver;
int isc_ntxd;
int isc_nrxd;
int isc_nfl;
int isc_flags;
bus_size_t isc_q_align;
bus_size_t isc_tx_maxsize;
bus_size_t isc_tx_maxsegsize;
bus_size_t isc_rx_maxsize;
bus_size_t isc_rx_maxsegsize;
int isc_rx_nsegments;
int isc_rx_process_limit;
uint32_t isc_txqsizes[8];
int isc_ntxqs; /* # of tx queues per tx qset - usually 1 */
uint32_t isc_rxqsizes[8];
int isc_nrxqs; /* # of rx queues per rx qset - intel 1, chelsio 2, broadcom 3 */
int isc_admin_intrcnt; /* # of admin/link interrupts */
int isc_tx_reclaim_thresh;
/* fields necessary for probe */
pci_vendor_info_t *isc_vendor_info;
char *isc_driver_version;
/* optional function to transform the read values to match the table*/
void (*isc_parse_devinfo) (uint16_t *device_id, uint16_t *subvendor_id,
uint16_t *subdevice_id, uint16_t *rev_id);
};
typedef struct iflib_dma_info {
bus_addr_t idi_paddr;
caddr_t idi_vaddr;
bus_dma_tag_t idi_tag;
bus_dmamap_t idi_map;
uint32_t idi_size;
} *iflib_dma_info_t;
#define IFLIB_MAGIC 0xCAFEF00D
typedef enum {
IFLIB_INTR_TX,
IFLIB_INTR_RX,
IFLIB_INTR_ADMIN,
IFLIB_INTR_IOV,
} iflib_intr_type_t;
#ifndef ETH_ADDR_LEN
#define ETH_ADDR_LEN 6
#endif
/*
* Interface has a separate command queue
*/
#define IFLIB_HAS_CQ 0x1
/*
* Driver has already allocated vectors
*/
#define IFLIB_SKIP_MSIX 0x2
/*
* Interface is a virtual function
*/
#define IFLIB_IS_VF 0x4
/*
* field accessors
*/
void *iflib_get_softc(if_ctx_t ctx);
device_t iflib_get_dev(if_ctx_t ctx);
if_t iflib_get_ifp(if_ctx_t ctx);
struct ifmedia *iflib_get_media(if_ctx_t ctx);
if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx);
if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx);
void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]);
/*
* If the driver can plug cleanly in to newbus use these
*/
int iflib_device_probe(device_t);
int iflib_device_attach(device_t);
int iflib_device_detach(device_t);
int iflib_device_suspend(device_t);
int iflib_device_resume(device_t);
int iflib_device_shutdown(device_t);
int iflib_device_iov_init(device_t, uint16_t, const nvlist_t *);
void iflib_device_iov_uninit(device_t);
int iflib_device_iov_add_vf(device_t, uint16_t, const nvlist_t *);
/*
* If the driver can't plug cleanly in to newbus
* use these
*/
int iflib_device_register(device_t dev, void *softc, if_shared_ctx_t sctx, if_ctx_t *ctxp);
int iflib_device_deregister(if_ctx_t);
int iflib_irq_alloc(if_ctx_t, if_irq_t, int, driver_filter_t, void *filter_arg, driver_intr_t, void *arg, char *name);
int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
iflib_intr_type_t type, driver_filter_t *filter,
void *filter_arg, int qid, char *name);
void iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void *arg, int qid, char *name);
void iflib_irq_free(if_ctx_t ctx, if_irq_t irq);
void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name);
void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask,
task_fn_t *fn, char *name);
void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid);
void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid);
void iflib_admin_intr_deferred(if_ctx_t ctx);
void iflib_iov_intr_deferred(if_ctx_t ctx);
void iflib_link_state_change(if_ctx_t ctx, int linkstate);
int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags);
void iflib_dma_free(iflib_dma_info_t dma);
int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count);
void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count);
struct mtx *iflib_ctx_lock_get(if_ctx_t);
struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t);
void iflib_led_create(if_ctx_t ctx);
void iflib_add_int_delay_sysctl(if_ctx_t, const char *, const char *,
if_int_delay_info_t, int, int);
#endif /* __IFLIB_H_ */
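A hedged sketch of the static description an iflib driver exports (all foo_* names and numeric values are illustrative): a PVID table for iflib_device_probe(), an if_txrx vector of descriptor handlers, and the if_shared_ctx tying them together. The newbus glue then returns &foo_sctx_init from device_register and points device_probe/attach/detach at iflib_device_probe/attach/detach, as in the device_if.m sketch earlier:

static pci_vendor_info_t foo_vendor_info_array[] = {
    PVID(0x8086, 0x10d3, "Foo Ethernet PF"),
    PVID_END
};

static struct if_txrx foo_txrx = {   /* handler prototypes omitted */
    .ift_txd_encap = foo_txd_encap,
    .ift_txd_flush = foo_txd_flush,
    .ift_txd_credits_update = foo_txd_credits_update,
    .ift_rxd_available = foo_rxd_available,
    .ift_rxd_pkt_get = foo_rxd_pkt_get,
    .ift_rxd_refill = foo_rxd_refill,
    .ift_rxd_flush = foo_rxd_flush,
    .ift_legacy_intr = foo_intr,
};

static struct if_shared_ctx foo_sctx_init = {
    .isc_magic = IFLIB_MAGIC,
    .isc_txrx = &foo_txrx,
    .isc_driver = &foo_if_driver,    /* IFDI kobj driver, see ifdi_if.m sketch */
    .isc_ntxd = 1024,
    .isc_nrxd = 1024,
    .isc_nfl = 1,                    /* one free list per rx queue */
    .isc_ntxqs = 1,
    .isc_nrxqs = 1,
    .isc_q_align = PAGE_SIZE,
    .isc_tx_maxsize = 65535,
    .isc_tx_maxsegsize = PAGE_SIZE,
    .isc_rx_maxsize = MJUM9BYTES,
    .isc_rx_maxsegsize = MJUM9BYTES,
    .isc_rx_nsegments = 1,
    .isc_admin_intrcnt = 1,
    .isc_vendor_info = foo_vendor_info_array,
    .isc_driver_version = "1.0.0",
    /* isc_txqsizes/isc_rxqsizes (ring sizes in bytes) omitted */
};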

sys/net/mp_ring.c (new file, 542 lines)

@ -0,0 +1,542 @@
/*-
* Copyright (c) 2014 Chelsio Communications, Inc.
* All rights reserved.
* Written by: Navdeep Parhar <np@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <machine/cpu.h>
#include <net/mp_ring.h>
#if defined(__i386__)
#define atomic_cmpset_acq_64 atomic_cmpset_64
#define atomic_cmpset_rel_64 atomic_cmpset_64
#endif
union ring_state {
struct {
uint16_t pidx_head;
uint16_t pidx_tail;
uint16_t cidx;
uint16_t flags;
};
uint64_t state;
};
enum {
IDLE = 0, /* consumer ran to completion, nothing more to do. */
BUSY, /* consumer is running already, or will be shortly. */
STALLED, /* consumer stopped due to lack of resources. */
ABDICATED, /* consumer stopped even though there was work to be
done because it wants another thread to take over. */
};
static inline uint16_t
space_available(struct ifmp_ring *r, union ring_state s)
{
uint16_t x = r->size - 1;
if (s.cidx == s.pidx_head)
return (x);
else if (s.cidx > s.pidx_head)
return (s.cidx - s.pidx_head - 1);
else
return (x - s.pidx_head + s.cidx);
}
static inline uint16_t
increment_idx(struct ifmp_ring *r, uint16_t idx, uint16_t n)
{
int x = r->size - idx;
MPASS(x > 0);
return (x > n ? idx + n : n - x);
}
/* Consumer is about to update the ring's state to s */
static inline uint16_t
state_to_flags(union ring_state s, int abdicate)
{
if (s.cidx == s.pidx_tail)
return (IDLE);
else if (abdicate && s.pidx_tail != s.pidx_head)
return (ABDICATED);
return (BUSY);
}
#ifdef NO_64BIT_ATOMICS
static void
drain_ring_locked(struct ifmp_ring *r, union ring_state os, uint16_t prev, int budget)
{
union ring_state ns;
int n, pending, total;
uint16_t cidx = os.cidx;
uint16_t pidx = os.pidx_tail;
MPASS(os.flags == BUSY);
MPASS(cidx != pidx);
if (prev == IDLE)
counter_u64_add(r->starts, 1);
pending = 0;
total = 0;
while (cidx != pidx) {
/* Items from cidx to pidx are available for consumption. */
n = r->drain(r, cidx, pidx);
if (n == 0) {
os.state = ns.state = r->state;
ns.cidx = cidx;
ns.flags = STALLED;
r->state = ns.state;
if (prev != STALLED)
counter_u64_add(r->stalls, 1);
else if (total > 0) {
counter_u64_add(r->restarts, 1);
counter_u64_add(r->stalls, 1);
}
break;
}
cidx = increment_idx(r, cidx, n);
pending += n;
total += n;
/*
* We update the cidx only if we've caught up with the pidx, the
* real cidx is getting too far ahead of the one visible to
* everyone else, or we have exceeded our budget.
*/
if (cidx != pidx && pending < 64 && total < budget)
continue;
os.state = ns.state = r->state;
ns.cidx = cidx;
ns.flags = state_to_flags(ns, total >= budget);
r->state = ns.state;
if (ns.flags == ABDICATED)
counter_u64_add(r->abdications, 1);
if (ns.flags != BUSY) {
/* Wrong loop exit if we're going to stall. */
MPASS(ns.flags != STALLED);
if (prev == STALLED) {
MPASS(total > 0);
counter_u64_add(r->restarts, 1);
}
break;
}
/*
* The acquire style atomic above guarantees visibility of items
* associated with any pidx change that we notice here.
*/
pidx = ns.pidx_tail;
pending = 0;
}
}
#else
/*
* Caller passes in a state, with a guarantee that there is work to do and that
* all items up to the pidx_tail in the state are visible.
*/
static void
drain_ring_lockless(struct ifmp_ring *r, union ring_state os, uint16_t prev, int budget)
{
union ring_state ns;
int n, pending, total;
uint16_t cidx = os.cidx;
uint16_t pidx = os.pidx_tail;
MPASS(os.flags == BUSY);
MPASS(cidx != pidx);
if (prev == IDLE)
counter_u64_add(r->starts, 1);
pending = 0;
total = 0;
while (cidx != pidx) {
/* Items from cidx to pidx are available for consumption. */
n = r->drain(r, cidx, pidx);
if (n == 0) {
critical_enter();
do {
os.state = ns.state = r->state;
ns.cidx = cidx;
ns.flags = STALLED;
} while (atomic_cmpset_64(&r->state, os.state,
ns.state) == 0);
critical_exit();
if (prev != STALLED)
counter_u64_add(r->stalls, 1);
else if (total > 0) {
counter_u64_add(r->restarts, 1);
counter_u64_add(r->stalls, 1);
}
break;
}
cidx = increment_idx(r, cidx, n);
pending += n;
total += n;
/*
* We update the cidx only if we've caught up with the pidx, the
* real cidx is getting too far ahead of the one visible to
* everyone else, or we have exceeded our budget.
*/
if (cidx != pidx && pending < 64 && total < budget)
continue;
critical_enter();
do {
os.state = ns.state = r->state;
ns.cidx = cidx;
ns.flags = state_to_flags(ns, total >= budget);
} while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0);
critical_exit();
if (ns.flags == ABDICATED)
counter_u64_add(r->abdications, 1);
if (ns.flags != BUSY) {
/* Wrong loop exit if we're going to stall. */
MPASS(ns.flags != STALLED);
if (prev == STALLED) {
MPASS(total > 0);
counter_u64_add(r->restarts, 1);
}
break;
}
/*
* The acquire style atomic above guarantees visibility of items
* associated with any pidx change that we notice here.
*/
pidx = ns.pidx_tail;
pending = 0;
}
}
#endif
int
ifmp_ring_alloc(struct ifmp_ring **pr, int size, void *cookie, mp_ring_drain_t drain,
mp_ring_can_drain_t can_drain, struct malloc_type *mt, int flags)
{
struct ifmp_ring *r;
/* All idx are 16b so size can be 65536 at most */
if (pr == NULL || size < 2 || size > 65536 || drain == NULL ||
can_drain == NULL)
return (EINVAL);
*pr = NULL;
flags &= M_NOWAIT | M_WAITOK;
MPASS(flags != 0);
r = malloc(__offsetof(struct ifmp_ring, items[size]), mt, flags | M_ZERO);
if (r == NULL)
return (ENOMEM);
r->size = size;
r->cookie = cookie;
r->mt = mt;
r->drain = drain;
r->can_drain = can_drain;
r->enqueues = counter_u64_alloc(flags);
r->drops = counter_u64_alloc(flags);
r->starts = counter_u64_alloc(flags);
r->stalls = counter_u64_alloc(flags);
r->restarts = counter_u64_alloc(flags);
r->abdications = counter_u64_alloc(flags);
if (r->enqueues == NULL || r->drops == NULL || r->starts == NULL ||
r->stalls == NULL || r->restarts == NULL ||
r->abdications == NULL) {
ifmp_ring_free(r);
return (ENOMEM);
}
*pr = r;
#ifdef NO_64BIT_ATOMICS
mtx_init(&r->lock, "mp_ring lock", NULL, MTX_DEF);
#endif
return (0);
}
void
ifmp_ring_free(struct ifmp_ring *r)
{
if (r == NULL)
return;
if (r->enqueues != NULL)
counter_u64_free(r->enqueues);
if (r->drops != NULL)
counter_u64_free(r->drops);
if (r->starts != NULL)
counter_u64_free(r->starts);
if (r->stalls != NULL)
counter_u64_free(r->stalls);
if (r->restarts != NULL)
counter_u64_free(r->restarts);
if (r->abdications != NULL)
counter_u64_free(r->abdications);
free(r, r->mt);
}
/*
* Enqueue n items and maybe drain the ring for some time.
*
* Returns an errno.
*/
#ifdef NO_64BIT_ATOMICS
int
ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget)
{
union ring_state os, ns;
uint16_t pidx_start, pidx_stop;
int i;
MPASS(items != NULL);
MPASS(n > 0);
mtx_lock(&r->lock);
/*
* Reserve room for the new items. Our reservation, if successful, is
* from 'pidx_start' to 'pidx_stop'.
*/
os.state = r->state;
if (n >= space_available(r, os)) {
counter_u64_add(r->drops, n);
MPASS(os.flags != IDLE);
if (os.flags == STALLED)
ifmp_ring_check_drainage(r, 0);
return (ENOBUFS);
}
ns.state = os.state;
ns.pidx_head = increment_idx(r, os.pidx_head, n);
r->state = ns.state;
pidx_start = os.pidx_head;
pidx_stop = ns.pidx_head;
/*
* Wait for other producers who got in ahead of us to enqueue their
* items, one producer at a time. It is our turn when the ring's
* pidx_tail reaches the beginning of our reservation (pidx_start).
*/
while (ns.pidx_tail != pidx_start) {
cpu_spinwait();
ns.state = r->state;
}
/* Now it is our turn to fill up the area we reserved earlier. */
i = pidx_start;
do {
r->items[i] = *items++;
if (__predict_false(++i == r->size))
i = 0;
} while (i != pidx_stop);
/*
* Update the ring's pidx_tail. The release style atomic guarantees
* that the items are visible to any thread that sees the updated pidx.
*/
os.state = ns.state = r->state;
ns.pidx_tail = pidx_stop;
ns.flags = BUSY;
r->state = ns.state;
counter_u64_add(r->enqueues, n);
/*
* Turn into a consumer if some other thread isn't active as a consumer
* already.
*/
if (os.flags != BUSY)
drain_ring_locked(r, ns, os.flags, budget);
mtx_unlock(&r->lock);
return (0);
}
#else
int
ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget)
{
union ring_state os, ns;
uint16_t pidx_start, pidx_stop;
int i;
MPASS(items != NULL);
MPASS(n > 0);
/*
* Reserve room for the new items. Our reservation, if successful, is
* from 'pidx_start' to 'pidx_stop'.
*/
for (;;) {
os.state = r->state;
if (n >= space_available(r, os)) {
counter_u64_add(r->drops, n);
MPASS(os.flags != IDLE);
if (os.flags == STALLED)
ifmp_ring_check_drainage(r, 0);
return (ENOBUFS);
}
ns.state = os.state;
ns.pidx_head = increment_idx(r, os.pidx_head, n);
critical_enter();
if (atomic_cmpset_64(&r->state, os.state, ns.state))
break;
critical_exit();
cpu_spinwait();
}
pidx_start = os.pidx_head;
pidx_stop = ns.pidx_head;
/*
* Wait for other producers who got in ahead of us to enqueue their
* items, one producer at a time. It is our turn when the ring's
* pidx_tail reaches the beginning of our reservation (pidx_start).
*/
while (ns.pidx_tail != pidx_start) {
cpu_spinwait();
ns.state = r->state;
}
/* Now it is our turn to fill up the area we reserved earlier. */
i = pidx_start;
do {
r->items[i] = *items++;
if (__predict_false(++i == r->size))
i = 0;
} while (i != pidx_stop);
/*
* Update the ring's pidx_tail. The release style atomic guarantees
* that the items are visible to any thread that sees the updated pidx.
*/
do {
os.state = ns.state = r->state;
ns.pidx_tail = pidx_stop;
ns.flags = BUSY;
} while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0);
critical_exit();
counter_u64_add(r->enqueues, n);
/*
* Turn into a consumer if some other thread isn't active as a consumer
* already.
*/
if (os.flags != BUSY)
drain_ring_lockless(r, ns, os.flags, budget);
return (0);
}
#endif
void
ifmp_ring_check_drainage(struct ifmp_ring *r, int budget)
{
union ring_state os, ns;
os.state = r->state;
if (os.flags != STALLED || os.pidx_head != os.pidx_tail || r->can_drain(r) == 0)
return;
MPASS(os.cidx != os.pidx_tail); /* implied by STALLED */
ns.state = os.state;
ns.flags = BUSY;
#ifdef NO_64BIT_ATOMICS
mtx_lock(&r->lock);
if (r->state != os.state) {
mtx_unlock(&r->lock);
return;
}
r->state = ns.state;
drain_ring_locked(r, ns, os.flags, budget);
mtx_unlock(&r->lock);
#else
/*
* The acquire style atomic guarantees visibility of items associated
* with the pidx that we read here.
*/
if (!atomic_cmpset_acq_64(&r->state, os.state, ns.state))
return;
drain_ring_lockless(r, ns, os.flags, budget);
#endif
}
void
ifmp_ring_reset_stats(struct ifmp_ring *r)
{
counter_u64_zero(r->enqueues);
counter_u64_zero(r->drops);
counter_u64_zero(r->starts);
counter_u64_zero(r->stalls);
counter_u64_zero(r->restarts);
counter_u64_zero(r->abdications);
}
int
ifmp_ring_is_idle(struct ifmp_ring *r)
{
union ring_state s;
s.state = r->state;
if (s.pidx_head == s.pidx_tail && s.pidx_tail == s.cidx &&
s.flags == IDLE)
return (1);
return (0);
}
int
ifmp_ring_is_stalled(struct ifmp_ring *r)
{
union ring_state s;
s.state = r->state;
if (s.pidx_head == s.pidx_tail && s.flags == STALLED)
return (1);
return (0);
}

sys/net/mp_ring.h (new file, 71 lines)

@ -0,0 +1,71 @@
/*-
* Copyright (c) 2014 Chelsio Communications, Inc.
* All rights reserved.
* Written by: Navdeep Parhar <np@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*
*/
#ifndef __NET_MP_RING_H
#define __NET_MP_RING_H
#ifndef _KERNEL
#error "no user-serviceable parts inside"
#endif
struct ifmp_ring;
typedef u_int (*mp_ring_drain_t)(struct ifmp_ring *, u_int, u_int);
typedef u_int (*mp_ring_can_drain_t)(struct ifmp_ring *);
typedef void (*mp_ring_serial_t)(struct ifmp_ring *);
struct ifmp_ring {
volatile uint64_t state __aligned(CACHE_LINE_SIZE);
int size __aligned(CACHE_LINE_SIZE);
void * cookie;
struct malloc_type * mt;
mp_ring_drain_t drain;
mp_ring_can_drain_t can_drain; /* cheap, may be unreliable */
counter_u64_t enqueues;
counter_u64_t drops;
counter_u64_t starts;
counter_u64_t stalls;
counter_u64_t restarts; /* recovered after stalling */
counter_u64_t abdications;
#ifdef NO_64BIT_ATOMICS
struct mtx lock;
#endif
void * volatile items[] __aligned(CACHE_LINE_SIZE);
};
int ifmp_ring_alloc(struct ifmp_ring **, int, void *, mp_ring_drain_t,
mp_ring_can_drain_t, struct malloc_type *, int);
void ifmp_ring_free(struct ifmp_ring *);
int ifmp_ring_enqueue(struct ifmp_ring *, void **, int, int);
void ifmp_ring_check_drainage(struct ifmp_ring *, int);
void ifmp_ring_reset_stats(struct ifmp_ring *);
int ifmp_ring_is_idle(struct ifmp_ring *);
int ifmp_ring_is_stalled(struct ifmp_ring *r);
#endif
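A hedged usage sketch (the foo_* names and the ift_br field are hypothetical): a transmit queue allocates a ring with its drain callbacks, producers enqueue packets from any context, and whichever producer finds the ring idle becomes the consumer and runs the drain routine:

static u_int
foo_txq_drain(struct ifmp_ring *r, u_int cidx, u_int pidx)
{
    /* hand r->items[cidx] .. up to (but not including) r->items[pidx],
     * wrapping at r->size, to the hardware; returning 0 stalls the ring
     * until foo_txq_can_drain() reports resources again */
    return (pidx >= cidx ? pidx - cidx : r->size - cidx + pidx);
}

static u_int
foo_txq_can_drain(struct ifmp_ring *r)
{
    return (1);        /* cheap check: tx descriptors available? */
}

static int
foo_txq_init(struct foo_txq *txq)
{
    return (ifmp_ring_alloc(&txq->ift_br, 2048, txq, foo_txq_drain,
        foo_txq_can_drain, M_DEVBUF, M_WAITOK));
}

static int
foo_transmit(struct foo_txq *txq, struct mbuf *m)
{
    /* budget of 32: consume at most 32 items before abdicating */
    return (ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, 32));
}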

sys/sys/_task.h

@ -45,10 +45,21 @@ typedef void task_fn_t(void *context, int pending);
struct task {
STAILQ_ENTRY(task) ta_link; /* (q) link for queue */
u_short ta_pending; /* (q) count times queued */
uint8_t ta_pending; /* (q) count times queued */
uint8_t ta_flags; /* (q) flags */
u_short ta_priority; /* (c) Priority */
task_fn_t *ta_func; /* (c) task handler */
void *ta_context; /* (c) argument for handler */
};
struct grouptask {
struct task gt_task;
void *gt_taskqueue;
LIST_ENTRY(grouptask) gt_list;
void *gt_uniq;
char *gt_name;
int16_t gt_irq;
int16_t gt_cpu;
};
#endif /* !_SYS__TASK_H_ */

sys/sys/mbuf.h

@ -279,6 +279,8 @@ struct mbuf {
#define M_PROTO11 0x00400000 /* protocol-specific */
#define M_PROTO12 0x00800000 /* protocol-specific */
#define MB_DTOR_SKIP 0x1 /* don't pollute the cache by touching a freed mbuf */
/*
* Flags to purge when crossing layers.
*/
@ -401,6 +403,7 @@ struct mbuf {
*/
#define EXT_FLAG_EMBREF 0x000001 /* embedded ext_count */
#define EXT_FLAG_EXTREF 0x000002 /* external ext_cnt, notyet */
#define EXT_FLAG_NOFREE 0x000010 /* don't free mbuf to pool, notyet */
#define EXT_FLAG_VENDOR1 0x010000 /* for vendor-internal use */

sys/sys/taskqueue.h

@ -39,6 +39,7 @@
#include <sys/_cpuset.h>
struct taskqueue;
struct taskqgroup;
struct thread;
struct timeout_task {
@ -143,7 +144,7 @@ taskqueue_define_##name(void *arg) \
init; \
} \
\
SYSINIT(taskqueue_##name, SI_SUB_CONFIGURE, SI_ORDER_SECOND, \
SYSINIT(taskqueue_##name, SI_SUB_INIT_IF, SI_ORDER_SECOND, \
taskqueue_define_##name, NULL); \
\
struct __hack
@ -168,7 +169,7 @@ taskqueue_define_##name(void *arg) \
init; \
} \
\
SYSINIT(taskqueue_##name, SI_SUB_CONFIGURE, SI_ORDER_SECOND, \
SYSINIT(taskqueue_##name, SI_SUB_INIT_IF, SI_ORDER_SECOND, \
taskqueue_define_##name, NULL); \
\
struct __hack
@ -202,4 +203,63 @@ struct taskqueue *taskqueue_create_fast(const char *name, int mflags,
taskqueue_enqueue_fn enqueue,
void *context);
/*
* Taskqueue groups. Manages dynamic thread groups and irq binding for
* device and other tasks.
*/
int grouptaskqueue_enqueue(struct taskqueue *queue, struct task *task);
void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
void *uniq, int irq, char *name);
int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
void *uniq, int cpu, int irq, char *name);
void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask);
struct taskqgroup *taskqgroup_create(char *name);
void taskqgroup_destroy(struct taskqgroup *qgroup);
int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride);
#define TASK_SKIP_WAKEUP 0x1
#define GTASK_INIT(task, priority, func, context) do { \
(task)->ta_pending = 0; \
(task)->ta_flags = TASK_SKIP_WAKEUP; \
(task)->ta_priority = (priority); \
(task)->ta_func = (func); \
(task)->ta_context = (context); \
} while (0)
#define GROUPTASK_INIT(gtask, priority, func, context) \
GTASK_INIT(&(gtask)->gt_task, priority, func, context)
#define GROUPTASK_ENQUEUE(gtask) \
grouptaskqueue_enqueue((gtask)->gt_taskqueue, &(gtask)->gt_task)
#define TASKQGROUP_DECLARE(name) \
extern struct taskqgroup *qgroup_##name
#define TASKQGROUP_DEFINE(name, cnt, stride) \
\
struct taskqgroup *qgroup_##name; \
\
static void \
taskqgroup_define_##name(void *arg) \
{ \
qgroup_##name = taskqgroup_create(#name); \
} \
\
SYSINIT(taskqgroup_##name, SI_SUB_INIT_IF, SI_ORDER_FIRST, \
taskqgroup_define_##name, NULL); \
\
static void \
taskqgroup_adjust_##name(void *arg) \
{ \
taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \
} \
\
SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \
taskqgroup_adjust_##name, NULL); \
\
struct __hack
TASKQGROUP_DECLARE(net);
#endif /* !_SYS_TASKQUEUE_H_ */
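A hedged sketch tying the grouptask pieces together (foo_* names are hypothetical; qgroup_net is the group declared above and defined elsewhere with TASKQGROUP_DEFINE): a driver initializes a grouptask, attaches it so it gets a per-CPU taskqueue plus IRQ affinity, and defers work to it from its interrupt filter:

static struct grouptask foo_rx_gtask;

static void
foo_rx_task_fn(void *context, int pending)
{
    /* process completed rx descriptors for the queue passed as context */
}

static void
foo_rxq_setup(struct foo_rxq *rxq, int irq)
{
    GROUPTASK_INIT(&foo_rx_gtask, 0, foo_rx_task_fn, rxq);
    /* place the task on the least-loaded queue in the group and steer
     * the rx interrupt to that queue's CPU */
    taskqgroup_attach(qgroup_net, &foo_rx_gtask, rxq, irq, "foo rxq");
}

static int
foo_rx_intr(void *arg)
{
    /* minimal work in interrupt context, then hand off */
    GROUPTASK_ENQUEUE(&foo_rx_gtask);
    return (FILTER_HANDLED);
}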