diff --git a/sys/dev/bnxt/if_bnxt.c b/sys/dev/bnxt/if_bnxt.c
index aaf18b04a120..94c65007ab0f 100644
--- a/sys/dev/bnxt/if_bnxt.c
+++ b/sys/dev/bnxt/if_bnxt.c
@@ -1640,8 +1640,7 @@ bnxt_msix_intr_assign(if_ctx_t ctx, int msix)
 	}
 
 	for (i=0; i<scctx->isc_ntxqsets; i++)
-		/* TODO: Benchmark and see if tying to the RX irqs helps */
-		iflib_softirq_alloc_generic(ctx, -1, IFLIB_INTR_TX, NULL, i,
+		iflib_softirq_alloc_generic(ctx, i + 1, IFLIB_INTR_TX, NULL, i,
 		    "tx_cp");
 
 	return rc;
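The hunk above stops allocating the bnxt TX completion tasks as floating softirqs (rid -1) and pins each one to rid i + 1, the same resource id the RX ring interrupts were allocated with earlier in bnxt_msix_intr_assign(). A minimal sketch of that pairing, assuming iflib's usual convention that rid 0 is the admin vector and ring vectors start at rid 1 (handlers and error handling are elided; this is illustrative, not the driver's code):

static int
example_msix_assign(if_ctx_t ctx, int nrxqsets, int ntxqsets)
{
	struct if_irq irq;
	int i;

	for (i = 0; i < nrxqsets; i++)
		/* RX ring i gets hardware vector rid i + 1 */
		iflib_irq_alloc_generic(ctx, &irq, i + 1, IFLIB_INTR_RX,
		    NULL /* filter */, NULL /* arg */, i, "rx_cp");
	for (i = 0; i < ntxqsets; i++)
		/* TX completion task i is deferred to the same rid, i + 1 */
		iflib_softirq_alloc_generic(ctx, i + 1, IFLIB_INTR_TX,
		    NULL, i, "tx_cp");
	return (0);
}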
diff --git a/sys/dev/e1000/e1000_80003es2lan.c b/sys/dev/e1000/e1000_80003es2lan.c
index 7377d8e9d867..e7c42d5386eb 100644
--- a/sys/dev/e1000/e1000_80003es2lan.c
+++ b/sys/dev/e1000/e1000_80003es2lan.c
@@ -59,6 +59,7 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw);
 static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw);
 static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw);
 static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw);
+static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask);
 static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex);
 static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw);
 static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw);
@@ -67,6 +68,7 @@ static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset,
 static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset,
 					    u16 data);
 static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw);
+static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask);
 static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw);
 static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw);
 
@@ -297,7 +299,7 @@ static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw)
 	DEBUGFUNC("e1000_acquire_phy_80003es2lan");
 
 	mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM;
-	return e1000_acquire_swfw_sync(hw, mask);
+	return e1000_acquire_swfw_sync_80003es2lan(hw, mask);
 }
 
 /**
@@ -313,7 +315,7 @@ static void e1000_release_phy_80003es2lan(struct e1000_hw *hw)
 	DEBUGFUNC("e1000_release_phy_80003es2lan");
 
 	mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM;
-	e1000_release_swfw_sync(hw, mask);
+	e1000_release_swfw_sync_80003es2lan(hw, mask);
 }
 
 /**
@@ -331,7 +333,7 @@ static s32 e1000_acquire_mac_csr_80003es2lan(struct e1000_hw *hw)
 
 	mask = E1000_SWFW_CSR_SM;
 
-	return e1000_acquire_swfw_sync(hw, mask);
+	return e1000_acquire_swfw_sync_80003es2lan(hw, mask);
 }
 
 /**
@@ -348,7 +350,7 @@ static void e1000_release_mac_csr_80003es2lan(struct e1000_hw *hw)
 
 	mask = E1000_SWFW_CSR_SM;
 
-	e1000_release_swfw_sync(hw, mask);
+	e1000_release_swfw_sync_80003es2lan(hw, mask);
 }
 
 /**
@@ -363,14 +365,14 @@ static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw)
 
 	DEBUGFUNC("e1000_acquire_nvm_80003es2lan");
 
-	ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM);
+	ret_val = e1000_acquire_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM);
 	if (ret_val)
 		return ret_val;
 
 	ret_val = e1000_acquire_nvm_generic(hw);
 
 	if (ret_val)
-		e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM);
+		e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM);
 
 	return ret_val;
 }
@@ -386,7 +388,78 @@ static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw)
 	DEBUGFUNC("e1000_release_nvm_80003es2lan");
 
 	e1000_release_nvm_generic(hw);
-	e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM);
+	e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM);
+}
+
+/**
+ * e1000_acquire_swfw_sync_80003es2lan - Acquire SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask
+ * will also specify which port we're acquiring the lock for.
+ **/
+static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+	u32 swmask = mask;
+	u32 fwmask = mask << 16;
+	s32 i = 0;
+	s32 timeout = 50;
+
+	DEBUGFUNC("e1000_acquire_swfw_sync_80003es2lan");
+
+	while (i < timeout) {
+		if (e1000_get_hw_semaphore_generic(hw))
+			return -E1000_ERR_SWFW_SYNC;
+
+		swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+		if (!(swfw_sync & (fwmask | swmask)))
+			break;
+
+		/* Firmware currently using resource (fwmask)
+		 * or other software thread using resource (swmask)
+		 */
+		e1000_put_hw_semaphore_generic(hw);
+		msec_delay_irq(5);
+		i++;
+	}
+
+	if (i == timeout) {
+		DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n");
+		return -E1000_ERR_SWFW_SYNC;
+	}
+
+	swfw_sync |= swmask;
+	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+	e1000_put_hw_semaphore_generic(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_release_swfw_sync_80003es2lan - Release SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Release the SW/FW semaphore used to access the PHY or NVM. The mask
+ * will also specify which port we're releasing the lock for.
+ **/
+static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+
+	DEBUGFUNC("e1000_release_swfw_sync_80003es2lan");
+
+	while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS)
+		; /* Empty */
+
+	swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+	swfw_sync &= ~mask;
+	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+	e1000_put_hw_semaphore_generic(hw);
 }
 
 /**
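The two functions added above give the 80003es2lan its own copy of the SW/FW semaphore protocol instead of the removed shared helpers. A hedged sketch of the calling pattern they support, modeled on the KMRN accessors in this same file (the caller function is hypothetical; the E1000_KMRNCTRLSTA* names are real defines from e1000_defines.h):

static s32
e1000_kmrn_read_example(struct e1000_hw *hw, u32 offset, u16 *data)
{
	s32 ret_val;

	/* Take the driver/firmware lock before touching KMRN registers */
	ret_val = e1000_acquire_mac_csr_80003es2lan(hw);
	if (ret_val)
		return ret_val;

	E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, E1000_KMRNCTRLSTA_REN |
	    (offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT));
	usec_delay(2);
	*data = (u16)E1000_READ_REG(hw, E1000_KMRNCTRLSTA);

	/* Always drop the semaphore, including on the success path */
	e1000_release_mac_csr_80003es2lan(hw);
	return E1000_SUCCESS;
}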
diff --git a/sys/dev/e1000/e1000_82571.c b/sys/dev/e1000/e1000_82571.c
index 38f7e0f90955..5ff17f098a21 100644
--- a/sys/dev/e1000/e1000_82571.c
+++ b/sys/dev/e1000/e1000_82571.c
@@ -70,8 +70,11 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw);
 static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw);
 static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data);
 static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw);
+static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw);
 static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw);
 static s32 e1000_get_phy_id_82571(struct e1000_hw *hw);
+static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw);
+static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw);
 static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw);
 static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw);
 static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw,
@@ -122,8 +125,8 @@ static s32 e1000_init_phy_params_82571(struct e1000_hw *hw)
 		phy->ops.get_cable_length = e1000_get_cable_length_igp_2;
 		phy->ops.read_reg = e1000_read_phy_reg_igp;
 		phy->ops.write_reg = e1000_write_phy_reg_igp;
-		phy->ops.acquire = e1000_get_hw_semaphore;
-		phy->ops.release = e1000_put_hw_semaphore;
+		phy->ops.acquire = e1000_get_hw_semaphore_82571;
+		phy->ops.release = e1000_put_hw_semaphore_82571;
 		break;
 	case e1000_82573:
 		phy->type = e1000_phy_m88;
@@ -135,11 +138,12 @@ static s32 e1000_init_phy_params_82571(struct e1000_hw *hw)
 		phy->ops.get_cable_length = e1000_get_cable_length_m88;
 		phy->ops.read_reg = e1000_read_phy_reg_m88;
 		phy->ops.write_reg = e1000_write_phy_reg_m88;
-		phy->ops.acquire = e1000_get_hw_semaphore;
-		phy->ops.release = e1000_put_hw_semaphore;
+		phy->ops.acquire = e1000_get_hw_semaphore_82571;
+		phy->ops.release = e1000_put_hw_semaphore_82571;
 		break;
 	case e1000_82574:
 	case e1000_82583:
+		E1000_MUTEX_INIT(&hw->dev_spec._82571.swflag_mutex);
 		phy->type = e1000_phy_bm;
 		phy->ops.get_cfg_done = e1000_get_cfg_done_generic;
 
@@ -502,21 +506,99 @@ static s32 e1000_get_phy_id_82571(struct e1000_hw *hw)
 }
 
 /**
- * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore
+ * e1000_get_hw_semaphore_82571 - Acquire hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the HW semaphore to access the PHY or NVM
+ **/
+static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw)
+{
+	u32 swsm;
+	s32 sw_timeout = hw->nvm.word_size + 1;
+	s32 fw_timeout = hw->nvm.word_size + 1;
+	s32 i = 0;
+
+	DEBUGFUNC("e1000_get_hw_semaphore_82571");
+
+	/* If we have timedout 3 times on trying to acquire
+	 * the inter-port SMBI semaphore, there is old code
+	 * operating on the other port, and it is not
+	 * releasing SMBI. Modify the number of times that
+	 * we try for the semaphore to interwork with this
+	 * older code.
+	 */
+	if (hw->dev_spec._82571.smb_counter > 2)
+		sw_timeout = 1;
+
+	/* Get the SW semaphore */
+	while (i < sw_timeout) {
+		swsm = E1000_READ_REG(hw, E1000_SWSM);
+		if (!(swsm & E1000_SWSM_SMBI))
+			break;
+
+		usec_delay(50);
+		i++;
+	}
+
+	if (i == sw_timeout) {
+		DEBUGOUT("Driver can't access device - SMBI bit is set.\n");
+		hw->dev_spec._82571.smb_counter++;
+	}
+	/* Get the FW semaphore. */
+	for (i = 0; i < fw_timeout; i++) {
+		swsm = E1000_READ_REG(hw, E1000_SWSM);
+		E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
+
+		/* Semaphore acquired if bit latched */
+		if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI)
+			break;
+
+		usec_delay(50);
+	}
+
+	if (i == fw_timeout) {
+		/* Release semaphores */
+		e1000_put_hw_semaphore_82571(hw);
+		DEBUGOUT("Driver can't access the NVM\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_put_hw_semaphore_82571 - Release hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Release hardware semaphore used to access the PHY or NVM
+ **/
+static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw)
+{
+	u32 swsm;
+
+	DEBUGFUNC("e1000_put_hw_semaphore_generic");
+
+	swsm = E1000_READ_REG(hw, E1000_SWSM);
+
+	swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
+
+	E1000_WRITE_REG(hw, E1000_SWSM, swsm);
+}
+
+/**
+ * e1000_get_hw_semaphore_82573 - Acquire hardware semaphore
  * @hw: pointer to the HW structure
  *
  * Acquire the HW semaphore during reset.
  *
  **/
-static s32
-e1000_get_hw_semaphore_82574(struct e1000_hw *hw)
+static s32 e1000_get_hw_semaphore_82573(struct e1000_hw *hw)
 {
 	u32 extcnf_ctrl;
 	s32 i = 0;
 
-	/* XXX assert that mutex is held */
+	DEBUGFUNC("e1000_get_hw_semaphore_82573");
 
-	ASSERT_CTX_LOCK_HELD(hw);
 	extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL);
 	do {
 		extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP;
@@ -532,7 +614,7 @@ e1000_get_hw_semaphore_82574(struct e1000_hw *hw)
 
 	if (i == MDIO_OWNERSHIP_TIMEOUT) {
 		/* Release semaphores */
-		e1000_put_hw_semaphore_82574(hw);
+		e1000_put_hw_semaphore_82573(hw);
 		DEBUGOUT("Driver can't access the PHY\n");
 		return -E1000_ERR_PHY;
 	}
@@ -541,24 +623,58 @@ e1000_get_hw_semaphore_82574(struct e1000_hw *hw)
 }
 
 /**
- * e1000_put_hw_semaphore_82574 - Release hardware semaphore
+ * e1000_put_hw_semaphore_82573 - Release hardware semaphore
  * @hw: pointer to the HW structure
  *
  * Release hardware semaphore used during reset.
  *
 **/
-static void
-e1000_put_hw_semaphore_82574(struct e1000_hw *hw)
+static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw)
 {
 	u32 extcnf_ctrl;
 
-	DEBUGFUNC("e1000_put_hw_semaphore_82574");
+	DEBUGFUNC("e1000_put_hw_semaphore_82573");
 
 	extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL);
 	extcnf_ctrl &= ~E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP;
 	E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl);
 }
 
+/**
+ * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the HW semaphore to access the PHY or NVM.
+ *
+ **/
+static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	DEBUGFUNC("e1000_get_hw_semaphore_82574");
+
+	E1000_MUTEX_LOCK(&hw->dev_spec._82571.swflag_mutex);
+	ret_val = e1000_get_hw_semaphore_82573(hw);
+	if (ret_val)
+		E1000_MUTEX_UNLOCK(&hw->dev_spec._82571.swflag_mutex);
+	return ret_val;
+}
+
+/**
+ * e1000_put_hw_semaphore_82574 - Release hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Release hardware semaphore used to access the PHY or NVM
+ *
+ **/
+static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw)
+{
+	DEBUGFUNC("e1000_put_hw_semaphore_82574");
+
+	e1000_put_hw_semaphore_82573(hw);
+	E1000_MUTEX_UNLOCK(&hw->dev_spec._82571.swflag_mutex);
+}
+
 /**
  * e1000_set_d0_lplu_state_82574 - Set Low Power Linkup D0 state
  * @hw: pointer to the HW structure
@@ -630,7 +746,7 @@ static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw)
 
 	DEBUGFUNC("e1000_acquire_nvm_82571");
 
-	ret_val = e1000_get_hw_semaphore(hw);
+	ret_val = e1000_get_hw_semaphore_82571(hw);
 	if (ret_val)
 		return ret_val;
 
@@ -643,7 +759,7 @@ static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw)
 	}
 
 	if (ret_val)
-		e1000_put_hw_semaphore(hw);
+		e1000_put_hw_semaphore_82571(hw);
 
 	return ret_val;
 }
@@ -659,7 +775,7 @@ static void e1000_release_nvm_82571(struct e1000_hw *hw)
 	DEBUGFUNC("e1000_release_nvm_82571");
 
 	e1000_release_nvm_generic(hw);
-	e1000_put_hw_semaphore(hw);
+	e1000_put_hw_semaphore_82571(hw);
 }
 
 /**
@@ -976,6 +1092,8 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw)
 	 */
 	switch (hw->mac.type) {
 	case e1000_82573:
+		ret_val = e1000_get_hw_semaphore_82573(hw);
+		break;
 	case e1000_82574:
 	case e1000_82583:
 		ret_val = e1000_get_hw_semaphore_82574(hw);
@@ -992,6 +1110,10 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw)
 	/* Must release MDIO ownership and mutex after MAC reset.
 	 */
 	switch (hw->mac.type) {
 	case e1000_82573:
+		/* Release mutex only if the hw semaphore is acquired */
+		if (!ret_val)
+			e1000_put_hw_semaphore_82573(hw);
+		break;
 	case e1000_82574:
 	case e1000_82583:
 		/* Release mutex only if the hw semaphore is acquired */
@@ -999,7 +1121,6 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw)
 			e1000_put_hw_semaphore_82574(hw);
 		break;
 	default:
-		panic("unknown mac type %x\n", hw->mac.type);
 		break;
 	}
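On 82574/82583 the acquire path now nests two locks: the host-side swflag_mutex is taken first, then the EXTCNF_CTRL MDIO ownership bit via e1000_get_hw_semaphore_82573(); release runs in the reverse order, and a failed acquire drops the mutex before returning, so callers never unwind it themselves. A sketch of the resulting caller discipline (the caller is hypothetical, and both wrappers are static to e1000_82571.c):

static s32
phy_access_example(struct e1000_hw *hw)
{
	s32 ret_val;

	ret_val = e1000_get_hw_semaphore_82574(hw); /* mutex, then HW bit */
	if (ret_val)
		return ret_val;	/* mutex was already dropped on failure */

	/* ... access the PHY or NVM registers here ... */

	e1000_put_hw_semaphore_82574(hw); /* HW bit, then mutex */
	return E1000_SUCCESS;
}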
diff --git a/sys/dev/e1000/e1000_82575.c b/sys/dev/e1000/e1000_82575.c
index 064731a7d551..5d68e8b9718c 100644
--- a/sys/dev/e1000/e1000_82575.c
+++ b/sys/dev/e1000/e1000_82575.c
@@ -79,9 +79,11 @@ static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data);
 static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw,
 					   u32 offset, u16 data);
 static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw);
+static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask);
 static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw,
 						u16 *speed, u16 *duplex);
 static s32 e1000_get_phy_id_82575(struct e1000_hw *hw);
+static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask);
 static bool e1000_sgmii_active_82575(struct e1000_hw *hw);
 static s32 e1000_reset_init_script_82575(struct e1000_hw *hw);
 static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw);
@@ -509,8 +511,12 @@ static s32 e1000_init_mac_params_82575(struct e1000_hw *hw)
 	/* link info */
 	mac->ops.get_link_up_info = e1000_get_link_up_info_82575;
 	/* acquire SW_FW sync */
-	mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync;
-	mac->ops.release_swfw_sync = e1000_release_swfw_sync;
+	mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575;
+	mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575;
+	if (mac->type >= e1000_i210) {
+		mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210;
+		mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210;
+	}
 
 	/* set lan id for port to determine which phy lock to use */
 	hw->mac.ops.set_lan_id(hw);
@@ -982,7 +988,7 @@ static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw)
 
 	DEBUGFUNC("e1000_acquire_nvm_82575");
 
-	ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM);
+	ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
 	if (ret_val)
 		goto out;
 
@@ -1013,7 +1019,7 @@ static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw)
 	ret_val = e1000_acquire_nvm_generic(hw);
 
 	if (ret_val)
-		e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM);
+		e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
 
 out:
 	return ret_val;
@@ -1032,7 +1038,83 @@ static void e1000_release_nvm_82575(struct e1000_hw *hw)
 
 	e1000_release_nvm_generic(hw);
-	e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM);
+	e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM);
+}
+
+/**
+ * e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask
+ * will also specify which port we're acquiring the lock for.
+ **/
+static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+	u32 swmask = mask;
+	u32 fwmask = mask << 16;
+	s32 ret_val = E1000_SUCCESS;
+	s32 i = 0, timeout = 200;
+
+	DEBUGFUNC("e1000_acquire_swfw_sync_82575");
+
+	while (i < timeout) {
+		if (e1000_get_hw_semaphore_generic(hw)) {
+			ret_val = -E1000_ERR_SWFW_SYNC;
+			goto out;
+		}
+
+		swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+		if (!(swfw_sync & (fwmask | swmask)))
+			break;
+
+		/*
+		 * Firmware currently using resource (fwmask)
+		 * or other software thread using resource (swmask)
+		 */
+		e1000_put_hw_semaphore_generic(hw);
+		msec_delay_irq(5);
+		i++;
+	}
+
+	if (i == timeout) {
+		DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n");
+		ret_val = -E1000_ERR_SWFW_SYNC;
+		goto out;
+	}
+
+	swfw_sync |= swmask;
+	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+	e1000_put_hw_semaphore_generic(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * e1000_release_swfw_sync_82575 - Release SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Release the SW/FW semaphore used to access the PHY or NVM. The mask
+ * will also specify which port we're releasing the lock for.
+ **/
+static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+
+	DEBUGFUNC("e1000_release_swfw_sync_82575");
+
+	while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS)
+		; /* Empty */
+
+	swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+	swfw_sync &= ~mask;
+	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+	e1000_put_hw_semaphore_generic(hw);
 }
 
 /**
diff --git a/sys/dev/e1000/e1000_hw.h b/sys/dev/e1000/e1000_hw.h
index c90066dc9435..e1464a7b655a 100644
--- a/sys/dev/e1000/e1000_hw.h
+++ b/sys/dev/e1000/e1000_hw.h
@@ -934,6 +934,7 @@ struct e1000_dev_spec_82543 {
 struct e1000_dev_spec_82571 {
 	bool laa_is_present;
 	u32 smb_counter;
+	E1000_MUTEX swflag_mutex;
 };
 
 struct e1000_dev_spec_80003es2lan {
@@ -957,6 +958,8 @@ enum e1000_ulp_state {
 struct e1000_dev_spec_ich8lan {
 	bool kmrn_lock_loss_workaround_enabled;
 	struct e1000_shadow_ram shadow_ram[E1000_SHADOW_RAM_WORDS];
+	E1000_MUTEX nvm_mutex;
+	E1000_MUTEX swflag_mutex;
 	bool nvm_k1_enabled;
 	bool disable_k1_off;
 	bool eee_disable;
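All of the acquire paths above test SW_FW_SYNC the same way: the low 16 bits are software ownership flags and the high 16 bits are the firmware's mirror of the same flags, which is why the code builds fwmask as mask << 16. A distilled version of that check, using mask values from e1000_defines.h (the helper itself is illustrative only):

#include <stdint.h>

static int
swfw_sync_is_free(uint32_t swfw_sync, uint16_t mask)
{
	uint32_t swmask = mask;			/* e.g. E1000_SWFW_EEP_SM (0x01) */
	uint32_t fwmask = (uint32_t)mask << 16;	/* same bit, firmware's copy */

	/* Free only when neither the driver nor firmware holds the bit */
	return (!(swfw_sync & (fwmask | swmask)));
}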
diff --git a/sys/dev/e1000/e1000_i210.c b/sys/dev/e1000/e1000_i210.c
index f03fbac1f13d..cd8d7c7e1f56 100644
--- a/sys/dev/e1000/e1000_i210.c
+++ b/sys/dev/e1000/e1000_i210.c
@@ -37,6 +37,7 @@
 
 static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw);
 static void e1000_release_nvm_i210(struct e1000_hw *hw);
+static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw);
 static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words,
 				u16 *data);
 static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw);
@@ -57,7 +58,7 @@ static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw)
 
 	DEBUGFUNC("e1000_acquire_nvm_i210");
 
-	ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM);
+	ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM);
 
 	return ret_val;
 }
@@ -73,7 +74,152 @@ static void e1000_release_nvm_i210(struct e1000_hw *hw)
 {
 	DEBUGFUNC("e1000_release_nvm_i210");
 
-	e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM);
+	e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM);
+}
+
+/**
+ * e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Acquire the SW/FW semaphore to access the PHY or NVM. The mask
+ * will also specify which port we're acquiring the lock for.
+ **/
+s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+	u32 swmask = mask;
+	u32 fwmask = mask << 16;
+	s32 ret_val = E1000_SUCCESS;
+	s32 i = 0, timeout = 200; /* FIXME: find real value to use here */
+
+	DEBUGFUNC("e1000_acquire_swfw_sync_i210");
+
+	while (i < timeout) {
+		if (e1000_get_hw_semaphore_i210(hw)) {
+			ret_val = -E1000_ERR_SWFW_SYNC;
+			goto out;
+		}
+
+		swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+		if (!(swfw_sync & (fwmask | swmask)))
+			break;
+
+		/*
+		 * Firmware currently using resource (fwmask)
+		 * or other software thread using resource (swmask)
+		 */
+		e1000_put_hw_semaphore_generic(hw);
+		msec_delay_irq(5);
+		i++;
+	}
+
+	if (i == timeout) {
+		DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n");
+		ret_val = -E1000_ERR_SWFW_SYNC;
+		goto out;
+	}
+
+	swfw_sync |= swmask;
+	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+	e1000_put_hw_semaphore_generic(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * e1000_release_swfw_sync_i210 - Release SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Release the SW/FW semaphore used to access the PHY or NVM. The mask
+ * will also specify which port we're releasing the lock for.
+ **/
+void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+
+	DEBUGFUNC("e1000_release_swfw_sync_i210");
+
+	while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS)
+		; /* Empty */
+
+	swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
+	swfw_sync &= ~mask;
+	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
+
+	e1000_put_hw_semaphore_generic(hw);
+}
+
+/**
+ * e1000_get_hw_semaphore_i210 - Acquire hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the HW semaphore to access the PHY or NVM
+ **/
+static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw)
+{
+	u32 swsm;
+	s32 timeout = hw->nvm.word_size + 1;
+	s32 i = 0;
+
+	DEBUGFUNC("e1000_get_hw_semaphore_i210");
+
+	/* Get the SW semaphore */
+	while (i < timeout) {
+		swsm = E1000_READ_REG(hw, E1000_SWSM);
+		if (!(swsm & E1000_SWSM_SMBI))
+			break;
+
+		usec_delay(50);
+		i++;
+	}
+
+	if (i == timeout) {
+		/* In rare circumstances, the SW semaphore may already be held
+		 * unintentionally. Clear the semaphore once before giving up.
+		 */
+		if (hw->dev_spec._82575.clear_semaphore_once) {
+			hw->dev_spec._82575.clear_semaphore_once = FALSE;
+			e1000_put_hw_semaphore_generic(hw);
+			for (i = 0; i < timeout; i++) {
+				swsm = E1000_READ_REG(hw, E1000_SWSM);
+				if (!(swsm & E1000_SWSM_SMBI))
+					break;
+
+				usec_delay(50);
+			}
+		}
+
+		/* If we do not have the semaphore here, we have to give up. */
+		if (i == timeout) {
+			DEBUGOUT("Driver can't access device - SMBI bit is set.\n");
+			return -E1000_ERR_NVM;
+		}
+	}
+
+	/* Get the FW semaphore. */
+	for (i = 0; i < timeout; i++) {
+		swsm = E1000_READ_REG(hw, E1000_SWSM);
+		E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
+
+		/* Semaphore acquired if bit latched */
+		if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI)
+			break;
+
+		usec_delay(50);
+	}
+
+	if (i == timeout) {
+		/* Release semaphores */
+		e1000_put_hw_semaphore_generic(hw);
+		DEBUGOUT("Driver can't access the NVM\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return E1000_SUCCESS;
 }
 
 /**
diff --git a/sys/dev/e1000/e1000_i210.h b/sys/dev/e1000/e1000_i210.h
index 960e2c5a730f..f940915b0619 100644
--- a/sys/dev/e1000/e1000_i210.h
+++ b/sys/dev/e1000/e1000_i210.h
@@ -43,6 +43,8 @@ s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset,
 			      u16 words, u16 *data);
 s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset,
 			     u16 words, u16 *data);
+s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
+void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
 s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr,
 			 u16 *data);
 s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr,
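e1000_get_hw_semaphore_i210() above differs from the generic version in one respect: if SMBI never comes free, it force-clears the semaphore once per driver lifetime (clear_semaphore_once) and retries before giving up. A distilled model of that control flow, with a plain variable standing in for the SWSM register (sketch only; all names here are hypothetical, not driver code):

static volatile unsigned int swsm;	/* stand-in for the SWSM register */

static int
poll_smbi_free(int tries)
{
	while (tries--)
		if (!(swsm & 0x02))	/* SMBI clear means the lock is free */
			return (1);
	return (0);
}

static int
smbi_acquire_with_recovery(int *clear_once, int tries)
{
	if (poll_smbi_free(tries))
		return (0);		/* normal path */
	if (*clear_once) {		/* one-shot unwedge, as in the i210 path */
		*clear_once = 0;
		swsm &= ~0x06;		/* drop SMBI and SWESMBI */
		if (poll_smbi_free(tries))
			return (0);
	}
	return (-1);			/* genuinely stuck; give up */
}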
diff --git a/sys/dev/e1000/e1000_ich8lan.c b/sys/dev/e1000/e1000_ich8lan.c
index ebb5aad94494..9be9ac799d94 100644
--- a/sys/dev/e1000/e1000_ich8lan.c
+++ b/sys/dev/e1000/e1000_ich8lan.c
@@ -694,6 +694,9 @@ static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw)
 		dev_spec->shadow_ram[i].value = 0xFFFF;
 	}
 
+	E1000_MUTEX_INIT(&dev_spec->nvm_mutex);
+	E1000_MUTEX_INIT(&dev_spec->swflag_mutex);
+
 	/* Function Pointers */
 	nvm->ops.acquire = e1000_acquire_nvm_ich8lan;
 	nvm->ops.release = e1000_release_nvm_ich8lan;
@@ -1844,7 +1847,7 @@ static s32 e1000_acquire_nvm_ich8lan(struct e1000_hw *hw)
 {
 	DEBUGFUNC("e1000_acquire_nvm_ich8lan");
 
-	ASSERT_CTX_LOCK_HELD(hw);
+	E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.nvm_mutex);
 
 	return E1000_SUCCESS;
 }
@@ -1859,7 +1862,9 @@ static void e1000_release_nvm_ich8lan(struct e1000_hw *hw)
 {
 	DEBUGFUNC("e1000_release_nvm_ich8lan");
 
-	ASSERT_CTX_LOCK_HELD(hw);
+	E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.nvm_mutex);
+
+	return;
 }
 
 /**
@@ -1876,7 +1881,7 @@ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw)
 
 	DEBUGFUNC("e1000_acquire_swflag_ich8lan");
 
-	ASSERT_CTX_LOCK_HELD(hw);
+	E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.swflag_mutex);
 
 	while (timeout) {
 		extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL);
@@ -1917,6 +1922,9 @@ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw)
 	}
 
 out:
+	if (ret_val)
+		E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex);
+
 	return ret_val;
 }
 
@@ -1941,6 +1949,10 @@ static void e1000_release_swflag_ich8lan(struct e1000_hw *hw)
 	} else {
 		DEBUGOUT("Semaphore unexpectedly released by sw/fw/hw\n");
 	}
+
+	E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex);
+
+	return;
 }
 
 /**
@@ -5010,6 +5022,8 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
 		E1000_WRITE_REG(hw, E1000_FEXTNVM3, reg);
 	}
 
+	if (!ret_val)
+		E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex);
 
 	if (ctrl & E1000_CTRL_PHY_RST) {
 		ret_val = hw->phy.ops.get_cfg_done(hw);
diff --git a/sys/dev/e1000/e1000_mac.c b/sys/dev/e1000/e1000_mac.c
index 2140ba985e2f..1c863073f082 100644
--- a/sys/dev/e1000/e1000_mac.c
+++ b/sys/dev/e1000/e1000_mac.c
@@ -1706,6 +1706,76 @@ s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw E1000_UNUSED
 	return E1000_SUCCESS;
 }
 
+/**
+ * e1000_get_hw_semaphore_generic - Acquire hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the HW semaphore to access the PHY or NVM
+ **/
+s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw)
+{
+	u32 swsm;
+	s32 timeout = hw->nvm.word_size + 1;
+	s32 i = 0;
+
+	DEBUGFUNC("e1000_get_hw_semaphore_generic");
+
+	/* Get the SW semaphore */
+	while (i < timeout) {
+		swsm = E1000_READ_REG(hw, E1000_SWSM);
+		if (!(swsm & E1000_SWSM_SMBI))
+			break;
+
+		usec_delay(50);
+		i++;
+	}
+
+	if (i == timeout) {
+		DEBUGOUT("Driver can't access device - SMBI bit is set.\n");
+		return -E1000_ERR_NVM;
+	}
+
+	/* Get the FW semaphore. */
+	for (i = 0; i < timeout; i++) {
+		swsm = E1000_READ_REG(hw, E1000_SWSM);
+		E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
+
+		/* Semaphore acquired if bit latched */
+		if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI)
+			break;
+
+		usec_delay(50);
+	}
+
+	if (i == timeout) {
+		/* Release semaphores */
+		e1000_put_hw_semaphore_generic(hw);
+		DEBUGOUT("Driver can't access the NVM\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_put_hw_semaphore_generic - Release hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Release hardware semaphore used to access the PHY or NVM
+ **/
+void e1000_put_hw_semaphore_generic(struct e1000_hw *hw)
+{
+	u32 swsm;
+
+	DEBUGFUNC("e1000_put_hw_semaphore_generic");
+
+	swsm = E1000_READ_REG(hw, E1000_SWSM);
+
+	swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
+
+	E1000_WRITE_REG(hw, E1000_SWSM, swsm);
+}
+
 /**
  * e1000_get_auto_rd_done_generic - Check for auto read completion
  * @hw: pointer to the HW structure
@@ -2181,186 +2251,3 @@ s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg,
 
 	return E1000_SUCCESS;
 }
-
-/**
- * e1000_get_hw_semaphore - Acquire hardware semaphore
- * @hw: pointer to the HW structure
- *
- * Acquire the HW semaphore to access the PHY or NVM
- **/
-s32 e1000_get_hw_semaphore(struct e1000_hw *hw)
-{
-	u32 swsm;
-	s32 timeout = hw->nvm.word_size + 1;
-	s32 i = 0;
-
-	DEBUGFUNC("e1000_get_hw_semaphore");
-#ifdef notyet
-	/* _82571 */
-	/* If we have timedout 3 times on trying to acquire
-	 * the inter-port SMBI semaphore, there is old code
-	 * operating on the other port, and it is not
-	 * releasing SMBI. Modify the number of times that
-	 * we try for the semaphore to interwork with this
-	 * older code.
-	 */
-	if (hw->dev_spec._82571.smb_counter > 2)
-		sw_timeout = 1;
-
-#endif
-	/* Get the SW semaphore */
-	while (i < timeout) {
-		swsm = E1000_READ_REG(hw, E1000_SWSM);
-		if (!(swsm & E1000_SWSM_SMBI))
-			break;
-
-		usec_delay(50);
-		i++;
-	}
-
-	if (i == timeout) {
-#ifdef notyet
-		/*
-		 * XXX This sounds more like a driver bug whereby we either
-		 * recursed accidentally or missed clearing it previously
-		 */
-		/* In rare circumstances, the SW semaphore may already be held
-		 * unintentionally. Clear the semaphore once before giving up.
-		 */
-		if (hw->dev_spec._82575.clear_semaphore_once) {
-			hw->dev_spec._82575.clear_semaphore_once = FALSE;
-			e1000_put_hw_semaphore_generic(hw);
-			for (i = 0; i < timeout; i++) {
-				swsm = E1000_READ_REG(hw, E1000_SWSM);
-				if (!(swsm & E1000_SWSM_SMBI))
-					break;
-
-				usec_delay(50);
-			}
-		}
-#endif
-
-		DEBUGOUT("Driver can't access device - SMBI bit is set.\n");
-		return -E1000_ERR_NVM;
-	}
-
-	/* Get the FW semaphore. */
-	for (i = 0; i < timeout; i++) {
-		swsm = E1000_READ_REG(hw, E1000_SWSM);
-		E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
-
-		/* Semaphore acquired if bit latched */
-		if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI)
-			break;
-
-		usec_delay(50);
-	}
-
-	if (i == timeout) {
-		/* Release semaphores */
-		e1000_put_hw_semaphore(hw);
-		DEBUGOUT("Driver can't access the NVM\n");
-		return -E1000_ERR_NVM;
-	}
-
-	return E1000_SUCCESS;
-}
-
-/**
- * e1000_put_hw_semaphore - Release hardware semaphore
- * @hw: pointer to the HW structure
- *
- * Release hardware semaphore used to access the PHY or NVM
- **/
-void e1000_put_hw_semaphore(struct e1000_hw *hw)
-{
-	u32 swsm;
-
-	DEBUGFUNC("e1000_put_hw_semaphore");
-
-	swsm = E1000_READ_REG(hw, E1000_SWSM);
-
-	swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
-
-	E1000_WRITE_REG(hw, E1000_SWSM, swsm);
-}
-
-
-/**
- * e1000_acquire_swfw_sync - Acquire SW/FW semaphore
- * @hw: pointer to the HW structure
- * @mask: specifies which semaphore to acquire
- *
- * Acquire the SW/FW semaphore to access the PHY or NVM. The mask
- * will also specify which port we're acquiring the lock for.
- **/
-s32
-e1000_acquire_swfw_sync(struct e1000_hw *hw, u16 mask)
-{
-	u32 swfw_sync;
-	u32 swmask = mask;
-	u32 fwmask = mask << 16;
-	s32 ret_val = E1000_SUCCESS;
-	s32 i = 0, timeout = 200;
-
-	DEBUGFUNC("e1000_acquire_swfw_sync");
-	ASSERT_NO_LOCKS();
-	while (i < timeout) {
-		if (e1000_get_hw_semaphore(hw)) {
-			ret_val = -E1000_ERR_SWFW_SYNC;
-			goto out;
-		}
-
-		swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
-		if (!(swfw_sync & (fwmask | swmask)))
-			break;
-
-		/*
-		 * Firmware currently using resource (fwmask)
-		 * or other software thread using resource (swmask)
-		 */
-		e1000_put_hw_semaphore(hw);
-		msec_delay_irq(5);
-		i++;
-	}
-
-	if (i == timeout) {
-		DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n");
-		ret_val = -E1000_ERR_SWFW_SYNC;
-		goto out;
-	}
-
-	swfw_sync |= swmask;
-	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
-
-	e1000_put_hw_semaphore(hw);
-
-out:
-	return ret_val;
-}
-
-/**
- * e1000_release_swfw_sync - Release SW/FW semaphore
- * @hw: pointer to the HW structure
- * @mask: specifies which semaphore to acquire
- *
- * Release the SW/FW semaphore used to access the PHY or NVM. The mask
- * will also specify which port we're releasing the lock for.
- **/
-void
-e1000_release_swfw_sync(struct e1000_hw *hw, u16 mask)
-{
-	u32 swfw_sync;
-
-	DEBUGFUNC("e1000_release_swfw_sync");
-
-	while (e1000_get_hw_semaphore(hw) != E1000_SUCCESS)
-		; /* Empty */
-
-	swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC);
-	swfw_sync &= ~mask;
-	E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync);
-
-	e1000_put_hw_semaphore(hw);
-}
-
diff --git a/sys/dev/e1000/e1000_mac.h b/sys/dev/e1000/e1000_mac.h
index 2953bd52364f..ef9789bbb537 100644
--- a/sys/dev/e1000/e1000_mac.h
+++ b/sys/dev/e1000/e1000_mac.h
@@ -60,6 +60,7 @@ s32 e1000_get_bus_info_pci_generic(struct e1000_hw *hw);
 s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw);
 void e1000_set_lan_id_single_port(struct e1000_hw *hw);
 void e1000_set_lan_id_multi_port_pci(struct e1000_hw *hw);
+s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw);
 s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed,
 					      u16 *duplex);
 s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw,
@@ -84,15 +85,11 @@ void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw);
 void e1000_clear_vfta_generic(struct e1000_hw *hw);
 void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count);
 void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw);
+void e1000_put_hw_semaphore_generic(struct e1000_hw *hw);
 s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw);
 void e1000_reset_adaptive_generic(struct e1000_hw *hw);
 void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop);
 void e1000_update_adaptive_generic(struct e1000_hw *hw);
 void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value);
 
-s32 e1000_get_hw_semaphore(struct e1000_hw *hw);
-void e1000_put_hw_semaphore(struct e1000_hw *hw);
-s32 e1000_acquire_swfw_sync(struct e1000_hw *hw, u16 mask);
-void e1000_release_swfw_sync(struct e1000_hw *hw, u16 mask);
-
 #endif
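The generic acquire reinstated above is a two-stage handshake on the SWSM register: wait for the inter-port SMBI bit to clear, then set SWESMBI and read it back, since only one agent's write latches. A distilled version using the SWSM bit values from e1000_defines.h (0x02 for SMBI, 0x04 for SWESMBI); illustrative only, not driver code:

#include <stdint.h>

static int
swsm_handshake_example(volatile uint32_t *swsm, int timeout)
{
	int i;

	/* Stage 1: wait for the inter-port SMBI bit to clear */
	for (i = 0; i < timeout; i++)
		if (!(*swsm & 0x02))	/* E1000_SWSM_SMBI */
			break;
	if (i == timeout)
		return (-1);

	/* Stage 2: set SWESMBI and confirm our write latched */
	for (i = 0; i < timeout; i++) {
		*swsm |= 0x04;		/* E1000_SWSM_SWESMBI */
		if (*swsm & 0x04)
			return (0);	/* we own the semaphore */
	}
	return (-1);
}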
diff --git a/sys/dev/e1000/e1000_osdep.h b/sys/dev/e1000/e1000_osdep.h
index 840bbfcfcdce..c7c23e582ca9 100644
--- a/sys/dev/e1000/e1000_osdep.h
+++ b/sys/dev/e1000/e1000_osdep.h
@@ -39,7 +39,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -48,14 +47,6 @@
 #include
 #include
 #include
-
-#include
-#include
-#include
-#include
-
-
-
 #include
 #include
 #include
@@ -67,40 +58,11 @@
 
 #define ASSERT(x) if(!(x)) panic("EM: x")
 
-#define us_scale(x) max(1, (x/(1000000/hz)))
-static inline int
-ms_scale(int x) {
-	if (hz == 1000) {
-		return (x);
-	} else if (hz > 1000) {
-		return (x*(hz/1000));
-	} else {
-		return (max(1, x/(1000/hz)));
-	}
-}
-static inline void
-safe_pause_us(int x) {
-	if (cold) {
-		DELAY(x);
-	} else {
-		pause("e1000_delay", max(1, x/(1000000/hz)));
-	}
-}
-static inline void
-safe_pause_ms(int x) {
-	if (cold) {
-		DELAY(x*1000);
-	} else {
-		pause("e1000_delay", ms_scale(x));
-	}
-}
-
-#define usec_delay(x) safe_pause_us(x)
+#define usec_delay(x) DELAY(x)
 #define usec_delay_irq(x) usec_delay(x)
-#define msec_delay(x) safe_pause_ms(x)
-#define msec_delay_irq(x) msec_delay(x)
+#define msec_delay(x) DELAY(1000*(x))
+#define msec_delay_irq(x) DELAY(1000*(x))
 
 /* Enable/disable debugging statements in shared code */
 #define DBG		0
@@ -119,6 +81,16 @@
 #define CMD_MEM_WRT_INVALIDATE	0x0010  /* BIT_4 */
 #define PCI_COMMAND_REGISTER	PCIR_COMMAND
 
+/* Mutex used in the shared code */
+#define E1000_MUTEX                     struct mtx
+#define E1000_MUTEX_INIT(mutex)         mtx_init((mutex), #mutex, \
+                                            MTX_NETWORK_LOCK, \
+                                            MTX_DEF | MTX_DUPOK)
+#define E1000_MUTEX_DESTROY(mutex)      mtx_destroy(mutex)
+#define E1000_MUTEX_LOCK(mutex)         mtx_lock(mutex)
+#define E1000_MUTEX_TRYLOCK(mutex)      mtx_trylock(mutex)
+#define E1000_MUTEX_UNLOCK(mutex)       mtx_unlock(mutex)
+
 typedef uint64_t	u64;
 typedef uint32_t	u32;
 typedef uint16_t	u16;
@@ -144,12 +116,6 @@ typedef int8_t		s8;
 #endif
 #endif /*__FreeBSD_version < 800000 */
 
-#ifdef INVARIANTS
-#define ASSERT_CTX_LOCK_HELD(hw) (sx_assert(iflib_ctx_lock_get(((struct e1000_osdep *)hw->back)->ctx), SX_XLOCKED))
-#else
-#define ASSERT_CTX_LOCK_HELD(hw)
-#endif
-
 #if defined(__i386__) || defined(__amd64__)
 static __inline
 void prefetch(void *x)
@@ -169,7 +135,6 @@ struct e1000_osdep
 	bus_space_tag_t    flash_bus_space_tag;
 	bus_space_handle_t flash_bus_space_handle;
 	device_t	   dev;
-	if_ctx_t	   ctx;
 };
 
 #define E1000_REGISTER(hw, reg) (((hw)->mac.type >= e1000_82543) \
@@ -251,22 +216,5 @@ struct e1000_osdep
 	bus_space_write_2(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \
 	    ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg, value)
 
-
-#if defined(INVARIANTS)
-#include
-
-#define ASSERT_NO_LOCKS() \
-	do { \
-		int unknown_locks = curthread->td_locks - mtx_owned(&Giant); \
-		if (unknown_locks > 0) { \
-			WITNESS_WARN(WARN_GIANTOK|WARN_SLEEPOK|WARN_PANIC, NULL, "unexpected non-sleepable lock"); \
-		} \
-		MPASS(curthread->td_rw_rlocks == 0); \
-		MPASS(curthread->td_lk_slocks == 0); \
-	} while (0)
-#else
-#define ASSERT_NO_LOCKS()
-#endif
-
 #endif  /* _FREEBSD_OS_H_ */
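Two things change above: usec_delay()/msec_delay() go back to plain DELAY() busy-waits, which are safe in any context including with locks held, and the shared code gets E1000_MUTEX wrappers over FreeBSD's struct mtx. A sketch of the intended mutex lifecycle (the struct and function here are illustrative; the real storage is the dev_spec fields added in e1000_hw.h, initialized at attach and used by the acquire/release ops):

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

struct example_spec {
	E1000_MUTEX nvm_mutex;		/* expands to struct mtx */
};

static void
example_nvm_access(struct example_spec *spec)
{
	E1000_MUTEX_INIT(&spec->nvm_mutex);	/* once, at init/attach */

	E1000_MUTEX_LOCK(&spec->nvm_mutex);	/* MTX_DEF: may block, so
						 * not for interrupt filters */
	/* ... NVM reads/writes ... */
	E1000_MUTEX_UNLOCK(&spec->nvm_mutex);

	E1000_MUTEX_DESTROY(&spec->nvm_mutex);	/* once, at detach */
}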
diff --git a/sys/dev/e1000/em_txrx.c b/sys/dev/e1000/em_txrx.c
index 6e3ddedc65d0..22e983b370a0 100644
--- a/sys/dev/e1000/em_txrx.c
+++ b/sys/dev/e1000/em_txrx.c
@@ -66,7 +66,6 @@ static void em_receive_checksum(uint32_t status, if_rxd_info_t ri);
 static int em_determine_rsstype(u32 pkt_info);
 extern int em_intr(void *arg);
 
-
 struct if_txrx em_txrx = {
 	em_isc_txd_encap,
 	em_isc_txd_flush,
@@ -75,7 +74,7 @@ struct if_txrx em_txrx = {
 	em_isc_rxd_pkt_get,
 	em_isc_rxd_refill,
 	em_isc_rxd_flush,
-	em_intr,
+	em_intr
 };
 
 struct if_txrx lem_txrx = {
@@ -86,7 +85,7 @@ struct if_txrx lem_txrx = {
 	lem_isc_rxd_pkt_get,
 	lem_isc_rxd_refill,
 	em_isc_rxd_flush,
-	em_intr,
+	em_intr
 };
 
 extern if_shared_ctx_t em_sctx;
@@ -524,8 +523,8 @@ em_isc_rxd_refill(void *arg, if_rxd_update_t iru)
 	for (i = 0, next_pidx = pidx; i < count; i++) {
 		rxd = &rxr->rx_base[next_pidx];
 		rxd->read.buffer_addr = htole64(paddrs[i]);
-		/* Zero out rx desc status */
-		rxd->wb.upper.status_error &= htole32(~0xFF);
+		/* DD bits must be cleared */
+		rxd->wb.upper.status_error = 0;
 		if (++next_pidx == scctx->isc_nrxd[0])
 			next_pidx = 0;
@@ -552,9 +551,14 @@ lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
 	struct e1000_rx_desc *rxd;
 	u32 staterr = 0;
 	int cnt, i;
-	budget = min(budget, scctx->isc_nrxd[0]);
 
-	for (cnt = 0, i = idx; cnt <= budget;) {
+	if (budget == 1) {
+		rxd = (struct e1000_rx_desc *)&rxr->rx_base[idx];
+		staterr = rxd->status;
+		return (staterr & E1000_RXD_STAT_DD);
+	}
+
+	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
 		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
 		staterr = rxd->status;
 
@@ -567,7 +571,6 @@ lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
 		if (staterr & E1000_RXD_STAT_EOP)
 			cnt++;
 	}
-	MPASS(cnt <= scctx->isc_nrxd[0]);
 	return (cnt);
 }
 
@@ -581,9 +584,14 @@ em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
 	union e1000_rx_desc_extended *rxd;
 	u32 staterr = 0;
 	int cnt, i;
-	budget = min(budget, scctx->isc_nrxd[0]);
 
-	for (cnt = 0, i = idx; cnt <= budget;) {
+	if (budget == 1) {
+		rxd = &rxr->rx_base[idx];
+		staterr = le32toh(rxd->wb.upper.status_error);
+		return (staterr & E1000_RXD_STAT_DD);
+	}
+
+	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
 		rxd = &rxr->rx_base[i];
 		staterr = le32toh(rxd->wb.upper.status_error);
 
@@ -598,7 +606,6 @@ em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
 			cnt++;
 	}
 
-	MPASS(cnt <= scctx->isc_nrxd[0]);
 	return (cnt);
 }
 
@@ -687,8 +694,7 @@ em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
 		pkt_info = le32toh(rxd->wb.lower.mrq);
 
 		/* Error Checking then decrement count */
-		KASSERT(staterr & E1000_RXD_STAT_DD,
-		    ("cidx=%d i=%d iri_len=%d", cidx, i, ri->iri_len));
+		MPASS ((staterr & E1000_RXD_STAT_DD) != 0);
 		len = le16toh(rxd->wb.upper.length);
 		ri->iri_len += len;
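The rxd_available rewrite above adds a fast path: iflib frequently calls with budget == 1 simply to ask whether anything is ready, so that case is answered with a single descriptor-done test instead of a ring walk, and the general loop is now bounded by the ring size rather than a clamped budget. A distilled sketch with simplified types (EX_STAT_DD mirrors E1000_RXD_STAT_DD, which is 0x01):

#include <stdint.h>

struct ex_rxd { uint32_t status; };
#define EX_STAT_DD 0x01	/* descriptor done */

static int
ex_rxd_available(struct ex_rxd *ring, int nrxd, int idx, int budget)
{
	int cnt, i;

	if (budget == 1)	/* fast path: one descriptor, one test */
		return (ring[idx].status & EX_STAT_DD);

	/* slow path: count completed descriptors, bounded by ring size */
	for (cnt = 0, i = idx; cnt < nrxd && cnt <= budget;) {
		if (!(ring[i].status & EX_STAT_DD))
			break;
		if (++i == nrxd)
			i = 0;	/* wrap around the ring */
		cnt++;
	}
	return (cnt);
}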
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index 516c27d25be4..e29891cd6de1 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -483,7 +483,7 @@ static struct if_shared_ctx em_sctx_init = {
 	.isc_vendor_info = em_vendor_info_array,
 	.isc_driver_version = em_driver_version,
 	.isc_driver = &em_if_driver,
-	.isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM,
+	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP,
 
 	.isc_nrxd_min = {EM_MIN_RXD},
 	.isc_ntxd_min = {EM_MIN_TXD},
@@ -511,7 +511,7 @@ static struct if_shared_ctx igb_sctx_init = {
 	.isc_vendor_info = igb_vendor_info_array,
 	.isc_driver_version = em_driver_version,
 	.isc_driver = &em_if_driver,
-	.isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM,
+	.isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP,
 
 	.isc_nrxd_min = {EM_MIN_RXD},
 	.isc_ntxd_min = {EM_MIN_TXD},
@@ -723,7 +723,7 @@ em_if_attach_pre(if_ctx_t ctx)
 		return (ENXIO);
 	}
 
-	adapter->ctx = adapter->osdep.ctx = ctx;
+	adapter->ctx = ctx;
 	adapter->dev = adapter->osdep.dev = dev;
 	scctx = adapter->shared = iflib_get_softc_ctx(ctx);
 	adapter->media = iflib_get_media(ctx);
@@ -1405,9 +1405,7 @@ em_msix_link(void *arg)
 {
 	struct adapter *adapter = arg;
 	u32 reg_icr;
-	int is_igb;
 
-	is_igb = (adapter->hw.mac.type >= igb_mac_min);
 	++adapter->link_irq;
 	MPASS(adapter->hw.back != NULL);
 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
@@ -1415,29 +1413,26 @@ em_msix_link(void *arg)
 	if (reg_icr & E1000_ICR_RXO)
 		adapter->rx_overruns++;
 
-	if (is_igb) {
-		if (reg_icr & E1000_ICR_LSC)
-			em_handle_link(adapter->ctx);
-		E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
-		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
+	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+		em_handle_link(adapter->ctx);
 	} else {
-		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
-			em_handle_link(adapter->ctx);
-		}
 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
-		    EM_MSIX_LINK | E1000_IMS_LSC);
-
-		/*
-		 * Because we must read the ICR for this interrupt
-		 * it may clear other causes using autoclear, for
-		 * this reason we simply create a soft interrupt
-		 * for all these vectors.
-		 */
-		if (reg_icr) {
-			E1000_WRITE_REG(&adapter->hw,
-			    E1000_ICS, adapter->ims);
-		}
+		    EM_MSIX_LINK | E1000_IMS_LSC);
+		if (adapter->hw.mac.type >= igb_mac_min)
+			E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
 	}
+
+	/*
+	 * Because we must read the ICR for this interrupt
+	 * it may clear other causes using autoclear, for
+	 * this reason we simply create a soft interrupt
+	 * for all these vectors.
+	 */
+	if (reg_icr && adapter->hw.mac.type < igb_mac_min) {
+		E1000_WRITE_REG(&adapter->hw,
+		    E1000_ICS, adapter->ims);
+	}
+
 	return (FILTER_HANDLED);
 }
 
@@ -1675,6 +1670,13 @@ em_if_timer(if_ctx_t ctx, uint16_t qid)
 		return;
 
 	iflib_admin_intr_deferred(ctx);
+	/* Reset LAA into RAR[0] on 82571 */
+	if ((adapter->hw.mac.type == e1000_82571) &&
+	    e1000_get_laa_state_82571(&adapter->hw))
+		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
+
+	if (adapter->hw.mac.type < em_mac_min)
+		lem_smartspeed(adapter);
 
 	/* Mask to use in the irq trigger */
 	if (adapter->intr_type == IFLIB_INTR_MSIX) {
@@ -1785,14 +1787,6 @@ em_if_update_admin_status(if_ctx_t ctx)
 	}
 	em_update_stats_counters(adapter);
 
-	/* Reset LAA into RAR[0] on 82571 */
-	if ((adapter->hw.mac.type == e1000_82571) &&
-	    e1000_get_laa_state_82571(&adapter->hw))
-		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
-
-	if (adapter->hw.mac.type < em_mac_min)
-		lem_smartspeed(adapter);
-
 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC);
 }
 
@@ -1908,87 +1902,6 @@ em_allocate_pci_resources(if_ctx_t ctx)
 	return (0);
 }
 
-static int
-igb_intr_assign(if_ctx_t ctx, int msix)
-{
-	struct adapter *adapter = iflib_get_softc(ctx);
-	struct em_rx_queue *rx_que = adapter->rx_queues;
-	struct em_tx_queue *tx_que = adapter->tx_queues;
-	int error, rid, i, vector = 0, rx_vectors;
-	char buf[16];
-
-	/* First set up ring resources */
-	for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) {
-		rid = vector + 1;
-		snprintf(buf, sizeof(buf), "rxq%d", i);
-		error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX,
-		    em_msix_que, rx_que, rx_que->me, buf);
-		if (error) {
-			device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d\n", i, error);
-			adapter->rx_num_queues = i;
-			goto fail;
-		}
-
-		rx_que->msix = vector;
-
-		/*
-		 * Set the bit to enable interrupt
-		 * in E1000_IMS -- bits 20 and 21
-		 * are for RX0 and RX1, note this has
-		 * NOTHING to do with the MSIX vector
-		 */
-		if (adapter->hw.mac.type == e1000_82574) {
-			rx_que->eims = 1 << (20 + i);
-			adapter->ims |= rx_que->eims;
-			adapter->ivars |= (8 | rx_que->msix) << (i * 4);
-		} else if (adapter->hw.mac.type == e1000_82575)
-			rx_que->eims = E1000_EICR_TX_QUEUE0 << vector;
-		else
-			rx_que->eims = 1 << vector;
-	}
-	rx_vectors = vector;
-
-	vector = 0;
-	for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) {
-		snprintf(buf, sizeof(buf), "txq%d", i);
-		tx_que = &adapter->tx_queues[i];
-		tx_que->msix = adapter->rx_queues[i % adapter->rx_num_queues].msix;
-		rid = rman_get_start(adapter->rx_queues[i % adapter->rx_num_queues].que_irq.ii_res);
-		iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf);
-
-		if (adapter->hw.mac.type == e1000_82574) {
-			tx_que->eims = 1 << (22 + i);
-			adapter->ims |= tx_que->eims;
-			adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4));
-		} else if (adapter->hw.mac.type == e1000_82575) {
-			tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues);
-		} else {
-			tx_que->eims = 1 << (i % adapter->tx_num_queues);
-		}
-	}
-
-	/* Link interrupt */
-	rid = rx_vectors + 1;
-	error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq");
-
-	if (error) {
-		device_printf(iflib_get_dev(ctx), "Failed to register admin handler");
-		goto fail;
-	}
-	adapter->linkvec = rx_vectors;
-	if (adapter->hw.mac.type < igb_mac_min) {
-		adapter->ivars |= (8 | rx_vectors) << 16;
-		adapter->ivars |= 0x80000000;
-	}
-	return (0);
-fail:
-	iflib_irq_free(ctx, &adapter->irq);
-	rx_que = adapter->rx_queues;
-	for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++)
-		iflib_irq_free(ctx, &rx_que->que_irq);
-	return (error);
-}
-
 /*********************************************************************
 *
 *  Setup the MSIX Interrupt handlers
@@ -2000,18 +1913,14 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix)
 	struct adapter *adapter = iflib_get_softc(ctx);
 	struct em_rx_queue *rx_que = adapter->rx_queues;
 	struct em_tx_queue *tx_que = adapter->tx_queues;
-	int error, rid, i, vector = 0;
+	int error, rid, i, vector = 0, rx_vectors;
 	char buf[16];
 
-	if (adapter->hw.mac.type >= igb_mac_min) {
-		return igb_intr_assign(ctx, msix);
-	}
-
 	/* First set up ring resources */
 	for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) {
 		rid = vector + 1;
 		snprintf(buf, sizeof(buf), "rxq%d", i);
-		error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RX, em_msix_que, rx_que, rx_que->me, buf);
+		error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, em_msix_que, rx_que, rx_que->me, buf);
 		if (error) {
 			device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error);
 			adapter->rx_num_queues = i + 1;
@@ -2035,19 +1944,16 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix)
 		else
 			rx_que->eims = 1 << vector;
 	}
+	rx_vectors = vector;
 
+	vector = 0;
 	for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) {
 		rid = vector + 1;
 		snprintf(buf, sizeof(buf), "txq%d", i);
 		tx_que = &adapter->tx_queues[i];
+		iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf);
 
-		error = iflib_irq_alloc_generic(ctx, &tx_que->que_irq, rid, IFLIB_INTR_TX, em_msix_que, tx_que, tx_que->me, buf);
-		if (error) {
-			device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error);
-			adapter->tx_num_queues = i + 1;
-			goto fail;
-		}
-		tx_que->msix = vector;
+		tx_que->msix = (vector % adapter->tx_num_queues);
 
 		/*
 		 * Set the bit to enable interrupt
@@ -2060,24 +1966,23 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix)
 			adapter->ims |= tx_que->eims;
 			adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4));
 		} else if (adapter->hw.mac.type == e1000_82575) {
-			tx_que->eims = E1000_EICR_TX_QUEUE0 << vector;
+			tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues);
 		} else {
-			tx_que->eims = 1 << vector;
+			tx_que->eims = 1 << (i % adapter->tx_num_queues);
 		}
 	}
 
 	/* Link interrupt */
-	rid = vector + 1;
+	rid = rx_vectors + 1;
 	error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq");
 
 	if (error) {
 		device_printf(iflib_get_dev(ctx), "Failed to register admin handler");
 		goto fail;
 	}
-
-	adapter->linkvec = vector;
+	adapter->linkvec = rx_vectors;
 	if (adapter->hw.mac.type < igb_mac_min) {
-		adapter->ivars |= (8 | vector) << 16;
+		adapter->ivars |= (8 | rx_vectors) << 16;
 		adapter->ivars |= 0x80000000;
 	}
 	return (0);
@@ -2234,24 +2139,15 @@ static void
 em_free_pci_resources(if_ctx_t ctx)
 {
 	struct adapter *adapter = iflib_get_softc(ctx);
-	struct em_rx_queue *rxque = adapter->rx_queues;
-	struct em_tx_queue *txque = adapter->tx_queues;
+	struct em_rx_queue *que = adapter->rx_queues;
 	device_t dev = iflib_get_dev(ctx);
-	int is_igb;
 
-	is_igb = (adapter->hw.mac.type >= igb_mac_min);
 	/* Release all msix queue resources */
 	if (adapter->intr_type == IFLIB_INTR_MSIX)
 		iflib_irq_free(ctx, &adapter->irq);
 
-	for (int i = 0; i < adapter->rx_num_queues; i++, rxque++) {
-		iflib_irq_free(ctx, &rxque->que_irq);
-	}
-
-	if (!is_igb) {
-		for (int i = 0; i < adapter->tx_num_queues; i++, txque++) {
-			iflib_irq_free(ctx, &txque->que_irq);
-		}
+	for (int i = 0; i < adapter->rx_num_queues; i++, que++) {
+		iflib_irq_free(ctx, &que->que_irq);
 	}
 
 	/* First release all the interrupt resources */
diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h
index 19154c5a3c8d..6e19449ac8b4 100644
--- a/sys/dev/e1000/if_em.h
+++ b/sys/dev/e1000/if_em.h
@@ -434,7 +434,6 @@ struct em_tx_queue {
 	u32 eims;		/* This queue's EIMS bit */
 	u32 me;
 	struct tx_ring txr;
-	struct if_irq que_irq;
 };
 
 struct em_rx_queue {
@@ -444,7 +443,7 @@ struct em_rx_queue {
 	u32 eims;
 	struct rx_ring rxr;
 	u64 irqs;
-	struct if_irq que_irq;
+	struct if_irq	que_irq;
 };
 
 /* Our adapter structure */
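After this change every adapter type takes the same MSI-X path: RX queues own the hardware vectors, TX reclamation is a softirq sharing an RX vector, and the link handler gets the vector after the rings. On 82574 those assignments are additionally encoded into IVAR nibbles, as the ivars arithmetic above shows. A sketch of that encoding, assuming the two-RX/two-TX layout the 82574 supports (the helper is illustrative, not driver code):

#include <stdint.h>

static uint32_t
ex_82574_ivars(int rx0_vec, int rx1_vec, int tx0_vec, int tx1_vec,
    int link_vec)
{
	uint32_t ivars = 0;

	/* Each 4-bit field holds (8 | vector); bit 3 marks it valid */
	ivars |= (8 | rx0_vec) << 0;	/* RXQ0 */
	ivars |= (8 | rx1_vec) << 4;	/* RXQ1 */
	ivars |= (8 | tx0_vec) << 8;	/* TXQ0 */
	ivars |= (8 | tx1_vec) << 12;	/* TXQ1 */
	ivars |= (8 | link_vec) << 16;	/* link/other causes */
	ivars |= 0x80000000;		/* enable MSI-X mode */
	return (ivars);
}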
diff --git a/sys/kern/subr_gtaskqueue.c b/sys/kern/subr_gtaskqueue.c
index 716aef6b1053..6a39a3aac14a 100644
--- a/sys/kern/subr_gtaskqueue.c
+++ b/sys/kern/subr_gtaskqueue.c
@@ -48,26 +48,17 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 
-static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
+static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues");
 static void	gtaskqueue_thread_enqueue(void *);
 static void	gtaskqueue_thread_loop(void *arg);
-static int	_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri);
 
-TASKQGROUP_DEFINE(softirq, mp_ncpus, 1, false, PI_SOFT);
+
+TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
 
 struct gtaskqueue_busy {
 	struct gtask	*tb_running;
 	TAILQ_ENTRY(gtaskqueue_busy) tb_link;
 };
 
-struct gt_intr_thread {
-	int	git_flags;	/* (j) IT_* flags. */
-	int	git_need;	/* Needs service. */
-};
-
-/* Interrupt thread flags kept in it_flags */
-#define	IT_DEAD		0x000001 /* Thread is waiting to exit. */
-#define	IT_WAIT		0x000002 /* Thread is waiting for completion. */
-
 static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
 
 struct gtaskqueue {
@@ -78,7 +69,6 @@ struct gtaskqueue {
 	TAILQ_HEAD(, gtaskqueue_busy) tq_active;
 	struct mtx		tq_mutex;
 	struct thread		**tq_threads;
-	struct gt_intr_thread	*tq_gt_intrs;
 	int			tq_tcount;
 	int			tq_spin;
 	int			tq_flags;
@@ -90,7 +80,6 @@ struct gtaskqueue {
 #define	TQ_FLAGS_ACTIVE		(1 << 0)
 #define	TQ_FLAGS_BLOCKED	(1 << 1)
 #define	TQ_FLAGS_UNLOCKED_ENQUEUE	(1 << 2)
-#define	TQ_FLAGS_INTR		(1 << 3)
 
 #define	DT_CALLOUT_ARMED	(1 << 0)
 
@@ -191,32 +180,6 @@ gtaskqueue_free(struct gtaskqueue *queue)
 	free(queue, M_GTASKQUEUE);
 }
 
-static void
-schedule_ithread(struct gtaskqueue *queue)
-{
-	struct proc *p;
-	struct thread *td;
-	struct gt_intr_thread *git;
-
-	MPASS(queue->tq_tcount == 1);
-	td = queue->tq_threads[0];
-	git = &queue->tq_gt_intrs[0];
-	p = td->td_proc;
-
-	atomic_store_rel_int(&git->git_need, 1);
-	thread_lock(td);
-	if (TD_AWAITING_INTR(td)) {
-		CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid,
-		    td->td_name);
-		TD_CLR_IWAIT(td);
-		sched_add(td, SRQ_INTR);
-	} else {
-		CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
-		    __func__, p->p_pid, td->td_name, git->git_need, td->td_state);
-	}
-	thread_unlock(td);
-}
-
 int
 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
 {
@@ -234,13 +197,8 @@ grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
 	STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
 	gtask->ta_flags |= TASK_ENQUEUED;
 	TQ_UNLOCK(queue);
-	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) {
-		if (queue->tq_flags & TQ_FLAGS_INTR) {
-			schedule_ithread(queue);
-		} else {
-			queue->tq_enqueue(queue->tq_context);
-		}
-	}
+	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
+		queue->tq_enqueue(queue->tq_context);
 	return (0);
 }
 
@@ -445,7 +403,7 @@ gtaskqueue_drain_all(struct gtaskqueue *queue)
 
 static int
 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
-    cpuset_t *mask, bool intr, const char *name, va_list ap)
+    cpuset_t *mask, const char *name, va_list ap)
 {
 	char ktname[MAXCOMLEN + 1];
 	struct thread *td;
@@ -464,12 +422,6 @@ _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 		printf("%s: no memory for %s threads\n", __func__, ktname);
 		return (ENOMEM);
 	}
-	tq->tq_gt_intrs = malloc(sizeof(struct gt_intr_thread) * count, M_GTASKQUEUE,
-	    M_NOWAIT | M_ZERO);
-	if (tq->tq_gt_intrs == NULL) {
-		printf("%s: no memory for %s intr info\n", __func__, ktname);
-		return (ENOMEM);
-	}
 
 	for (i = 0; i < count; i++) {
 		if (count == 1)
@@ -487,9 +439,6 @@ _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 		} else
 			tq->tq_tcount++;
 	}
-	if (intr)
-		tq->tq_flags |= TQ_FLAGS_INTR;
-
 	for (i = 0; i < count; i++) {
 		if (tq->tq_threads[i] == NULL)
 			continue;
@@ -509,14 +458,7 @@ _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 		}
 		thread_lock(td);
 		sched_prio(td, pri);
-		if (intr) {
-			/* we need to schedule the thread from the interrupt handler for this to work */
-			TD_SET_IWAIT(td);
-			sched_class(td, PRI_ITHD);
-			td->td_pflags |= TDP_ITHREAD;
-		} else {
-			sched_add(td, SRQ_BORING);
-		}
+		sched_add(td, SRQ_BORING);
 		thread_unlock(td);
 	}
 
@@ -525,13 +467,13 @@ _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 
 static int
 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
-    bool intr, const char *name, ...)
+    const char *name, ...)
 {
 	va_list ap;
 	int error;
 
 	va_start(ap, name);
-	error = _gtaskqueue_start_threads(tqp, count, pri, NULL, intr, name, ap);
+	error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
 	va_end(ap);
 	return (error);
 }
@@ -549,58 +491,16 @@ gtaskqueue_run_callback(struct gtaskqueue *tq,
 }
 
 static void
-intr_thread_loop(struct gtaskqueue *tq)
+gtaskqueue_thread_loop(void *arg)
 {
-	struct gt_intr_thread *git;
-	struct thread *td;
-
-	git = &tq->tq_gt_intrs[0];
-	td = tq->tq_threads[0];
-	MPASS(tq->tq_tcount == 1);
+	struct gtaskqueue **tqp, *tq;
 
+	tqp = arg;
+	tq = *tqp;
+	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
+	TQ_LOCK(tq);
 	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
-		THREAD_NO_SLEEPING();
-		while (atomic_cmpset_acq_int(&git->git_need, 1, 0) != 0) {
-			gtaskqueue_run_locked(tq);
-		}
-		THREAD_SLEEPING_OK();
-
-		/*
-		 * Because taskqueue_run() can drop tq_mutex, we need to
-		 * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
-		 * meantime, which means we missed a wakeup.
-		 */
-		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
-			break;
-
-		TQ_UNLOCK(tq);
-		WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
-		mtx_assert(&Giant, MA_NOTOWNED);
-		thread_lock(td);
-		if (atomic_load_acq_int(&git->git_need) == 0 &&
-		    (git->git_flags & (IT_DEAD | IT_WAIT)) == 0) {
-			TD_SET_IWAIT(td);
-			mi_switch(SW_VOL | SWT_IWAIT, NULL);
-		}
-#if 0
-		/* XXX is this something we want? */
-		if (git->git_flags & IT_WAIT) {
-			wake = 1;
-			git->git_flags &= ~IT_WAIT;
-		}
-#endif
-		thread_unlock(td);
-		TQ_LOCK(tq);
-	}
-	THREAD_NO_SLEEPING();
-	gtaskqueue_run_locked(tq);
-	THREAD_SLEEPING_OK();
-}
-
-static void
-timeshare_thread_loop(struct gtaskqueue *tq)
-{
-	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
+		/* XXX ? */
 		gtaskqueue_run_locked(tq);
 		/*
 		 * Because taskqueue_run() can drop tq_mutex, we need to
@@ -612,23 +512,6 @@ timeshare_thread_loop(struct gtaskqueue *tq)
 		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
 	}
 	gtaskqueue_run_locked(tq);
-}
-
-static void
-gtaskqueue_thread_loop(void *arg)
-{
-	struct gtaskqueue **tqp, *tq;
-
-	tqp = arg;
-	tq = *tqp;
-	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
-	TQ_LOCK(tq);
-	if (curthread->td_pflags & TDP_ITHREAD) {
-		intr_thread_loop(tq);
-	} else {
-		timeshare_thread_loop(tq);
-	}
-
 	/*
 	 * This thread is on its way out, so just drop the lock temporarily
 	 * in order to call the shutdown callback. This allows the callback
@@ -675,17 +558,11 @@ struct taskqgroup_cpu {
 struct taskqgroup {
 	struct taskqgroup_cpu tqg_queue[MAXCPU];
 	struct mtx	tqg_lock;
-	void		(*adjust_func)(void*);
 	char *		tqg_name;
 	int		tqg_adjusting;
 	int		tqg_stride;
 	int		tqg_cnt;
-	int		tqg_pri;
-	int		tqg_flags;
-	bool		tqg_intr;
 };
-#define TQG_NEED_ADJUST	0x1
-#define TQG_ADJUSTED	0x2
 
 struct taskq_bind_task {
 	struct gtask bt_task;
@@ -693,16 +570,16 @@ struct taskq_bind_task {
 };
 
 static void
-taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu, bool intr, int pri)
+taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
 {
 	struct taskqgroup_cpu *qcpu;
 
 	qcpu = &qgroup->tqg_queue[idx];
 	LIST_INIT(&qcpu->tgc_tasks);
-	qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK | M_ZERO,
+	qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
 	    taskqueue_thread_enqueue, &qcpu->tgc_taskq);
-	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, pri,
-	    intr, "%s_%d", qgroup->tqg_name, idx);
+	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
+	    "%s_%d", qgroup->tqg_name, idx);
 	qcpu->tgc_cpu = cpu;
 }
 
@@ -786,20 +663,12 @@ taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
     void *uniq, int irq, char *name)
 {
 	cpuset_t mask;
-	int qid, error;
+	int qid;
 
 	gtask->gt_uniq = uniq;
 	gtask->gt_name = name;
 	gtask->gt_irq = irq;
 	gtask->gt_cpu = -1;
-
-	mtx_lock(&qgroup->tqg_lock);
-	qgroup->tqg_flags |= TQG_NEED_ADJUST;
-	mtx_unlock(&qgroup->tqg_lock);
-
-	if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED))
-		qgroup->adjust_func(NULL);
-
 	mtx_lock(&qgroup->tqg_lock);
 	qid = taskqgroup_find(qgroup, uniq);
 	qgroup->tqg_queue[qid].tgc_cnt++;
@@ -810,9 +679,7 @@ taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
 		CPU_ZERO(&mask);
 		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
 		mtx_unlock(&qgroup->tqg_lock);
-		error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask);
-		if (error)
-			printf("taskqgroup_attach: setaffinity failed: %d\n", error);
+		intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
 	} else
 		mtx_unlock(&qgroup->tqg_lock);
 }
@@ -821,7 +688,7 @@ static void
 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 {
 	cpuset_t mask;
-	int qid, cpu, error;
+	int qid, cpu;
 
 	mtx_lock(&qgroup->tqg_lock);
 	qid = taskqgroup_find(qgroup, gtask->gt_uniq);
@@ -831,10 +698,9 @@ taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 
 		CPU_ZERO(&mask);
 		CPU_SET(cpu, &mask);
-		error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_INTRHANDLER, &mask);
+		intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
+
 		mtx_lock(&qgroup->tqg_lock);
-		if (error)
-			printf("taskqgroup_attach_deferred: setaffinity failed: %d\n", error);
 	}
 
 	qgroup->tqg_queue[qid].tgc_cnt++;
@@ -845,79 +711,27 @@ taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 	mtx_unlock(&qgroup->tqg_lock);
 }
 
-static int
-taskqgroup_adjust_deferred(struct taskqgroup *qgroup, int cpu)
-{
-	int i, error = 0, cpu_max = -1;
-
-	mtx_lock(&qgroup->tqg_lock);
-	for (i = 0; i < qgroup->tqg_cnt; i++)
-		if (qgroup->tqg_queue[i].tgc_cpu > cpu_max)
-			cpu_max = qgroup->tqg_queue[i].tgc_cpu;
-	if (cpu_max >= cpu) {
-		mtx_unlock(&qgroup->tqg_lock);
-		return (0);
-	}
-	MPASS(cpu <= mp_maxid);
-	error = _taskqgroup_adjust(qgroup, cpu + 1, qgroup->tqg_stride,
-	    qgroup->tqg_intr, qgroup->tqg_pri);
-	if (error) {
-		printf("%s: _taskqgroup_adjust(%p, %d, %d, %d, %d) => %d\n\n",
-		    __func__, qgroup, cpu + 1, qgroup->tqg_stride, qgroup->tqg_intr,
-		    qgroup->tqg_pri, error);
-		goto out;
-	}
-	for (i = 0; i < qgroup->tqg_cnt; i++)
-		if (qgroup->tqg_queue[i].tgc_cpu > cpu_max)
-			cpu_max = qgroup->tqg_queue[i].tgc_cpu;
-	MPASS(cpu_max >= cpu);
-out:
-	mtx_unlock(&qgroup->tqg_lock);
-	return (error);
-}
-
 int
 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
     void *uniq, int cpu, int irq, char *name)
 {
 	cpuset_t mask;
-	int i, error, qid;
+	int i, qid;
 
 	qid = -1;
 	gtask->gt_uniq = uniq;
 	gtask->gt_name = name;
 	gtask->gt_irq = irq;
 	gtask->gt_cpu = cpu;
-	MPASS(cpu >= 0);
-
-	mtx_lock(&qgroup->tqg_lock);
-	qgroup->tqg_flags |= TQG_NEED_ADJUST;
-	mtx_unlock(&qgroup->tqg_lock);
-
-	if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED)) {
-		uintptr_t cpuid = cpu + 1;
-		qgroup->adjust_func((void *)cpuid);
-	}
-	if ((error = taskqgroup_adjust_deferred(qgroup, cpu)))
-		return (error);
-
 	mtx_lock(&qgroup->tqg_lock);
 	if (tqg_smp_started) {
-		for (i = 0; i < qgroup->tqg_cnt; i++) {
+		for (i = 0; i < qgroup->tqg_cnt; i++)
 			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 				qid = i;
 				break;
 			}
-#ifdef INVARIANTS
-			else
-				printf("qgroup->tqg_queue[%d].tgc_cpu=0x%x tgc_cnt=0x%x\n",
-				    i, qgroup->tqg_queue[i].tgc_cpu, qgroup->tqg_queue[i].tgc_cnt);
-#endif
-		}
 		if (qid == -1) {
 			mtx_unlock(&qgroup->tqg_lock);
-			printf("%s: qid not found for cpu=%d\n", __func__, cpu);
 			return (EINVAL);
 		}
 	} else
@@ -930,11 +744,8 @@ taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
 
 	CPU_ZERO(&mask);
 	CPU_SET(cpu, &mask);
-	if (irq != -1 && tqg_smp_started) {
-		error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask);
-		if (error)
-			printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error);
-	}
+	if (irq != -1 && tqg_smp_started)
+		intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
 	return (0);
 }
 
@@ -942,18 +753,13 @@ static int
 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 {
 	cpuset_t mask;
-	int i, qid, irq, cpu, error;
+	int i, qid, irq, cpu;
 
 	qid = -1;
 	irq = gtask->gt_irq;
 	cpu = gtask->gt_cpu;
 	MPASS(tqg_smp_started);
-
-	if ((error = taskqgroup_adjust_deferred(qgroup, cpu)))
-		return (error);
 	mtx_lock(&qgroup->tqg_lock);
-	/* adjust as needed */
-	MPASS(cpu <= mp_maxid);
 	for (i = 0; i < qgroup->tqg_cnt; i++)
 		if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 			qid = i;
@@ -961,7 +767,6 @@ taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtas
 		}
 	if (qid == -1) {
 		mtx_unlock(&qgroup->tqg_lock);
-		printf("%s: qid not found for cpu=%d\n", __func__, cpu);
 		return (EINVAL);
 	}
 	qgroup->tqg_queue[qid].tgc_cnt++;
@@ -973,11 +778,8 @@ taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtas
 
 	CPU_ZERO(&mask);
 	CPU_SET(cpu, &mask);
-	if (irq != -1) {
-		error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask);
-		if (error)
-			printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error);
-	}
+	if (irq != -1)
+		intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
 	return (0);
 }
 
@@
-1016,25 +818,8 @@ taskqgroup_binder(void *ctx) printf("taskqgroup_binder: setaffinity failed: %d\n", error); free(gtask, M_DEVBUF); - } -static void -taskqgroup_ithread_binder(void *ctx) -{ - struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx; - cpuset_t mask; - int error; - CPU_ZERO(&mask); - CPU_SET(gtask->bt_cpuid, &mask); - error = cpuset_setthread(curthread->td_tid, &mask); - - if (error) - printf("taskqgroup_binder: setaffinity failed: %d\n", - error); - free(gtask, M_DEVBUF); - -} static void taskqgroup_bind(struct taskqgroup *qgroup) { @@ -1050,10 +835,7 @@ taskqgroup_bind(struct taskqgroup *qgroup) for (i = 0; i < qgroup->tqg_cnt; i++) { gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK); - if (qgroup->tqg_intr) - GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_ithread_binder, gtask); - else - GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask); + GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask); gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu; grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq, &gtask->bt_task); @@ -1061,7 +843,7 @@ taskqgroup_bind(struct taskqgroup *qgroup) } static int -_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) +_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) { LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); struct grouptask *gtask; @@ -1076,22 +858,14 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, return (EINVAL); } if (qgroup->tqg_adjusting) { - printf("%s: failed: adjusting\n", __func__); + printf("taskqgroup_adjust failed: adjusting\n"); return (EBUSY); } - /* No work to be done */ - if (qgroup->tqg_cnt == cnt) - return (0); qgroup->tqg_adjusting = 1; old_cnt = qgroup->tqg_cnt; old_cpu = 0; - if (old_cnt < cnt) { - int old_max_idx = max(0, old_cnt-1); - old_cpu = qgroup->tqg_queue[old_max_idx].tgc_cpu; - if (old_cnt > 0) - for (k = 0; k < stride; k++) - old_cpu = CPU_NEXT(old_cpu); - } + if (old_cnt < cnt) + old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu; mtx_unlock(&qgroup->tqg_lock); /* * Set up queue for tasks added before boot. */ @@ -1107,7 +881,7 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, */ cpu = old_cpu; for (i = old_cnt; i < cnt; i++) { - taskqgroup_cpu_create(qgroup, i, cpu, ithread, pri); + taskqgroup_cpu_create(qgroup, i, cpu); for (k = 0; k < stride; k++) cpu = CPU_NEXT(cpu); @@ -1115,8 +889,6 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, mtx_lock(&qgroup->tqg_lock); qgroup->tqg_cnt = cnt; qgroup->tqg_stride = stride; - qgroup->tqg_intr = ithread; - qgroup->tqg_pri = pri; /* * Adjust drivers to use new taskqs.
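The hunks above return _taskqgroup_adjust() and taskqgroup_cpu_create() to their pre-ithread form, leaving stride-based placement as the only queue-to-CPU policy: queue i lands on the CPU reached by taking `stride` CPU_NEXT() steps from the previous queue's CPU. A minimal userspace sketch of that policy, assuming densely numbered CPUs (the kernel's CPU_NEXT() additionally skips absent CPU IDs):

#include <stdio.h>

static int
cpu_next(int cpu, int ncpu)
{
	return ((cpu + 1) % ncpu);
}

int
main(void)
{
	int ncpu = 8, stride = 2, cnt = 6, cpu = 0;

	for (int i = 0; i < cnt; i++) {
		printf("queue %d -> cpu %d\n", i, cpu);
		/* mirrors the CPU_NEXT() striding loop in _taskqgroup_adjust() */
		for (int k = 0; k < stride; k++)
			cpu = cpu_next(cpu, ncpu);
	}
	return (0);
}

With stride 1 the queues fill consecutive CPUs; a stride equal to the number of SMT threads per core places one queue per physical core.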
@@ -1162,34 +934,12 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, } int -taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) +taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) { int error; mtx_lock(&qgroup->tqg_lock); - error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); - mtx_unlock(&qgroup->tqg_lock); - - return (error); -} - -void -taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*)) -{ - qgroup-> adjust_func = adjust_func; -} - -int -taskqgroup_adjust_once(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) -{ - int error = 0; - - mtx_lock(&qgroup->tqg_lock); - if ((qgroup->tqg_flags & (TQG_ADJUSTED|TQG_NEED_ADJUST)) == TQG_NEED_ADJUST) { - qgroup->tqg_flags |= TQG_ADJUSTED; - error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); - MPASS(error == 0); - } + error = _taskqgroup_adjust(qgroup, cnt, stride); mtx_unlock(&qgroup->tqg_lock); return (error); @@ -1204,9 +954,7 @@ taskqgroup_create(char *name) mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF); qgroup->tqg_name = name; LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks); - MPASS(qgroup->tqg_queue[0].tgc_cnt == 0); - MPASS(qgroup->tqg_queue[0].tgc_cpu == 0); - MPASS(qgroup->tqg_queue[0].tgc_taskq == 0); + return (qgroup); } diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 4c077d5c0a03..41772d250f4a 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include #include + #include #include #include @@ -156,7 +157,7 @@ struct iflib_ctx { if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; - struct sx ifc_sx; + struct mtx ifc_mtx; uint16_t ifc_nhwtxqs; uint16_t ifc_nhwrxqs; @@ -184,8 +185,6 @@ struct iflib_ctx { uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; - uint16_t ifc_cpuid_highest; - uint16_t ifc_sysctl_rx_budget; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; @@ -204,66 +203,8 @@ struct iflib_ctx { eventhandler_tag ifc_vlan_detach_event; uint8_t ifc_mac[ETHER_ADDR_LEN]; char ifc_mtx_name[16]; - LIST_ENTRY(iflib_ctx) ifc_next; }; -static LIST_HEAD(ctx_head, iflib_ctx) ctx_list; -static struct mtx ctx_list_lock; - -TASKQGROUP_DEFINE(if_io, mp_ncpus, 1, true, PI_NET); -TASKQGROUP_DEFINE(if_config, 1, 1, false, PI_SOFT); - -static void -iflib_ctx_apply(void (*fn)(if_ctx_t ctx, void *arg), void *arg) -{ - if_ctx_t ctx; - - mtx_lock(&ctx_list_lock); - LIST_FOREACH(ctx, &ctx_list, ifc_next) { - (fn)(ctx, arg); - } - mtx_unlock(&ctx_list_lock); -} - -static void -_iflib_cpuid_highest(if_ctx_t ctx, void *arg) { - int *cpuid = arg; - - if (*cpuid < ctx->ifc_cpuid_highest) - *cpuid = ctx->ifc_cpuid_highest; -} - -static int -iflib_cpuid_highest(void) -{ - int cpuid = 0; - - iflib_ctx_apply(_iflib_cpuid_highest, &cpuid); - return (cpuid); -} - -static void -iflib_ctx_insert(if_ctx_t ctx) -{ - mtx_lock(&ctx_list_lock); - LIST_INSERT_HEAD(&ctx_list, ctx, ifc_next); - mtx_unlock(&ctx_list_lock); -} - -static void -iflib_ctx_remove(if_ctx_t ctx) -{ - int max_cpuid_prev, max_cpuid_new; - - max_cpuid_prev = iflib_cpuid_highest(); - mtx_lock(&ctx_list_lock); - LIST_REMOVE(ctx, ifc_next); - mtx_unlock(&ctx_list_lock); - max_cpuid_new = max(1, iflib_cpuid_highest()); - if (max_cpuid_new < max_cpuid_prev) { - taskqgroup_adjust(qgroup_if_io, max_cpuid_new, 1, true, PI_NET); - } -} void * iflib_get_softc(if_ctx_t ctx) @@ -322,11 +263,9 @@ iflib_get_sctx(if_ctx_t ctx) #define CTX_IS_VF(ctx) 
((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) #define RX_SW_DESC_MAP_CREATED (1 << 0) -#define RX_SW_DESC_INUSE (1 << 1) -#define RX_NETMAP_INUSE (1 << 2) - -#define TX_SW_DESC_MAP_CREATED (1 << 0) -#define TX_SW_DESC_MAPPED (1 << 1) +#define TX_SW_DESC_MAP_CREATED (1 << 1) +#define RX_SW_DESC_INUSE (1 << 3) +#define TX_SW_DESC_MAPPED (1 << 4) #define M_TOOBIG M_PROTO1 @@ -418,7 +357,6 @@ struct iflib_txq { uint8_t ift_qstatus; uint8_t ift_closed; uint8_t ift_update_freq; - uint8_t ift_stall_count; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_desc_tag; bus_dma_tag_t ift_tso_desc_tag; @@ -510,11 +448,9 @@ struct iflib_rxq { struct grouptask ifr_task; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; - struct if_rxd_info ifr_ri; - struct if_rxd_update ifr_iru; + /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); - #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif @@ -529,11 +465,11 @@ typedef struct if_rxsd { /* multiple of word size */ #ifdef __LP64__ -#define PKT_INFO_SIZE 7 +#define PKT_INFO_SIZE 6 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else -#define PKT_INFO_SIZE 12 +#define PKT_INFO_SIZE 11 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif @@ -559,10 +495,9 @@ pkt_info_zero(if_pkt_info_t pi) pi_pad = (if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; - pi_pad->pkt_val[6] = 0; #ifndef __LP64__ - pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; - pi_pad->pkt_val[10] = 0; pi_pad->pkt_val[11] = 0; + pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; + pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; #endif } @@ -590,24 +525,14 @@ rxd_info_zero(if_rxd_info_t ri) #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 -static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, - "iflib driver parameters"); - -static int iflib_timer_int; -SYSCTL_INT(_net_iflib, OID_AUTO, timer_int, CTLFLAG_RW, &iflib_timer_int, - 0, "interval at which to run per-queue timers (in ticks)"); - -static int force_busdma = 0; -SYSCTL_INT(_net_iflib, OID_AUTO, force_busdma, CTLFLAG_RDTUN, &force_busdma, - 1, "force busdma"); - #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) -#define CTX_LOCK_INIT(_sc, _name) sx_init(&(_sc)->ifc_sx, _name) +#define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF) + +#define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx) +#define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx) +#define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx) -#define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_sx) -#define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_sx) -#define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_sx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) @@ -628,6 +553,9 @@ MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); +TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); +TASKQGROUP_DEFINE(if_config_tqg, 1, 1); + #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 @@ -636,6 +564,9 @@ MODULE_DEPEND(iflib, ether, 1, 1, 1); #endif /* !INVARIANTS */ #endif +static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, + "iflib driver parameters"); + /* * XXX need to ensure that this can't 
accidentally cause the head to be moved backwards */ @@ -758,14 +689,7 @@ iflib_debug_reset(void) static void iflib_debug_reset(void) {} #endif -typedef void async_gtask_fn_t(if_ctx_t ctx, void *arg); -struct async_task_arg { - async_gtask_fn_t *ata_fn; - if_ctx_t ata_ctx; - void *ata_arg; - struct grouptask *ata_gtask; -}; #define IFLIB_DEBUG 0 @@ -787,12 +711,6 @@ static void iflib_ifmp_purge(iflib_txq_t txq); static void _iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_stop(if_ctx_t ctx); static void iflib_if_init_locked(if_ctx_t ctx); -static int async_if_ioctl(if_ctx_t ctx, u_long command, caddr_t data); -static int iflib_config_async_gtask_dispatch(if_ctx_t ctx, async_gtask_fn_t *fn, char *name, void *arg); -static void iflib_admin_reset_deferred(if_ctx_t ctx); - - - #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); #endif @@ -866,94 +784,6 @@ iflib_netmap_register(struct netmap_adapter *na, int onoff) return (status); } -static void -iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) -{ - iflib_fl_t fl; - - fl = &rxq->ifr_fl[flid]; - iru->iru_paddrs = fl->ifl_bus_addrs; - iru->iru_vaddrs = &fl->ifl_vm_addrs[0]; - iru->iru_idxs = fl->ifl_rxd_idxs; - iru->iru_qsidx = rxq->ifr_id; - iru->iru_buf_size = fl->ifl_buf_size; - iru->iru_flidx = fl->ifl_id; -} - -static int -netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init) -{ - struct netmap_adapter *na = kring->na; - u_int const lim = kring->nkr_num_slots - 1; - u_int head = kring->rhead; - struct netmap_ring *ring = kring->ring; - bus_dmamap_t *map; - if_rxd_update_t iru; - if_ctx_t ctx = rxq->ifr_ctx; - iflib_fl_t fl = &rxq->ifr_fl[0]; - uint32_t refill_pidx, nic_i; - - iru = &rxq->ifr_iru; - iru_init(iru, rxq, 0 /* flid */); - map = fl->ifl_sds.ifsd_map; - refill_pidx = netmap_idx_k2n(kring, nm_i); - if (init && (nm_i == head)) - head = nm_prev(head, lim); - for (int tmp_pidx = 0; nm_i != head; tmp_pidx++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]); - uint32_t nic_i_dma = refill_pidx; - nic_i = netmap_idx_k2n(kring, nm_i); - - MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH); - - if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ - return netmap_ring_reinit(kring); - - fl->ifl_vm_addrs[tmp_pidx] = addr; - if (__predict_false(init) && map) { - netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); - } else if (map && (slot->flags & NS_BUF_CHANGED)) { - /* buffer has changed, reload map */ - netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); - } - slot->flags &= ~NS_BUF_CHANGED; - - nm_i = nm_next(nm_i, lim); - fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim); - if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1) - continue; - - iru->iru_pidx = refill_pidx; - iru->iru_count = tmp_pidx+1; - ctx->isc_rxd_refill(ctx->ifc_softc, iru); - - tmp_pidx = 0; - refill_pidx = nic_i; - if (map == NULL) - continue; - - for (int n = 0; n < iru->iru_count; n++) { - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma], - BUS_DMASYNC_PREREAD); - /* XXX - change this to not use the netmap func*/ - nic_i_dma = nm_next(nic_i_dma, lim); - } - } - kring->nr_hwcur = head; - - if (map) - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, 
nic_i); - return (0); -} - /* * Reconcile kernel and user view of the transmit ring. * @@ -1111,20 +941,18 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; uint32_t nm_i; /* index into the netmap ring */ - uint32_t nic_i; /* index into the NIC ring */ + uint32_t nic_i, nic_i_start; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - struct if_rxd_info *ri; - struct if_rxd_update *iru; + struct if_rxd_info ri; + struct if_rxd_update iru; struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = rxq->ifr_fl; - ri = &rxq->ifr_ri; - iru = &rxq->ifr_iru; if (head > lim) return netmap_ring_reinit(kring); @@ -1160,14 +988,14 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) nm_i = netmap_idx_n2k(kring, nic_i); avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { - rxd_info_zero(ri); - ri->iri_frags = rxq->ifr_frags; - ri->iri_qsidx = kring->ring_id; - ri->iri_ifp = ctx->ifc_ifp; - ri->iri_cidx = nic_i; + rxd_info_zero(&ri); + ri.iri_frags = rxq->ifr_frags; + ri.iri_qsidx = kring->ring_id; + ri.iri_ifp = ctx->ifc_ifp; + ri.iri_cidx = nic_i; - error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, ri); - ring->slot[nm_i].len = error ? 0 : ri->iri_len - crclen; + error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); + ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; ring->slot[nm_i].flags = slot_flags; if (fl->ifl_sds.ifsd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, @@ -1200,7 +1028,63 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) if (nm_i == head) return (0); - return (netmap_fl_refill(rxq, kring, nm_i, false)); + iru.iru_paddrs = fl->ifl_bus_addrs; + iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; + iru.iru_idxs = fl->ifl_rxd_idxs; + iru.iru_qsidx = rxq->ifr_id; + iru.iru_buf_size = fl->ifl_buf_size; + iru.iru_flidx = fl->ifl_id; + nic_i_start = nic_i = netmap_idx_k2n(kring, nm_i); + for (i = 0; nm_i != head; i++) { + struct netmap_slot *slot = &ring->slot[nm_i]; + void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[i]); + + if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ + goto ring_reset; + + fl->ifl_vm_addrs[i] = addr; + if (fl->ifl_sds.ifsd_map && (slot->flags & NS_BUF_CHANGED)) { + /* buffer has changed, reload map */ + netmap_reload_map(na, fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], addr); + } + slot->flags &= ~NS_BUF_CHANGED; + + nm_i = nm_next(nm_i, lim); + fl->ifl_rxd_idxs[i] = nic_i = nm_next(nic_i, lim); + if (nm_i != head && i < IFLIB_MAX_RX_REFRESH) + continue; + + iru.iru_pidx = nic_i_start; + iru.iru_count = i; + i = 0; + ctx->isc_rxd_refill(ctx->ifc_softc, &iru); + if (fl->ifl_sds.ifsd_map == NULL) { + nic_i_start = nic_i; + continue; + } + nic_i = nic_i_start; + for (n = 0; n < iru.iru_count; n++) { + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], + BUS_DMASYNC_PREREAD); + nic_i = nm_next(nic_i, lim); + } + nic_i_start = nic_i; + } + kring->nr_hwcur = head; + + if (fl->ifl_sds.ifsd_map) + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + /* + * IMPORTANT: we must leave one free slot in the ring, + * so move nic_i back by one unit + */ + nic_i = nm_prev(nic_i, lim); + ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); + return 
0; + +ring_reset: + return netmap_ring_reinit(kring); } static void @@ -1209,12 +1093,13 @@ iflib_netmap_intr(struct netmap_adapter *na, int onoff) struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; - /* XXX - do we need synchronization here?*/ + CTX_LOCK(ctx); if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } + CTX_UNLOCK(ctx); } @@ -1271,15 +1156,55 @@ static void iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); - struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; struct netmap_slot *slot; - uint32_t nm_i; + struct if_rxd_update iru; + iflib_fl_t fl; + bus_dmamap_t *map; + int nrxd; + uint32_t i, j, pidx_start; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return; - nm_i = netmap_idx_n2k(kring, 0); - netmap_fl_refill(rxq, kring, nm_i, true); + fl = &rxq->ifr_fl[0]; + map = fl->ifl_sds.ifsd_map; + nrxd = ctx->ifc_softc_ctx.isc_nrxd[0]; + iru.iru_paddrs = fl->ifl_bus_addrs; + iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; + iru.iru_idxs = fl->ifl_rxd_idxs; + iru.iru_qsidx = rxq->ifr_id; + iru.iru_buf_size = rxq->ifr_fl[0].ifl_buf_size; + iru.iru_flidx = 0; + + for (pidx_start = i = j = 0; i < nrxd; i++, j++) { + int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i); + void *addr; + + fl->ifl_rxd_idxs[j] = i; + addr = fl->ifl_vm_addrs[j] = PNMB(na, slot + sj, &fl->ifl_bus_addrs[j]); + if (map) { + netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, *map, addr); + map++; + } + + if (j < IFLIB_MAX_RX_REFRESH && i < nrxd - 1) + continue; + + iru.iru_pidx = pidx_start; + pidx_start = i; + iru.iru_count = j; + j = 0; + MPASS(pidx_start + j <= nrxd); + /* Update descriptors and the cached value */ + ctx->isc_rxd_refill(ctx->ifc_softc, &iru); + } + /* preserve queue */ + if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) { + struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; + int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); + ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, t); + } else + ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, nrxd-1); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) @@ -1301,17 +1226,8 @@ prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } -static __inline void -prefetch2(void *x) -{ - __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); -#if (CACHE_LINE_SIZE < 128) - __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); -#endif -} #else #define prefetch(x) -#define prefetch2(x) #endif static void @@ -1427,25 +1343,6 @@ iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) iflib_dma_free(*dmaiter); } -static void -txq_validate(iflib_txq_t txq) { -#ifdef INVARIANTS - uint32_t cidx = txq->ift_cidx; - struct mbuf **ifsd_m = txq->ift_sds.ifsd_m; - if (txq->ift_pidx > cidx) { - int i; - for (i = txq->ift_pidx; i < txq->ift_size; i++) - MPASS(ifsd_m[i] == NULL); - for (i = 0; i < cidx; i++) - MPASS(ifsd_m[i] == NULL); - } else if (txq->ift_pidx < cidx) { - int i; - for (i = txq->ift_pidx; i < cidx; i++) - MPASS(ifsd_m[i] == NULL); - } -#endif -} - #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else @@ -1474,7 +1371,6 @@ iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; - if (!iflib_started) return (FILTER_HANDLED); @@ -1486,35 +1382,6 @@ iflib_fast_intr(void *arg) return (FILTER_HANDLED); } -static int -iflib_fast_intr_rx(void *arg) -{ - iflib_filter_info_t info = arg; - 
struct grouptask *gtask = info->ifi_task; - iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; - if_ctx_t ctx; - int cidx; - - if (!iflib_started) - return (FILTER_HANDLED); - - DBG_COUNTER_INC(fast_intrs); - if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) - return (FILTER_HANDLED); - - ctx = rxq->ifr_ctx; - if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) - cidx = rxq->ifr_cq_cidx; - else - cidx = rxq->ifr_fl[0].ifl_cidx; - if (iflib_rxd_avail(ctx, rxq, cidx, 1)) - GROUPTASK_ENQUEUE(gtask); - else - IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); - return (FILTER_HANDLED); -} - - static int iflib_fast_intr_rxtx(void *arg) { @@ -1531,10 +1398,11 @@ iflib_fast_intr_rxtx(void *arg) if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); - ctx = rxq->ifr_ctx; for (i = 0; i < rxq->ifr_ntxqirq; i++) { qidx_t txqid = rxq->ifr_txqid[i]; + ctx = rxq->ifr_ctx; + if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) { IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; @@ -2106,33 +1974,20 @@ iflib_fl_bufs_free(iflib_fl_t fl) if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); *sd_flags = 0; - } else if (*sd_flags & RX_NETMAP_INUSE) { - if (fl->ifl_sds.ifsd_map != NULL) { - bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; - bus_dmamap_unload(fl->ifl_desc_tag, sd_map); - bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); - } - *sd_flags = 0; - MPASS(*sd_cl == NULL); - MPASS(*sd_m == NULL); } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } - #if MEMORY_LOGGING - if (*sd_m != NULL) - fl->ifl_m_dequeued++; - if (*sd_cl != NULL) - fl->ifl_cl_dequeued++; + fl->ifl_m_dequeued++; + fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { - KASSERT(fl->ifl_sds.ifsd_flags[i] == 0, ("fl->ifl_sds.ifsd_flags[%d]=0x%x, expected 0", - i, fl->ifl_sds.ifsd_flags[i])); + MPASS(fl->ifl_sds.ifsd_flags[i] == 0); MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } @@ -2156,7 +2011,7 @@ iflib_fl_setup(iflib_fl_t fl) if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; - bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size-1); + bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size); /* ** Free current RX buffer structs and their mbufs */ @@ -2235,19 +2090,6 @@ iflib_rx_sds_free(iflib_rxq_t rxq) } } -/* CONFIG context only */ -static void -iflib_handle_hang(if_ctx_t ctx, void *arg __unused) -{ - - CTX_LOCK(ctx); - if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); - IFDI_WATCHDOG_RESET(ctx); - ctx->ifc_watchdog_events++; - iflib_if_init_locked(ctx); - CTX_UNLOCK(ctx); -} - /* * MI independent logic * @@ -2255,49 +2097,46 @@ iflib_handle_hang(if_ctx_t ctx, void *arg __unused) static void iflib_timer(void *arg) { - iflib_txq_t txq_i, txq = arg; + iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; + if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; - /* handle any laggards */ - if (txq->ift_db_pending) - GROUPTASK_ENQUEUE(&txq->ift_task); - IFDI_TIMER(ctx, txq->ift_id); - - if (ifmp_ring_is_stalled(txq->ift_br) && - txq->ift_cleaned_prev == txq->ift_cleaned) - txq->ift_stall_count++; - txq->ift_cleaned_prev = txq->ift_cleaned; - if (txq->ift_stall_count > 2) { - txq->ift_qstatus = IFLIB_QUEUE_HUNG; - device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", - txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); - } - if (txq->ift_id != 0) { - if 
(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) - callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, - txq, txq->ift_timer.c_cpu); - return; - } /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. */ - txq_i = ctx->ifc_txqs; - for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq_i++) { - if (txq_i->ift_qstatus == IFLIB_QUEUE_HUNG) { - iflib_config_async_gtask_dispatch(ctx, iflib_handle_hang, "hang handler", txq); - /* init will reset the callout */ - return; - } - } + IFDI_TIMER(ctx, txq->ift_id); + if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && + ((txq->ift_cleaned_prev == txq->ift_cleaned) || + (sctx->isc_pause_frames == 0))) + goto hung; + if (ifmp_ring_is_stalled(txq->ift_br)) + txq->ift_qstatus = IFLIB_QUEUE_HUNG; + txq->ift_cleaned_prev = txq->ift_cleaned; + /* handle any laggards */ + if (txq->ift_db_pending) + GROUPTASK_ENQUEUE(&txq->ift_task); + sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) - callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, - txq, txq->ift_timer.c_cpu); + callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); + return; +hung: + CTX_LOCK(ctx); + if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); + device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", + txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); + + IFDI_WATCHDOG_RESET(ctx); + ctx->ifc_watchdog_events++; + + ctx->ifc_flags |= IFC_DO_RESET; + iflib_admin_intr_deferred(ctx); + CTX_UNLOCK(ctx); } static void @@ -2309,10 +2148,8 @@ iflib_init_locked(if_ctx_t ctx) iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; - int i, j, tx_ip_csum_flags, tx_ip6_csum_flags, running, reset; + int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; - running = !!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); - reset = !!(ctx->ifc_flags & IFC_DO_RESET); if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); @@ -2336,20 +2173,19 @@ iflib_init_locked(if_ctx_t ctx) CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } + for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { + MPASS(rxq->ifr_id == i); + iflib_netmap_rxq_init(ctx, rxq); + } #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); - if (!running && reset) - return; for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { /* XXX this should really be done on a per-queue basis */ - if (if_getcapenable(ifp) & IFCAP_NETMAP) { - MPASS(rxq->ifr_id == i); - iflib_netmap_rxq_init(ctx, rxq); + if (if_getcapenable(ifp) & IFCAP_NETMAP) continue; - } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); @@ -2362,11 +2198,10 @@ iflib_init_locked(if_ctx_t ctx) IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) - callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, - txq, txq->ift_timer.c_cpu); + callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, + txq->ift_timer.c_cpu); } -/* CONFIG context only */ static int iflib_media_change(if_t ifp) { @@ -2380,19 +2215,17 @@ iflib_media_change(if_t ifp) return (err); } -/* CONFIG context only */ static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); - iflib_admin_intr_deferred(ctx); CTX_LOCK(ctx); + IFDI_UPDATE_ADMIN_STATUS(ctx); 
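The iflib_timer() hunk above restores the two-tick hang heuristic: a queue is declared hung only if it was already marked IFLIB_QUEUE_HUNG on an earlier tick and either cleaned nothing since the last tick or saw no pause frames (pause frames being the one excuse for a stalled ring); the hung path then sets IFF_DRV_OACTIVE, fires the watchdog, and schedules a reset via the admin task. A compilable sketch of the heuristic, with illustrative names rather than the driver's own:

#include <stdbool.h>
#include <stdint.h>

struct txq_state {
	uint64_t cleaned;	/* descriptors cleaned so far */
	uint64_t cleaned_prev;	/* snapshot taken on the previous tick */
	bool	 flagged;	/* stall suspected on an earlier tick */
};

/*
 * Call once per timer tick; returns true when the queue should be
 * declared hung and the reset path taken.
 */
static bool
txq_watchdog_tick(struct txq_state *q, bool ring_stalled, int pause_frames)
{
	if (q->flagged &&
	    (q->cleaned == q->cleaned_prev || pause_frames == 0))
		return (true);
	if (ring_stalled)
		q->flagged = true;
	q->cleaned_prev = q->cleaned;
	return (false);
}

Note the ordering: the hung test runs before the stall flag is refreshed, so a freshly flagged queue always gets one more timer interval (hz/2 in the restored code) to make progress before the reset fires.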
IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } -/* CONFIG context only */ static void iflib_stop(if_ctx_t ctx) { @@ -2407,7 +2240,9 @@ iflib_stop(if_ctx_t ctx) if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); + DELAY(1000); IFDI_STOP(ctx); + DELAY(1000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. */ @@ -2420,13 +2255,11 @@ iflib_stop(if_ctx_t ctx) for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } - /* XXX please rewrite to simply bzero this range */ - txq->ift_processed = txq->ift_cleaned = txq->ift_cleaned_prev = 0; - txq->ift_stall_count = txq->ift_cidx_processed = 0; - txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = 0; + txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; + txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; - txq->ift_no_desc_avail = txq->ift_pullups = 0; + txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); @@ -2569,9 +2402,6 @@ assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd) } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; -#if MEMORY_LOGGING - sd->ifsd_fl->ifl_cl_dequeued++; -#endif /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); @@ -2641,12 +2471,20 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ - struct mbuf *m, *mh, *mt, *mf; + struct mbuf *m, *mh, *mt; ifp = ctx->ifc_ifp; +#ifdef DEV_NETMAP + if (ifp->if_capenable & IFCAP_NETMAP) { + u_int work = 0; + if (netmap_rx_irq(ifp, rxq->ifr_id, &work)) + return (FALSE); + } +#endif + mh = mt = NULL; MPASS(budget > 0); - rx_pkts = rx_bytes = 0; + rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else @@ -2709,14 +2547,11 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) } /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) - __iflib_fl_refill_lt(ctx, fl, 2*budget + 8); + __iflib_fl_refill_lt(ctx, fl, budget + 8); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); - mt = mf = NULL; while (mh != NULL) { m = mh; - if (mf == NULL) - mf = m; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT @@ -2726,25 +2561,15 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) - if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) { - if (mf == m) - mf = NULL; + if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; - } #endif - if (mt != NULL) - mt->m_nextpkt = m; - mt = m; - } - if (mf != NULL) { - ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); + ifp->if_input(ifp, m); } - if (rx_pkts) { - if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); - if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); - } + if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work @@ -2752,9 +2577,14 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif - return (avail || iflib_rxd_avail(ctx, rxq, *cidxp, 1)); + if (avail) + return 
true; + return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); err: - iflib_admin_reset_deferred(ctx); + CTX_LOCK(ctx); + ctx->ifc_flags |= IFC_DO_RESET; + iflib_admin_intr_deferred(ctx); + CTX_UNLOCK(ctx); return (false); } @@ -2841,19 +2671,20 @@ print_pkt(if_pkt_info_t pi) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { - if_ctx_t ctx = txq->ift_ctx; -#ifdef INET - if_shared_ctx_t sctx = ctx->ifc_sctx; -#endif - if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; + if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; struct ether_vlan_header *eh; struct mbuf *m, *n; - int err; - if (scctx->isc_txrx->ift_txd_errata && - (err = scctx->isc_txrx->ift_txd_errata(ctx->ifc_softc, mp))) - return (err); n = m = *mp; + if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && + M_WRITABLE(m) == 0) { + if ((m = m_dup(m, M_NOWAIT)) == NULL) { + return (ENOMEM); + } else { + m_freem(*mp); + n = *mp = m; + } + } /* * Determine where frame payload starts. @@ -2874,10 +2705,6 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) pi->ipi_ehdrlen = ETHER_HDR_LEN; } - if (if_getmtu(txq->ift_ctx->ifc_ifp) >= pi->ipi_len) { - pi->ipi_csum_flags &= ~(CSUM_IP_TSO|CSUM_IP6_TSO); - } - switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: @@ -2922,21 +2749,21 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; - if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) + if (pi->ipi_csum_flags & CSUM_IP) ip->ip_sum = 0; - if (IS_TSO4(pi)) { - if (pi->ipi_ipproto == IPPROTO_TCP) { - if (__predict_false(th == NULL)) { - txq->ift_pullups++; - if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) - return (ENOMEM); - th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); - } - pi->ipi_tcp_hflags = th->th_flags; - pi->ipi_tcp_hlen = th->th_off << 2; - pi->ipi_tcp_seq = th->th_seq; + if (pi->ipi_ipproto == IPPROTO_TCP) { + if (__predict_false(th == NULL)) { + txq->ift_pullups++; + if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) + return (ENOMEM); + th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); } + pi->ipi_tcp_hflags = th->th_flags; + pi->ipi_tcp_hlen = th->th_off << 2; + pi->ipi_tcp_seq = th->th_seq; + } + if (IS_TSO4(pi)) { if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); th->th_sum = in_pseudo(ip->ip_src.s_addr, @@ -2967,15 +2794,15 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; - if (IS_TSO6(pi)) { - if (pi->ipi_ipproto == IPPROTO_TCP) { - if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { - if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) - return (ENOMEM); - } - pi->ipi_tcp_hflags = th->th_flags; - pi->ipi_tcp_hlen = th->th_off << 2; + if (pi->ipi_ipproto == IPPROTO_TCP) { + if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { + if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) + return (ENOMEM); } + pi->ipi_tcp_hflags = th->th_flags; + pi->ipi_tcp_hlen = th->th_off << 2; + } + if (IS_TSO6(pi)) { if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); @@ -3084,9 +2911,9 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, ifsd_m = txq->ift_sds.ifsd_m; ntxd = 
txq->ift_size; pidx = txq->ift_pidx; - MPASS(ifsd_m[pidx] == NULL); - if (force_busdma || map != NULL) { + if (map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; + err = bus_dmamap_load_mbuf_sg(tag, map, *m0, segs, nsegs, BUS_DMA_NOWAIT); if (err) @@ -3239,8 +3066,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); prefetch(&txq->ift_sds.ifsd_flags[next]); } - } - if (txq->ift_sds.ifsd_map != NULL) + } else if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[pidx]; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { @@ -3253,19 +3079,18 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) m_head = *m_headp; pkt_info_zero(&pi); - pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); - pi.ipi_pidx = pidx; - pi.ipi_qsidx = txq->ift_id; pi.ipi_len = m_head->m_pkthdr.len; + pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; + pi.ipi_pidx = pidx; + pi.ipi_qsidx = txq->ift_id; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) return (err); m_head = *m_headp; - pi.ipi_hdr_data = mtod(m_head, caddr_t); } retry: @@ -3442,7 +3267,6 @@ iflib_tx_desc_free(iflib_txq_t txq, int n) gen = 0; } } - txq_validate(txq); txq->ift_cidx = cidx; txq->ift_gen = gen; } @@ -3492,10 +3316,10 @@ _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { - prefetch2(&items[next]); - prefetch2(items[(cidx + offset + 1) & (size-1)]); - prefetch2(items[(cidx + offset + 2) & (size-1)]); - prefetch2(items[(cidx + offset + 3) & (size-1)]); + prefetch(&items[next]); + prefetch(items[(cidx + offset + 1) & (size-1)]); + prefetch(items[(cidx + offset + 2) & (size-1)]); + prefetch(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } @@ -3676,7 +3500,7 @@ _task_fn_tx(void *context) #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; - if (if_getcapenable(ifp) & IFCAP_NETMAP) { + if ((ifp->if_capenable & IFCAP_NETMAP)) { if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); @@ -3684,7 +3508,8 @@ _task_fn_tx(void *context) } if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE); - ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); + else + ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { @@ -3700,7 +3525,6 @@ _task_fn_rx(void *context) if_ctx_t ctx = rxq->ifr_ctx; bool more; int rc; - uint16_t budget; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; @@ -3708,19 +3532,7 @@ _task_fn_rx(void *context) DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; - more = true; -#ifdef DEV_NETMAP - if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { - u_int work = 0; - if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) { - more = false; - } - } -#endif - budget = ctx->ifc_sysctl_rx_budget; - if (budget == 0) - budget = 16; /* XXX */ - if (more == false || (more = iflib_rxeof(rxq, budget)) == false) { + if ((more = iflib_rxeof(rxq, 16 /* XXX */)) == false) { if (ctx->ifc_flags 
& IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { @@ -3735,44 +3547,43 @@ _task_fn_rx(void *context) GROUPTASK_ENQUEUE(&rxq->ifr_task); } -/* CONFIG context only */ static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; - int i, running; + int i; + + if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) { + if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { + return; + } + } CTX_LOCK(ctx); - running = !!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); - for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } - if (running) { - for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) - callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, - txq, txq->ift_timer.c_cpu); - IFDI_LINK_INTR_ENABLE(ctx); - } - if (ctx->ifc_flags & IFC_DO_RESET) { - iflib_if_init_locked(ctx); - ctx->ifc_flags &= ~IFC_DO_RESET; - } IFDI_UPDATE_ADMIN_STATUS(ctx); + for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) + callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); + IFDI_LINK_INTR_ENABLE(ctx); + if (ctx->ifc_flags & IFC_DO_RESET) { + ctx->ifc_flags &= ~IFC_DO_RESET; + iflib_if_init_locked(ctx); + } CTX_UNLOCK(ctx); - if (LINK_ACTIVE(ctx) == 0 || !running) + if (LINK_ACTIVE(ctx) == 0) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } -/* CONFIG context only */ static void _task_fn_iov(void *context) { @@ -3887,20 +3698,21 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE); - GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { + GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); + } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) { + GROUPTASK_ENQUEUE(&txq->ift_task); } return (err); } -/* CONFIG context only */ static void iflib_if_qflush(if_t ifp) { @@ -3984,12 +3796,29 @@ iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: - err = async_if_ioctl(ctx, command, data); + CTX_LOCK(ctx); + if (if_getflags(ifp) & IFF_UP) { + if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { + if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & + (IFF_PROMISC | IFF_ALLMULTI)) { + err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); + } + } else + reinit = 1; + } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { + iflib_stop(ctx); + } + ctx->ifc_if_flags = if_getflags(ifp); + CTX_UNLOCK(ctx); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - err = async_if_ioctl(ctx, command, data); + CTX_LOCK(ctx); + IFDI_INTR_DISABLE(ctx); + IFDI_MULTI_SET(ctx); + IFDI_INTR_ENABLE(ctx); + CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: @@ -4083,7 +3912,6 @@ iflib_if_get_counter(if_t ifp, ift_counter cnt) * **********************************************************************/ -/* CONFIG context only */ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { @@ -4103,7 +3931,6 @@ iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) CTX_UNLOCK(ctx); } -/* CONFIG context only */ static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { @@ -4123,7 +3950,6 @@ iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) CTX_UNLOCK(ctx); } 
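With the async config task gone, the SIOCSIFFLAGS path above is handled inline under the ctx lock; the key test is the XOR of the old and new interface flags, which calls into the driver only when IFF_PROMISC or IFF_ALLMULTI actually changed state. A self-contained sketch of that test (the flag values are illustrative placeholders rather than the <net/if.h> definitions):

#include <stdbool.h>

#define IFF_PROMISC	0x100	/* illustrative values, not <net/if.h>'s */
#define IFF_ALLMULTI	0x200

/* True when the changed bits intersect the promiscuity-related flags. */
static bool
promisc_update_needed(int old_flags, int new_flags)
{
	return (((old_flags ^ new_flags) &
	    (IFF_PROMISC | IFF_ALLMULTI)) != 0);
}

The remaining transitions reduce to three cases: interface coming up (reinit), interface going down while running (iflib_stop()), or an unrelated flag change that is merely recorded in ifc_if_flags.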
-/* CONFIG context only */ static void iflib_led_func(void *arg, int onoff) { @@ -4268,10 +4094,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } - CTX_LOCK(ctx); - err = IFDI_ATTACH_PRE(ctx); - CTX_UNLOCK(ctx); - if (err) { + + if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } @@ -4299,8 +4123,6 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct /* set unconditionally for !x86 */ ctx->ifc_flags |= IFC_DMAR; #endif - if (force_busdma) - ctx->ifc_flags |= IFC_DMAR; msix_bar = scctx->isc_msix_bar; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; @@ -4313,7 +4135,6 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ device_printf(dev, "# rx descriptors must be a power of 2\n"); - err = EINVAL; goto fail; } @@ -4352,7 +4173,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ - taskqgroup_attach(qgroup_if_config, &ctx->ifc_admin_task, ctx, -1, "admin"); + taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); /* ** Now setup MSI or MSI/X, should ** return us the number of supported @@ -4411,10 +4232,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); - CTX_LOCK(ctx); - err = IFDI_ATTACH_POST(ctx); - CTX_UNLOCK(ctx); - if (err) { + if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } @@ -4426,7 +4244,6 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); - iflib_ctx_insert(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); fail_detach: @@ -4437,9 +4254,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct fail_queues: /* XXX free queues */ fail: - CTX_LOCK(ctx); IFDI_DETACH(ctx); - CTX_UNLOCK(ctx); return (err); } @@ -4487,10 +4302,12 @@ iflib_device_deregister(if_ctx_t ctx) iflib_netmap_detach(ifp); ether_ifdetach(ifp); + /* ether_ifdetach calls if_qflush - lock must be destroyed afterwards*/ + CTX_LOCK_DESTROY(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) @@ -4504,16 +4321,13 @@ iflib_device_deregister(if_ctx_t ctx) free(fl->ifl_rx_bitmap, M_IFLIB); } - tqg = qgroup_if_config; + tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); - CTX_LOCK(ctx); IFDI_DETACH(ctx); - CTX_UNLOCK(ctx); - CTX_LOCK_DESTROY(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); } @@ -4534,7 +4348,6 @@ iflib_device_deregister(if_ctx_t ctx) iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); - iflib_ctx_remove(ctx); free(ctx, M_IFLIB); return (0); } @@ -4630,14 +4443,13 @@ iflib_device_iov_add_vf(device_t dev,
uint16_t vfnum, const nvlist_t *params) * **********************************************************************/ +/* + * - Start a fast taskqueue thread for each core + * - Start a taskqueue for control operations + */ static int iflib_module_init(void) { - - iflib_timer_int = hz / 2; - TUNABLE_INT_FETCH("net.iflib.timer_int", &iflib_timer_int); - LIST_INIT(&ctx_list); - mtx_init(&ctx_list_lock, "ctx list", NULL, MTX_DEF); return (0); } @@ -5081,124 +4893,25 @@ iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } -#ifdef SMP static int -find_nth(if_ctx_t ctx, int qid) +find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid) { - cpuset_t cpus; int i, cpuid, eqid, count; - CPU_COPY(&ctx->ifc_cpus, &cpus); + CPU_COPY(&ctx->ifc_cpus, cpus); count = CPU_COUNT(&ctx->ifc_cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { - cpuid = CPU_FFS(&cpus); + cpuid = CPU_FFS(cpus); MPASS(cpuid != 0); - CPU_CLR(cpuid-1, &cpus); + CPU_CLR(cpuid-1, cpus); } - cpuid = CPU_FFS(&cpus); + cpuid = CPU_FFS(cpus); MPASS(cpuid != 0); return (cpuid-1); } -static int -find_child_with_core(int cpu, struct cpu_group *grp) -{ - int i; - - if (grp->cg_children == 0) - return -1; - - MPASS(grp->cg_child); - for (i = 0; i < grp->cg_children; i++) { - if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) - return i; - } - - return -1; -} - -/* - * Find the nth thread on the specified core - */ -static int -find_thread(int cpu, int thread_num) -{ - struct cpu_group *grp; - int i; - cpuset_t cs; - - grp = smp_topo(); - if (grp == NULL) - return cpu; - i = 0; - while ((i = find_child_with_core(cpu, grp)) != -1) { - /* If the child only has one cpu, don't descend */ - if (grp->cg_child[i].cg_count <= 1) - break; - grp = &grp->cg_child[i]; - } - - /* If they don't share at least an L2 cache, use the same CPU */ - if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) - return cpu; - - /* Now pick one */ - CPU_COPY(&grp->cg_mask, &cs); - for (i = thread_num % grp->cg_count; i > 0; i--) { - MPASS(CPU_FFS(&cs)); - CPU_CLR(CPU_FFS(&cs) - 1, &cs); - } - MPASS(CPU_FFS(&cs)); - return CPU_FFS(&cs) - 1; -} - -static int -get_thread_num(if_ctx_t ctx, iflib_intr_type_t type, int qid) -{ - switch (type) { - case IFLIB_INTR_TX: - /* TX queues get threads on the same core as the corresponding RX queue */ - /* XXX handle multiple RX threads per core and more than two threads per core */ - return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; - case IFLIB_INTR_RX: - case IFLIB_INTR_RXTX: - /* RX queues get the first thread on their core */ - return qid / CPU_COUNT(&ctx->ifc_cpus); - default: - return -1; - } -} -#else -#define get_thread_num(ctx, type, qid) 0 -#define find_thread(cpuid, tid) 0 -#define find_nth(ctx, gid) 0 -#endif - -/* Just to avoid copy/paste */ -static inline int -iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid, - struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name) -{ - int cpuid; - int err, tid; - - cpuid = find_nth(ctx, qid); - tid = get_thread_num(ctx, type, qid); - MPASS(tid >= 0); - cpuid = find_thread(cpuid, tid); - err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name); - if (err) { - device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err); - return (err); - } - if (cpuid > ctx->ifc_cpuid_highest) - ctx->ifc_cpuid_highest = cpuid; - MPASS(gtask->gt_taskqueue != NULL); - return 0; -} - int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, 
iflib_intr_type_t type, driver_filter_t *filter, @@ -5207,8 +4920,9 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; + cpuset_t cpus; gtask_fn_t *fn; - int tqrid, err; + int tqrid, err, cpuid; driver_filter_t *intr_fast; void *q; @@ -5221,7 +4935,7 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); @@ -5230,16 +4944,16 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; fn = _task_fn_rx; - intr_fast = iflib_fast_intr_rx; + intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; GROUPTASK_INIT(gtask, 0, fn, q); @@ -5249,7 +4963,7 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; - tqg = qgroup_if_config; + tqg = qgroup_if_config_tqg; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; @@ -5271,9 +4985,8 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, return (0); if (tqrid != -1) { - err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name); - if (err) - return (err); + cpuid = find_nth(ctx, &cpus, qid); + taskqgroup_attach_cpu(tqg, gtask, q, cpuid, irq->ii_rid, name); } else { taskqgroup_attach(tqg, gtask, q, tqrid, name); } @@ -5288,25 +5001,24 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void struct taskqgroup *tqg; gtask_fn_t *fn; void *q; - int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; fn = _task_fn_tx; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; fn = _task_fn_rx; break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; - tqg = qgroup_if_config; + tqg = qgroup_if_config_tqg; rid = -1; fn = _task_fn_iov; break; @@ -5314,14 +5026,7 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); - if (rid != -1) { - err = iflib_irq_set_affinity(ctx, rid, type, qid, gtask, tqg, q, name); - if (err) - taskqgroup_attach(tqg, gtask, q, rid, name); - } - else { - taskqgroup_attach(tqg, gtask, q, rid, name); - } + taskqgroup_attach(tqg, gtask, q, rid, name); } void @@ -5351,7 +5056,7 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int * q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; @@ -5368,7 +5073,7 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int * taskqgroup_attach(tqg, gtask, q, tqrid, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); - taskqgroup_attach(qgroup_if_io, &txq->ift_task, txq, 
tqrid, "tx"); + taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, tqrid, "tx"); return (0); } @@ -5401,28 +5106,12 @@ iflib_admin_intr_deferred(if_ctx_t ctx) struct grouptask *gtask; gtask = &ctx->ifc_admin_task; - MPASS(gtask != NULL && gtask->gt_taskqueue != NULL); + MPASS(gtask->gt_taskqueue != NULL); #endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } -/* CONFIG context only */ -static void -iflib_handle_reset(if_ctx_t ctx, void *arg) -{ - CTX_LOCK(ctx); - ctx->ifc_flags |= IFC_DO_RESET; - iflib_admin_intr_deferred(ctx); - CTX_UNLOCK(ctx); -} - -static void -iflib_admin_reset_deferred(if_ctx_t ctx) -{ - iflib_config_async_gtask_dispatch(ctx, iflib_handle_reset, "reset handler", NULL); -} - void iflib_iov_intr_deferred(if_ctx_t ctx) { @@ -5434,7 +5123,7 @@ void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { - taskqgroup_attach_cpu(qgroup_if_io, gt, uniq, cpu, -1, name); + taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); } void @@ -5443,104 +5132,14 @@ iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, { GROUPTASK_INIT(gtask, 0, fn, ctx); - taskqgroup_attach(qgroup_if_config, gtask, gtask, -1, name); + taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); } -static void -iflib_multi_set(if_ctx_t ctx, void *arg) -{ - CTX_LOCK(ctx); - IFDI_INTR_DISABLE(ctx); - IFDI_MULTI_SET(ctx); - IFDI_INTR_ENABLE(ctx); - CTX_UNLOCK(ctx); -} - -static void -iflib_flags_set(if_ctx_t ctx, void *arg) -{ - int reinit, err; - if_t ifp = ctx->ifc_ifp; - - err = reinit = 0; - CTX_LOCK(ctx); - if (if_getflags(ifp) & IFF_UP) { - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); - } - } else - reinit = 1; - } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - iflib_stop(ctx); - } - ctx->ifc_if_flags = if_getflags(ifp); - if (reinit) - iflib_if_init_locked(ctx); - CTX_UNLOCK(ctx); - if (err) - log(LOG_WARNING, "IFDI_PROMISC_SET returned %d\n", err); -} - -static void -async_gtask(void *ctx) -{ - struct async_task_arg *at_arg = ctx; - if_ctx_t if_ctx = at_arg->ata_ctx; - void *arg = at_arg->ata_arg; - - at_arg->ata_fn(if_ctx, arg); - taskqgroup_detach(qgroup_if_config, at_arg->ata_gtask); - free(at_arg->ata_gtask, M_IFLIB); -} - -static int -iflib_config_async_gtask_dispatch(if_ctx_t ctx, async_gtask_fn_t *fn, char *name, void *arg) -{ - struct grouptask *gtask; - struct async_task_arg *at_arg; - - if ((gtask = malloc(sizeof(struct grouptask) + sizeof(struct async_task_arg), M_IFLIB, M_NOWAIT|M_ZERO)) == NULL) - return (ENOMEM); - - at_arg = (struct async_task_arg *)(gtask + 1); - at_arg->ata_fn = fn; - at_arg->ata_ctx = ctx; - at_arg->ata_arg = arg; - at_arg->ata_gtask = gtask; - - GROUPTASK_INIT(gtask, 0, async_gtask, at_arg); - taskqgroup_attach(qgroup_if_config, gtask, gtask, -1, name); - GROUPTASK_ENQUEUE(gtask); - return (0); -} - -static int -async_if_ioctl(if_ctx_t ctx, u_long command, caddr_t data) -{ - int rc; - - switch (command) { - case SIOCADDMULTI: - case SIOCDELMULTI: - rc = iflib_config_async_gtask_dispatch(ctx, iflib_multi_set, "async_if_multi", NULL); - break; - case SIOCSIFFLAGS: - rc = iflib_config_async_gtask_dispatch(ctx, iflib_flags_set, "async_if_flags", NULL); - break; - default: - panic("unknown command %lx", command); - } - return (rc); -} - - void iflib_config_gtask_deinit(struct grouptask *gtask) { - taskqgroup_detach(qgroup_if_config, gtask); + 
@@ -5607,11 +5206,11 @@ iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name,
 	    info, 0, iflib_sysctl_int_delay, "I", description);
 }
 
-struct sx *
+struct mtx *
 iflib_ctx_lock_get(if_ctx_t ctx)
 {
 
-	return (&ctx->ifc_sx);
+	return (&ctx->ifc_mtx);
 }
 
 static int
@@ -5731,22 +5330,13 @@ iflib_msix_init(if_ctx_t ctx)
 		rx_queues = min(rx_queues, tx_queues);
 	}
 
-	device_printf(dev, "trying %d rx queues %d tx queues \n", rx_queues, tx_queues);
+	device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues);
 
-	vectors = tx_queues + rx_queues + admincnt;
+	vectors = rx_queues + admincnt;
 	if ((err = pci_alloc_msix(dev, &vectors)) == 0) {
 		device_printf(dev,
 		    "Using MSIX interrupts with %d vectors\n", vectors);
 		scctx->isc_vectors = vectors;
-
-		if (vectors < tx_queues + rx_queues + admincnt) {
-			vectors -= admincnt;
-			if (vectors % 2 != 0)
-				vectors -= 1;
-			if (rx_queues > vectors / 2)
-				rx_queues = vectors / 2;
-			tx_queues = vectors - rx_queues;
-		}
 		scctx->isc_nrxqsets = rx_queues;
 		scctx->isc_ntxqsets = tx_queues;
 		scctx->isc_intr = IFLIB_INTR_MSIX;
@@ -5881,12 +5471,9 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx)
 	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable",
 	    CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0,
 	    "permit #txq != #rxq");
-	SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", 
+	SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix",
 	    CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0,
 	    "disable MSIX (default 0)");
-	SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget",
-	    CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0,
-	    "set the rx budget");
 
 	/* XXX change for per-queue sizes */
 	SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
@@ -5897,10 +5484,6 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx)
 	    CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER,
 	    mp_ndesc_handler, "A",
 	    "list of # of rx descriptors to use, 0 = use default #");
-
-	SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "watchdog_events",
-	    CTLFLAG_RD, &ctx->ifc_watchdog_events, 0,
-	    "Watchdog events seen since load");
 }
 
 static void
diff --git a/sys/net/iflib.h b/sys/net/iflib.h
index 8c7ebb4594aa..6ac75dbb0afb 100644
--- a/sys/net/iflib.h
+++ b/sys/net/iflib.h
@@ -119,7 +119,6 @@ typedef struct if_pkt_info {
 	qidx_t ipi_pidx;	/* start pidx for encap */
 	qidx_t ipi_new_pidx;	/* next available pidx post-encap */
 	/* offload handling */
-	caddr_t ipi_hdr_data;	/* raw header */
 	uint8_t ipi_ehdrlen;	/* ether header length */
 	uint8_t ipi_ip_hlen;	/* ip header length */
 	uint8_t ipi_tcp_hlen;	/* tcp header length */
@@ -184,7 +183,6 @@ typedef struct if_txrx {
 	void (*ift_rxd_refill) (void * , if_rxd_update_t iru);
 	void (*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx);
 	int (*ift_legacy_intr) (void *);
-	int (*ift_txd_errata) (void *, struct mbuf **mp);
 } *if_txrx_t;
 
 typedef struct if_softc_ctx {
@@ -296,9 +294,9 @@ typedef enum {
  */
 #define IFLIB_HAS_TXCQ		0x08
 /*
- *
+ * Interface does checksum in place
  */
-#define IFLIB_UNUSED___0	0x10
+#define IFLIB_NEED_SCRATCH	0x10
 /*
  * Interface doesn't expect in_pseudo for th_sum
  */
@@ -307,10 +305,6 @@ typedef enum {
  * Interface doesn't align IP header
  */
 #define IFLIB_DO_RX_FIXUP	0x40
-/*
- * Driver needs csum zeroed for offloading
- */
-#define IFLIB_NEED_ZERO_CSUM	0x80
@@ -387,7 +381,7 @@
 int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count);
 void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count);
 
-struct sx *iflib_ctx_lock_get(if_ctx_t);
+struct mtx *iflib_ctx_lock_get(if_ctx_t);
 struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t);
 
 void iflib_led_create(if_ctx_t ctx);
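
The iflib_msix_init() hunk changes the vector budget: TX queues no longer receive dedicated vectors (their tasks are serviced off the RX vectors), so the request shrinks from tx_queues + rx_queues + admincnt to rx_queues + admincnt, and the redistribution loop for a short pci_alloc_msix() grant disappears. For example, 8 RX queues, 8 TX queues and 1 admin vector now ask for 9 vectors rather than 17. A toy, userspace-runnable restatement of the arithmetic; the function name is illustrative and nothing here is iflib code:

    #include <stdio.h>

    /* Mirrors only the new request size computed in iflib_msix_init(). */
    static int
    xx_vectors_needed(int rx_queues, int tx_queues, int admincnt)
    {
    	(void)tx_queues;	/* TX shares the RX interrupt vectors */
    	return (rx_queues + admincnt);
    }

    int
    main(void)
    {
    	/* The old request would have been 8 + 8 + 1 = 17. */
    	printf("%d\n", xx_vectors_needed(8, 8, 1));	/* prints 9 */
    	return (0);
    }
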
diff --git a/sys/net/mp_ring.c b/sys/net/mp_ring.c
index e2e94e9087c8..3ff272c719ab 100644
--- a/sys/net/mp_ring.c
+++ b/sys/net/mp_ring.c
@@ -226,15 +226,11 @@ drain_ring_lockless(struct ifmp_ring *r, union ring_state os, uint16_t prev, int
 		if (cidx != pidx && pending < 64 && total < budget)
 			continue;
 		critical_enter();
-		os.state = ns.state = r->state;
-		ns.cidx = cidx;
-		ns.flags = state_to_flags(ns, total >= budget);
-		while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0) {
-			cpu_spinwait();
+		do {
 			os.state = ns.state = r->state;
 			ns.cidx = cidx;
 			ns.flags = state_to_flags(ns, total >= budget);
-		}
+		} while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0);
 		critical_exit();
 
 		if (ns.flags == ABDICATED)
@@ -458,12 +454,18 @@ ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget)
 	do {
 		os.state = ns.state = r->state;
 		ns.pidx_tail = pidx_stop;
-		if (os.flags == IDLE)
-			ns.flags = ABDICATED;
+		ns.flags = BUSY;
 	} while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0);
 	critical_exit();
 	counter_u64_add(r->enqueues, n);
 
+	/*
+	 * Turn into a consumer if some other thread isn't active as a consumer
+	 * already.
+	 */
+	if (os.flags != BUSY)
+		drain_ring_lockless(r, ns, os.flags, budget);
+
 	return (0);
 }
 #endif
@@ -474,9 +476,7 @@ ifmp_ring_check_drainage(struct ifmp_ring *r, int budget)
 	union ring_state os, ns;
 
 	os.state = r->state;
-	if ((os.flags != STALLED && os.flags != ABDICATED) ||	// Only continue in STALLED and ABDICATED
-	    os.pidx_head != os.pidx_tail ||			// Require work to be available
-	    (os.flags != ABDICATED && r->can_drain(r) == 0))	// Can either drain, or everyone left
+	if (os.flags != STALLED || os.pidx_head != os.pidx_tail || r->can_drain(r) == 0)
 		return;
 
 	MPASS(os.cidx != os.pidx_tail);	/* implied by STALLED */
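
Both mp_ring.c hunks rest on the same lock-free idiom: re-read the packed 64-bit ring state, build the desired successor, and retry the compare-and-swap until it lands, then use the captured old state (os.flags != BUSY in the enqueue path) to decide whether this thread must become the consumer. A self-contained C11 rendering of the retry loop; the state packing below is illustrative and not struct ifmp_ring's real layout:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Toy packing: low 32 bits consumer index, high 32 bits flags. */
    enum { XX_IDLE = 0, XX_BUSY = 1 };

    static uint32_t
    xx_set_state(_Atomic uint64_t *state, uint32_t cidx, uint32_t flags)
    {
    	uint64_t os, ns;

    	do {
    		/*
    		 * Snapshot and recompute on every attempt, exactly like
    		 * the do/while that replaced the unrolled first pass.
    		 */
    		os = atomic_load_explicit(state, memory_order_relaxed);
    		ns = ((uint64_t)flags << 32) | cidx;
    	} while (!atomic_compare_exchange_weak_explicit(state, &os, ns,
    	    memory_order_acq_rel, memory_order_relaxed));

    	return ((uint32_t)(os >> 32));	/* old flags: the caller's cue */
    }
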
diff --git a/sys/sys/gtaskqueue.h b/sys/sys/gtaskqueue.h
index be36a4756af8..e85196372323 100644
--- a/sys/sys/gtaskqueue.h
+++ b/sys/sys/gtaskqueue.h
@@ -58,9 +58,7 @@ int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *grptask,
 void	taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask);
 struct taskqgroup *taskqgroup_create(char *name);
 void	taskqgroup_destroy(struct taskqgroup *qgroup);
-int	taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri);
-int	taskqgroup_adjust_once(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri);
-void	taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*));
+int	taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride);
 
 #define TASK_ENQUEUED	0x1
 #define TASK_SKIP_WAKEUP	0x2
@@ -82,40 +80,27 @@ void taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*)
 #define TASKQGROUP_DECLARE(name)	\
 extern struct taskqgroup *qgroup_##name
 
-
-#define TASKQGROUP_DEFINE(name, cnt, stride, intr, pri)	\
+#define TASKQGROUP_DEFINE(name, cnt, stride)	\
 \
 struct taskqgroup *qgroup_##name;	\
 \
 static void	\
-taskqgroup_adjust_##name(void *arg)	\
-{	\
-	int max = (intr) ? 1 : (cnt);	\
-	if (arg != NULL) {	\
-		uintptr_t maxcpu = (uintptr_t) arg;	\
-		max = maxcpu;	\
-	}	\
-	\
-	taskqgroup_adjust_once(qgroup_##name, max, (stride), (intr), (pri));	\
-}	\
-	\
-SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY,	\
-    taskqgroup_adjust_##name, NULL);	\
-	\
-static void	\
 taskqgroup_define_##name(void *arg)	\
 {	\
 	qgroup_##name = taskqgroup_create(#name);	\
-	taskqgroup_set_adjust(qgroup_##name, taskqgroup_adjust_##name);	\
 }	\
+	\
 SYSINIT(taskqgroup_##name, SI_SUB_TASKQ, SI_ORDER_FIRST,	\
-    taskqgroup_define_##name, NULL)
-
-
-
-
-
-
+    taskqgroup_define_##name, NULL);	\
+	\
+static void	\
+taskqgroup_adjust_##name(void *arg)	\
+{	\
+	taskqgroup_adjust(qgroup_##name, (cnt), (stride));	\
+}	\
+	\
+SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY,	\
+    taskqgroup_adjust_##name, NULL)
 
 TASKQGROUP_DECLARE(net);
 TASKQGROUP_DECLARE(softirq);
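
To see what the reshuffled macro now produces, here is a hand expansion of TASKQGROUP_DEFINE(foo, mp_ncpus, 1), whitespace aside; "foo" and the argument choices are just an example, not taken from this patch. The first SYSINIT creates the group early (SI_SUB_TASKQ); the second sizes it once the CPUs are online (SI_SUB_SMP), replacing the old intr/pri plumbing:

    struct taskqgroup *qgroup_foo;

    static void
    taskqgroup_define_foo(void *arg)
    {
    	qgroup_foo = taskqgroup_create("foo");
    }

    SYSINIT(taskqgroup_foo, SI_SUB_TASKQ, SI_ORDER_FIRST,
        taskqgroup_define_foo, NULL);

    static void
    taskqgroup_adjust_foo(void *arg)
    {
    	taskqgroup_adjust(qgroup_foo, mp_ncpus, 1);
    }

    SYSINIT(taskqgroup_adj_foo, SI_SUB_SMP, SI_ORDER_ANY,
        taskqgroup_adjust_foo, NULL);
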