MFC 296379,296380,296381,296593,296594,296595

296379
    hyperv/hn: Add multiple channel support, a.k.a. vRSS

    Each channel contains one RX ring and one TX ring, and we try to
    distribute the channels to different CPUs evenly.

    Note: Currently we don't have enough information to extract
    the RSS type and RSS hash value from the received packets.

    This greatly improves the TX/RX performance for an 8-vCPU Hyper-V
    guest on 10GbE: TCP can max out the 10GbE link when multiple
    RX/TX rings are enabled.

    This almost doubles the TX/RX performance for locally connected
    Hyper-V guests: it was 6Gbps w/ 128 TCP streams, and is now 11Gbps
    w/ multiple RX/TX rings enabled.

    This is not enabled by default; it will be switched on after more
    testing.

    Collaborated with:  Hongjiang Zhang <honzhan microsoft com>
    MFC after:  2 weeks
    Sponsored by:       Microsoft OSTC
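
For illustration only, a minimal self-contained C sketch of the round-robin channel placement described above; the channel/CPU counts and base CPU are invented stand-ins, while the real driver binds channels to CPUs with vmbus_channel_cpu_set() against mp_ncpus:

#include <stdio.h>

#define NCHAN   8       /* hypothetical number of channels offered */
#define NCPU    4       /* hypothetical number of virtual CPUs */

int
main(void)
{
        int base = 0;   /* per-device leader CPU */
        int idx;

        /*
         * Each channel owns one RX ring and one TX ring; spread the
         * channels over the CPUs evenly by wrapping around the CPU count.
         */
        for (idx = 0; idx < NCHAN; idx++) {
                printf("channel %d (RX ring %d, TX ring %d) -> CPU %d\n",
                    idx, idx, idx, (base + idx) % NCPU);
        }
        return (0);
}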

296380
    hyperv/hn: Pass channel to send done callbacks.

    Mainly to tighten the data packet send-done check.

    MFC after:  2 weeks
    Sponsored by:       Microsoft OSTC
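
A rough sketch of what passing the channel to the send-done callback buys: the callback can now check that the completion arrived on the channel its TX ring is bound to. The types below are simplified stand-ins, not the driver's real structures:

#include <assert.h>

struct channel { int id; };
struct tx_ring { struct channel *chan; };
struct packet  { struct tx_ring *txr; };

/* Send-done callback now takes the originating channel as its first argument. */
static void
tx_done(struct channel *chan, void *xpkt)
{
        struct packet *pkt = xpkt;

        /* The stricter check: the completion must come back on the ring's channel. */
        assert(pkt->txr->chan == chan);
}

int
main(void)
{
        struct channel ch = { 0 };
        struct tx_ring txr = { &ch };
        struct packet pkt = { &txr };

        tx_done(&ch, &pkt);
        return (0);
}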

296381
    hyperv/hn: Add per-TX ring stats for # of transmitted packets

    MFC after:  2 weeks
    Sponsored by:       Microsoft OSTC

296593
    hyperv/hn: Move if_initname to an earlier place

    So that functions shared with the attach path can use if_printf().

    While I'm here, remove the unnecessary if_dunit and if_dname assignments.

    MFC after:  1 week
    Sponsored by:       Microsoft OSTC
    Differential Revision:      https://reviews.freebsd.org/D5576
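
A kernel-style sketch of the ordering this change establishes (it will not build as a stand-alone program; the helper name and error handling are illustrative only): name the ifnet right after if_alloc() so any helper shared with the attach path can already use if_printf():

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>

static struct ifnet *
example_attach_ifnet(device_t dev)
{
        struct ifnet *ifp;

        ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL)
                return (NULL);

        /*
         * Set the name first; if_printf() prefixes messages with it
         * (e.g. "hn0: ..."), so helpers called later during attach
         * can log meaningfully.
         */
        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        if_printf(ifp, "attach in progress\n");

        return (ifp);
}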

296594
    hyperv/hn: Factor out hn_channel_attach

    MFC after:  1 week
    Sponsored by:       Microsoft OSTC
    Differential Revision:      https://reviews.freebsd.org/D5577

296595
    hyperv/hn: Make the # of TX rings configurable.

    Rename the tunables to avoid confusion.

    MFC after:  1 week
    Sponsored by:       Microsoft OSTC
    Differential Revision:      https://reviews.freebsd.org/D5578
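
To make the renamed tunables concrete, a small self-contained C sketch of the clamping done in netvsc_attach(): hw.hn.chan_cnt picks the number of channels (and hence RX rings), hw.hn.tx_ring_cnt the number of TX rings, and both are bounded as shown (the values here are invented for the example):

#include <stdio.h>

/* Invented stand-ins for the loader tunables and the CPU count. */
static int hn_chan_cnt = 0;     /* hw.hn.chan_cnt: 0 = one channel per CPU */
static int hn_tx_ring_cnt = 0;  /* hw.hn.tx_ring_cnt: 0 = one TX ring per channel */
static int hn_use_if_start = 0; /* hw.hn.use_if_start */
static int mp_ncpus = 8;

int
main(void)
{
        int ring_cnt, tx_ring_cnt;

        /* The # of channels is the # of RX rings; clamp to the CPU count. */
        ring_cnt = hn_chan_cnt;
        if (ring_cnt <= 0 || ring_cnt > mp_ncpus)
                ring_cnt = mp_ncpus;

        /* TX rings may be fewer than the channels, but never more. */
        tx_ring_cnt = hn_tx_ring_cnt;
        if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
                tx_ring_cnt = ring_cnt;
        if (hn_use_if_start)
                tx_ring_cnt = 1;        /* if_start needs only one TX ring */

        printf("channels/RX rings: %d, TX rings: %d\n", ring_cnt, tx_ring_cnt);
        return (0);
}
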
sephe 2016-06-16 02:48:18 +00:00
parent e10381a633
commit 2fae5018b1
8 changed files with 641 additions and 50 deletions


@ -911,6 +911,8 @@ int hv_vmbus_channel_teardown_gpdal(
struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary);
void vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu);
/**
* @brief Get physical address from virtual
*/


@ -56,14 +56,14 @@ MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper-V netvsc driver");
/*
* Forward declarations
*/
static void hv_nv_on_channel_callback(void *context);
static void hv_nv_on_channel_callback(void *xchan);
static int hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
static int hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
static int hv_nv_connect_to_vsp(struct hv_device *device);
static void hv_nv_on_send_completion(netvsc_dev *net_dev,
struct hv_device *device, hv_vm_packet_descriptor *pkt);
struct hv_device *device, struct hv_vmbus_channel *, hv_vm_packet_descriptor *pkt);
static void hv_nv_on_receive_completion(struct hv_vmbus_channel *chan,
uint64_t tid, uint32_t status);
static void hv_nv_on_receive(netvsc_dev *net_dev,
@ -660,6 +660,34 @@ hv_nv_disconnect_from_vsp(netvsc_dev *net_dev)
hv_nv_destroy_send_buffer(net_dev);
}
/*
* Callback handler for subchannel offer
* @@param context new subchannel
*/
static void
hv_nv_subchan_callback(void *xchan)
{
struct hv_vmbus_channel *chan = xchan;
netvsc_dev *net_dev;
uint16_t chn_index = chan->offer_msg.offer.sub_channel_index;
struct hv_device *device = chan->device;
hn_softc_t *sc = device_get_softc(device->device);
int ret;
net_dev = sc->net_dev;
if (chn_index >= net_dev->num_channel) {
/* Would this ever happen? */
return;
}
netvsc_subchan_callback(sc, chan);
chan->hv_chan_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK);
ret = hv_vmbus_channel_open(chan, NETVSC_DEVICE_RING_BUFFER_SIZE,
NETVSC_DEVICE_RING_BUFFER_SIZE, NULL, 0,
hv_nv_on_channel_callback, chan);
}
/*
* Net VSC on device add
*
@ -692,6 +720,7 @@ hv_nv_on_device_add(struct hv_device *device, void *additional_info)
free(chan->hv_chan_rdbuf, M_NETVSC);
goto cleanup;
}
chan->sc_creation_callback = hv_nv_subchan_callback;
/*
* Connect with the NetVsp
@ -757,7 +786,8 @@ hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
*/
static void
hv_nv_on_send_completion(netvsc_dev *net_dev,
struct hv_device *device, hv_vm_packet_descriptor *pkt)
struct hv_device *device, struct hv_vmbus_channel *chan,
hv_vm_packet_descriptor *pkt)
{
nvsp_msg *nvsp_msg_pkt;
netvsc_packet *net_vsc_pkt;
@ -769,7 +799,9 @@ hv_nv_on_send_completion(netvsc_dev *net_dev,
|| nvsp_msg_pkt->hdr.msg_type
== nvsp_msg_1_type_send_rx_buf_complete
|| nvsp_msg_pkt->hdr.msg_type
== nvsp_msg_1_type_send_send_buf_complete) {
== nvsp_msg_1_type_send_send_buf_complete
|| nvsp_msg_pkt->hdr.msg_type
== nvsp_msg5_type_subchannel) {
/* Copy the response back */
memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt,
sizeof(nvsp_msg));
@ -806,7 +838,7 @@ hv_nv_on_send_completion(netvsc_dev *net_dev,
}
/* Notify the layer above us */
net_vsc_pkt->compl.send.on_send_completion(
net_vsc_pkt->compl.send.on_send_completion(chan,
net_vsc_pkt->compl.send.send_completion_context);
}
@ -962,6 +994,46 @@ retry_send_cmplt:
}
}
/*
* Net VSC receiving vRSS send table from VSP
*/
static void
hv_nv_send_table(struct hv_device *device, hv_vm_packet_descriptor *pkt)
{
netvsc_dev *net_dev;
nvsp_msg *nvsp_msg_pkt;
int i;
uint32_t count, *table;
net_dev = hv_nv_get_inbound_net_device(device);
if (!net_dev)
return;
nvsp_msg_pkt =
(nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
if (nvsp_msg_pkt->hdr.msg_type !=
nvsp_msg5_type_send_indirection_table) {
printf("Netvsc: !Warning! receive msg type not "
"send_indirection_table. type = %d\n",
nvsp_msg_pkt->hdr.msg_type);
return;
}
count = nvsp_msg_pkt->msgs.vers_5_msgs.send_table.count;
if (count != VRSS_SEND_TABLE_SIZE) {
printf("Netvsc: Received wrong send table size: %u\n", count);
return;
}
table = (uint32_t *)
((unsigned long)&nvsp_msg_pkt->msgs.vers_5_msgs.send_table +
nvsp_msg_pkt->msgs.vers_5_msgs.send_table.offset);
for (i = 0; i < count; i++)
net_dev->vrss_send_table[i] = table[i];
}
/*
* Net VSC on channel callback
*/
@ -993,11 +1065,15 @@ hv_nv_on_channel_callback(void *xchan)
desc = (hv_vm_packet_descriptor *)buffer;
switch (desc->type) {
case HV_VMBUS_PACKET_TYPE_COMPLETION:
hv_nv_on_send_completion(net_dev, device, desc);
hv_nv_on_send_completion(net_dev, device,
chan, desc);
break;
case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
hv_nv_on_receive(net_dev, device, chan, desc);
break;
case HV_VMBUS_PACKET_TYPE_DATA_IN_BAND:
hv_nv_send_table(device, desc);
break;
default:
device_printf(dev,
"hv_cb recv unknow type %d "


@ -86,6 +86,92 @@ MALLOC_DECLARE(M_NETVSC);
*/
#define NVSP_MAX_PACKETS_PER_RECEIVE 375
/* vRSS stuff */
#define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
#define RNDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
#define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
#define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
struct rndis_obj_header {
uint8_t type;
uint8_t rev;
uint16_t size;
} __packed;
/* rndis_recv_scale_cap/cap_flag */
#define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
#define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000
#define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000
#define RNDIS_RSS_CAPS_USING_MSI_X 0x08000000
#define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000
#define RNDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000
#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100
#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200
#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400
/* RNDIS_RECEIVE_SCALE_CAPABILITIES */
struct rndis_recv_scale_cap {
struct rndis_obj_header hdr;
uint32_t cap_flag;
uint32_t num_int_msg;
uint32_t num_recv_que;
uint16_t num_indirect_tabent;
} __packed;
/* rndis_recv_scale_param flags */
#define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001
#define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002
#define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004
#define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008
#define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010
/* Hash info bits */
#define RNDIS_HASH_FUNC_TOEPLITZ 0x00000001
#define RNDIS_HASH_IPV4 0x00000100
#define RNDIS_HASH_TCP_IPV4 0x00000200
#define RNDIS_HASH_IPV6 0x00000400
#define RNDIS_HASH_IPV6_EX 0x00000800
#define RNDIS_HASH_TCP_IPV6 0x00001000
#define RNDIS_HASH_TCP_IPV6_EX 0x00002000
#define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
#define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40
#define ITAB_NUM 128
#define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
/* RNDIS_RECEIVE_SCALE_PARAMETERS */
typedef struct rndis_recv_scale_param_ {
struct rndis_obj_header hdr;
/* Qualifies the rest of the information */
uint16_t flag;
/* The base CPU number to do receive processing. not used */
uint16_t base_cpu_number;
/* This describes the hash function and type being enabled */
uint32_t hashinfo;
/* The size of indirection table array */
uint16_t indirect_tabsize;
/* The offset of the indirection table from the beginning of this
* structure
*/
uint32_t indirect_taboffset;
/* The size of the hash secret key */
uint16_t hashkey_size;
/* The offset of the secret key from the beginning of this structure */
uint32_t hashkey_offset;
uint32_t processor_masks_offset;
uint32_t num_processor_masks;
uint32_t processor_masks_entry_size;
} rndis_recv_scale_param;
typedef enum nvsp_msg_type_ {
nvsp_msg_type_none = 0,
@ -146,6 +232,27 @@ typedef enum nvsp_msg_type_ {
nvsp_msg_2_type_alloc_chimney_handle,
nvsp_msg_2_type_alloc_chimney_handle_complete,
nvsp_msg2_max = nvsp_msg_2_type_alloc_chimney_handle_complete,
/*
* Version 4 Messages
*/
nvsp_msg4_type_send_vf_association,
nvsp_msg4_type_switch_data_path,
nvsp_msg4_type_uplink_connect_state_deprecated,
nvsp_msg4_max = nvsp_msg4_type_uplink_connect_state_deprecated,
/*
* Version 5 Messages
*/
nvsp_msg5_type_oid_query_ex,
nvsp_msg5_type_oid_query_ex_comp,
nvsp_msg5_type_subchannel,
nvsp_msg5_type_send_indirection_table,
nvsp_msg5_max = nvsp_msg5_type_send_indirection_table,
} nvsp_msg_type;
typedef enum nvsp_status_ {
@ -793,6 +900,39 @@ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_complete_
uint32_t status;
} __packed nvsp_2_msg_send_vmq_rndis_pkt_complete;
/*
* Version 5 messages
*/
enum nvsp_subchannel_operation {
NVSP_SUBCHANNEL_NONE = 0,
NVSP_SUBCHANNE_ALLOCATE,
NVSP_SUBCHANNE_MAX
};
typedef struct nvsp_5_subchannel_request_
{
uint32_t op;
uint32_t num_subchannels;
} __packed nvsp_5_subchannel_request;
typedef struct nvsp_5_subchannel_complete_
{
uint32_t status;
/* Actual number of subchannels allocated */
uint32_t num_subchannels;
} __packed nvsp_5_subchannel_complete;
typedef struct nvsp_5_send_indirect_table_
{
/* The number of entries in the send indirection table */
uint32_t count;
/*
* The offset of the send indireciton table from top of
* this struct. The send indirection table tells which channel
* to put the send traffic on. Each entry is a channel number.
*/
uint32_t offset;
} __packed nvsp_5_send_indirect_table;
typedef union nvsp_1_msg_uber_ {
nvsp_1_msg_send_ndis_version send_ndis_vers;
@ -838,11 +978,18 @@ typedef union nvsp_2_msg_uber_ {
nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete;
} __packed nvsp_2_msg_uber;
typedef union nvsp_5_msg_uber_
{
nvsp_5_subchannel_request subchannel_request;
nvsp_5_subchannel_complete subchn_complete;
nvsp_5_send_indirect_table send_table;
} __packed nvsp_5_msg_uber;
typedef union nvsp_all_msgs_ {
nvsp_msg_init_uber init_msgs;
nvsp_1_msg_uber vers_1_msgs;
nvsp_2_msg_uber vers_2_msgs;
nvsp_5_msg_uber vers_5_msgs;
} __packed nvsp_all_msgs;
/*
@ -883,6 +1030,7 @@ typedef struct nvsp_msg_ {
#define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024)
#define NETVSC_PACKET_SIZE PAGE_SIZE
#define VRSS_SEND_TABLE_SIZE 16
/*
* Data types
@ -923,10 +1071,15 @@ typedef struct netvsc_dev_ {
hv_bool_uint8_t destroy;
/* Negotiated NVSP version */
uint32_t nvsp_version;
uint32_t num_channel;
uint32_t vrss_send_table[VRSS_SEND_TABLE_SIZE];
} netvsc_dev;
struct hv_vmbus_channel;
typedef void (*pfn_on_send_rx_completion)(void *);
typedef void (*pfn_on_send_rx_completion)(struct hv_vmbus_channel *, void *);
#define NETVSC_DEVICE_RING_BUFFER_SIZE (128 * PAGE_SIZE)
#define NETVSC_PACKET_MAXPAGE 32
@ -1010,13 +1163,18 @@ struct hn_rx_ring {
u_long hn_csum_trusted;
u_long hn_lro_tried;
u_long hn_small_pkts;
u_long hn_pkts;
/* Rarely used stuffs */
struct sysctl_oid *hn_rx_sysctl_tree;
int hn_rx_flags;
} __aligned(CACHE_LINE_SIZE);
#define HN_TRUST_HCSUM_IP 0x0001
#define HN_TRUST_HCSUM_TCP 0x0002
#define HN_TRUST_HCSUM_UDP 0x0004
struct hv_vmbus_channel;
#define HN_RX_FLAG_ATTACHED 0x1
struct hn_tx_ring {
#ifndef HN_USE_TXDESC_BUFRING
@ -1053,13 +1211,17 @@ struct hn_tx_ring {
u_long hn_txdma_failed;
u_long hn_tx_collapsed;
u_long hn_tx_chimney;
u_long hn_pkts;
/* Rarely used stuffs */
struct hn_txdesc *hn_txdesc;
bus_dma_tag_t hn_tx_rndis_dtag;
struct sysctl_oid *hn_tx_sysctl_tree;
int hn_tx_flags;
} __aligned(CACHE_LINE_SIZE);
#define HN_TX_FLAG_ATTACHED 0x1
/*
* Device-specific softc structure
*/
@ -1085,9 +1247,12 @@ typedef struct hn_softc {
int hn_tx_ring_cnt;
int hn_tx_ring_inuse;
struct hn_tx_ring *hn_tx_ring;
int hn_cpu;
int hn_tx_chimney_max;
struct taskqueue *hn_tx_taskq;
struct sysctl_oid *hn_tx_sysctl_tree;
struct sysctl_oid *hn_rx_sysctl_tree;
} hn_softc_t;
/*

View File

@ -281,13 +281,16 @@ static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
&hn_use_if_start, 0, "Use if_start TX method");
static int hn_ring_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, ring_cnt, CTLFLAG_RDTUN,
&hn_ring_cnt, 0, "# of TX/RX rings to used");
static int hn_chan_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
&hn_chan_cnt, 0,
"# of channels to use; each channel has one RX ring and one TX ring");
static int hn_single_tx_ring = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, single_tx_ring, CTLFLAG_RDTUN,
&hn_single_tx_ring, 0, "Use one TX ring");
static int hn_tx_ring_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
&hn_tx_ring_cnt, 0, "# of TX rings to use");
static u_int hn_cpu_index;
/*
* Forward declarations
@ -327,6 +330,7 @@ static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **);
static void hn_create_rx_data(struct hn_softc *sc, int);
static void hn_destroy_rx_data(struct hn_softc *sc);
static void hn_set_tx_chimney_size(struct hn_softc *, int);
static void hn_channel_attach(struct hn_softc *, struct hv_vmbus_channel *);
static int hn_transmit(struct ifnet *, struct mbuf *);
static void hn_xmit_qflush(struct ifnet *);
@ -454,37 +458,46 @@ netvsc_attach(device_t dev)
ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
ifp->if_softc = sc;
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
ring_cnt = hn_ring_cnt;
if (ring_cnt <= 0 || ring_cnt >= mp_ncpus)
/*
* Figure out the # of RX rings (ring_cnt) and the # of TX rings
* to use (tx_ring_cnt).
*
* NOTE:
* The # of RX rings to use is same as the # of channels to use.
*/
ring_cnt = hn_chan_cnt;
if (ring_cnt <= 0 || ring_cnt > mp_ncpus)
ring_cnt = mp_ncpus;
tx_ring_cnt = ring_cnt;
if (hn_single_tx_ring || hn_use_if_start) {
/*
* - Explicitly asked to use single TX ring.
* - ifnet.if_start is used; ifnet.if_start only needs
* one TX ring.
*/
tx_ring_cnt = hn_tx_ring_cnt;
if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
tx_ring_cnt = ring_cnt;
if (hn_use_if_start) {
/* ifnet.if_start only needs one TX ring. */
tx_ring_cnt = 1;
}
/*
* Set the leader CPU for channels.
*/
sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
error = hn_create_tx_data(sc, tx_ring_cnt);
if (error)
goto failed;
hn_create_rx_data(sc, ring_cnt);
/*
* Associate the first TX/RX ring w/ the primary channel.
*/
chan = device_ctx->channel;
chan->hv_chan_rxr = &sc->hn_rx_ring[0];
chan->hv_chan_txr = &sc->hn_tx_ring[0];
sc->hn_tx_ring[0].hn_chan = chan;
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
ifp->if_dunit = unit;
ifp->if_dname = NETVSC_DEVNAME;
KASSERT(HV_VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel"));
KASSERT(chan->offer_msg.offer.sub_channel_index == 0,
("primary channel subidx %u",
chan->offer_msg.offer.sub_channel_index));
hn_channel_attach(sc, chan);
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = hn_ioctl;
@ -522,10 +535,18 @@ netvsc_attach(device_t dev)
error = hv_rf_on_device_add(device_ctx, &device_info, ring_cnt);
if (error)
goto failed;
KASSERT(sc->net_dev->num_channel > 0 &&
sc->net_dev->num_channel <= sc->hn_rx_ring_inuse,
("invalid channel count %u, should be less than %d",
sc->net_dev->num_channel, sc->hn_rx_ring_inuse));
/* TODO: vRSS */
sc->hn_tx_ring_inuse = 1;
sc->hn_rx_ring_inuse = 1;
/*
* Set the # of TX/RX rings that could be used according to
* the # of channels that host offered.
*/
if (sc->hn_tx_ring_inuse > sc->net_dev->num_channel)
sc->hn_tx_ring_inuse = sc->net_dev->num_channel;
sc->hn_rx_ring_inuse = sc->net_dev->num_channel;
device_printf(dev, "%d TX ring, %d RX ring\n",
sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
@ -730,7 +751,7 @@ hn_txdesc_hold(struct hn_txdesc *txd)
}
static void
hn_tx_done(void *xpkt)
hn_tx_done(struct hv_vmbus_channel *chan, void *xpkt)
{
netvsc_packet *packet = xpkt;
struct hn_txdesc *txd;
@ -740,6 +761,11 @@ hn_tx_done(void *xpkt)
packet->compl.send.send_completion_tid;
txr = txd->txr;
KASSERT(txr->hn_chan == chan,
("channel mismatch, on channel%u, should be channel%u",
chan->offer_msg.offer.sub_channel_index,
txr->hn_chan->offer_msg.offer.sub_channel_index));
txr->hn_has_txeof = 1;
hn_txdesc_put(txr, txd);
}
@ -1025,6 +1051,7 @@ again:
if (txd->m->m_flags & M_MCAST)
if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
}
txr->hn_pkts++;
}
hn_txdesc_put(txr, txd);
@ -1357,6 +1384,7 @@ skip:
*/
ifp->if_ipackets++;
rxr->hn_pkts++;
if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
#if defined(INET) || defined(INET6)
@ -2122,6 +2150,13 @@ hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
#endif
#endif /* INET || INET6 */
ctx = device_get_sysctl_ctx(dev);
child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
/* Create dev.hn.UNIT.rx sysctl tree */
sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
CTLFLAG_RD, 0, "");
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
@ -2149,10 +2184,27 @@ hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
#endif
#endif /* INET || INET6 */
}
ctx = device_get_sysctl_ctx(dev);
child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
if (sc->hn_rx_sysctl_tree != NULL) {
char name[16];
/*
* Create per RX ring sysctl tree:
* dev.hn.UNIT.rx.RINGID
*/
snprintf(name, sizeof(name), "%d", i);
rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
OID_AUTO, name, CTLFLAG_RD, 0, "");
if (rxr->hn_rx_sysctl_tree != NULL) {
SYSCTL_ADD_ULONG(ctx,
SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
OID_AUTO, "packets", CTLFLAG_RW,
&rxr->hn_pkts, "# of packets received");
}
}
}
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
CTLTYPE_U64 | CTLFLAG_RW, sc,
@ -2419,6 +2471,9 @@ hn_create_tx_ring(struct hn_softc *sc, int id)
CTLFLAG_RD, &txr->hn_oactive, 0,
"over active");
}
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
CTLFLAG_RW, &txr->hn_pkts,
"# of packets transmitted");
}
}
@ -2782,6 +2837,55 @@ hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
mtx_unlock(&txr->hn_tx_lock);
}
static void
hn_channel_attach(struct hn_softc *sc, struct hv_vmbus_channel *chan)
{
struct hn_rx_ring *rxr;
int idx;
idx = chan->offer_msg.offer.sub_channel_index;
KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
("invalid channel index %d, should > 0 && < %d",
idx, sc->hn_rx_ring_inuse));
rxr = &sc->hn_rx_ring[idx];
KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
("RX ring %d already attached", idx));
rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
chan->hv_chan_rxr = rxr;
if_printf(sc->hn_ifp, "link RX ring %d to channel%u\n",
idx, chan->offer_msg.child_rel_id);
if (idx < sc->hn_tx_ring_inuse) {
struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
("TX ring %d already attached", idx));
txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
chan->hv_chan_txr = txr;
txr->hn_chan = chan;
if_printf(sc->hn_ifp, "link TX ring %d to channel%u\n",
idx, chan->offer_msg.child_rel_id);
}
/* Bind channel to a proper CPU */
vmbus_channel_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus);
}
void
netvsc_subchan_callback(struct hn_softc *sc, struct hv_vmbus_channel *chan)
{
KASSERT(!HV_VMBUS_CHAN_ISPRIMARY(chan),
("subchannel callback on primary channel"));
KASSERT(chan->offer_msg.offer.sub_channel_index > 0,
("invalid channel subidx %u",
chan->offer_msg.offer.sub_channel_index));
hn_channel_attach(sc, chan);
}
static void
hn_tx_taskq_create(void *arg __unused)
{


@ -167,6 +167,14 @@
#define RNDIS_OID_GEN_MACHINE_NAME 0x0001021A
#define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B
/*
* For receive side scale
*/
/* Query only */
#define RNDIS_OID_GEN_RSS_CAPABILITIES 0x00010203
/* Query and set */
#define RNDIS_OID_GEN_RSS_PARAMETERS 0x00010204
#define RNDIS_OID_GEN_XMIT_OK 0x00020101
#define RNDIS_OID_GEN_RCV_OK 0x00020102
#define RNDIS_OID_GEN_XMIT_ERROR 0x00020103
@ -1060,6 +1068,8 @@ struct hv_vmbus_channel;
int netvsc_recv(struct hv_vmbus_channel *chan,
netvsc_packet *packet, rndis_tcp_ip_csum_info *csum_info);
void netvsc_channel_rollup(struct hv_vmbus_channel *chan);
void netvsc_subchan_callback(struct hn_softc *sc,
struct hv_vmbus_channel *chan);
void* hv_set_rppi_data(rndis_msg *rndis_mesg,
uint32_t rppi_size,


@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <vm/pmap.h>
#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
#include "hv_net_vsc.h"
#include "hv_rndis.h"
#include "hv_rndis_filter.h"
@ -69,8 +70,8 @@ static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter);
static int hv_rf_init_device(rndis_device *device);
static int hv_rf_open_device(rndis_device *device);
static int hv_rf_close_device(rndis_device *device);
static void hv_rf_on_send_request_completion(void *context);
static void hv_rf_on_send_request_halt_completion(void *context);
static void hv_rf_on_send_request_completion(struct hv_vmbus_channel *, void *context);
static void hv_rf_on_send_request_halt_completion(struct hv_vmbus_channel *, void *context);
int
hv_rf_send_offload_request(struct hv_device *device,
rndis_offload_params *offloads);
@ -224,6 +225,8 @@ hv_rf_send_request(rndis_device *device, rndis_request *request,
{
int ret;
netvsc_packet *packet;
netvsc_dev *net_dev = device->net_dev;
int send_buf_section_idx;
/* Set up the packet to send it */
packet = &request->pkt;
@ -238,6 +241,20 @@ hv_rf_send_request(rndis_device *device, rndis_request *request,
packet->page_buffers[0].offset =
(unsigned long)&request->request_msg & (PAGE_SIZE - 1);
if (packet->page_buffers[0].offset +
packet->page_buffers[0].length > PAGE_SIZE) {
packet->page_buf_count = 2;
packet->page_buffers[0].length =
PAGE_SIZE - packet->page_buffers[0].offset;
packet->page_buffers[1].pfn =
hv_get_phys_addr((char*)&request->request_msg +
packet->page_buffers[0].length) >> PAGE_SHIFT;
packet->page_buffers[1].offset = 0;
packet->page_buffers[1].length =
request->request_msg.msg_len -
packet->page_buffers[0].length;
}
packet->compl.send.send_completion_context = request; /* packet */
if (message_type != REMOTE_NDIS_HALT_MSG) {
packet->compl.send.on_send_completion =
@ -247,10 +264,25 @@ hv_rf_send_request(rndis_device *device, rndis_request *request,
hv_rf_on_send_request_halt_completion;
}
packet->compl.send.send_completion_tid = (unsigned long)device;
packet->send_buf_section_idx =
NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
if (packet->tot_data_buf_len < net_dev->send_section_size) {
send_buf_section_idx = hv_nv_get_next_send_section(net_dev);
if (send_buf_section_idx !=
NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) {
char *dest = ((char *)net_dev->send_buf +
send_buf_section_idx * net_dev->send_section_size);
memcpy(dest, &request->request_msg, request->request_msg.msg_len);
packet->send_buf_section_idx = send_buf_section_idx;
packet->send_buf_section_size = packet->tot_data_buf_len;
packet->page_buf_count = 0;
goto sendit;
}
/* Failed to allocate chimney send buffer; move on */
}
packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
packet->send_buf_section_size = 0;
sendit:
ret = hv_nv_on_send(device->net_dev->dev->channel, packet);
return (ret);
@ -528,6 +560,19 @@ hv_rf_query_device(rndis_device *device, uint32_t oid, void *result,
query->info_buffer_length = 0;
query->device_vc_handle = 0;
if (oid == RNDIS_OID_GEN_RSS_CAPABILITIES) {
struct rndis_recv_scale_cap *cap;
request->request_msg.msg_len +=
sizeof(struct rndis_recv_scale_cap);
query->info_buffer_length = sizeof(struct rndis_recv_scale_cap);
cap = (struct rndis_recv_scale_cap *)((unsigned long)query +
query->info_buffer_offset);
cap->hdr.type = RNDIS_OBJECT_TYPE_RSS_CAPABILITIES;
cap->hdr.rev = RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
cap->hdr.size = sizeof(struct rndis_recv_scale_cap);
}
ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG);
if (ret != 0) {
/* Fixme: printf added */
@ -582,6 +627,114 @@ hv_rf_query_device_link_status(rndis_device *device)
RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &device->link_status, &size));
}
static uint8_t netvsc_hash_key[HASH_KEYLEN] = {
0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
/*
* RNDIS set vRSS parameters
*/
static int
hv_rf_set_rss_param(rndis_device *device, int num_queue)
{
rndis_request *request;
rndis_set_request *set;
rndis_set_complete *set_complete;
rndis_recv_scale_param *rssp;
uint32_t extlen = sizeof(rndis_recv_scale_param) +
(4 * ITAB_NUM) + HASH_KEYLEN;
uint32_t *itab, status;
uint8_t *keyp;
int i, ret;
request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG,
RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen);
if (request == NULL) {
if (bootverbose)
printf("Netvsc: No memory to set vRSS parameters.\n");
ret = -1;
goto cleanup;
}
set = &request->request_msg.msg.set_request;
set->oid = RNDIS_OID_GEN_RSS_PARAMETERS;
set->info_buffer_length = extlen;
set->info_buffer_offset = sizeof(rndis_set_request);
set->device_vc_handle = 0;
/* Fill out the rssp parameter structure */
rssp = (rndis_recv_scale_param *)(set + 1);
rssp->hdr.type = RNDIS_OBJECT_TYPE_RSS_PARAMETERS;
rssp->hdr.rev = RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
rssp->hdr.size = sizeof(rndis_recv_scale_param);
rssp->flag = 0;
rssp->hashinfo = RNDIS_HASH_FUNC_TOEPLITZ | RNDIS_HASH_IPV4 |
RNDIS_HASH_TCP_IPV4 | RNDIS_HASH_IPV6 | RNDIS_HASH_TCP_IPV6;
rssp->indirect_tabsize = 4 * ITAB_NUM;
rssp->indirect_taboffset = sizeof(rndis_recv_scale_param);
rssp->hashkey_size = HASH_KEYLEN;
rssp->hashkey_offset = rssp->indirect_taboffset +
rssp->indirect_tabsize;
/* Set indirection table entries */
itab = (uint32_t *)(rssp + 1);
for (i = 0; i < ITAB_NUM; i++)
itab[i] = i % num_queue;
/* Set hash key values */
keyp = (uint8_t *)((unsigned long)rssp + rssp->hashkey_offset);
for (i = 0; i < HASH_KEYLEN; i++)
keyp[i] = netvsc_hash_key[i];
ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG);
if (ret != 0) {
goto cleanup;
}
/*
* Wait for the response from the host. Another thread will signal
* us when the response has arrived. In the failure case,
* sema_timedwait() returns a non-zero status after waiting 5 seconds.
*/
ret = sema_timedwait(&request->wait_sema, 5 * hz);
if (ret == 0) {
/* Response received, check status */
set_complete = &request->response_msg.msg.set_complete;
status = set_complete->status;
if (status != RNDIS_STATUS_SUCCESS) {
/* Bad response status, return error */
if (bootverbose)
printf("Netvsc: Failed to set vRSS "
"parameters.\n");
ret = -2;
} else {
if (bootverbose)
printf("Netvsc: Successfully set vRSS "
"parameters.\n");
}
} else {
/*
* We cannot deallocate the request since we may still
* receive a send completion for it.
*/
printf("Netvsc: vRSS set timeout, id = %u, ret = %d\n",
request->request_msg.msg.init_request.request_id, ret);
goto exit;
}
cleanup:
if (request != NULL) {
hv_put_rndis_request(device, request);
}
exit:
return (ret);
}
/*
* RNDIS filter set packet filter
* Sends an rndis request with the new filter, then waits for a response
@ -817,12 +970,15 @@ hv_rf_close_device(rndis_device *device)
*/
int
hv_rf_on_device_add(struct hv_device *device, void *additl_info,
int nchan __unused)
int nchan)
{
int ret;
netvsc_dev *net_dev;
rndis_device *rndis_dev;
nvsp_msg *init_pkt;
rndis_offload_params offloads;
struct rndis_recv_scale_cap rsscaps;
uint32_t rsscaps_size = sizeof(struct rndis_recv_scale_cap);
netvsc_device_info *dev_info = (netvsc_device_info *)additl_info;
device_t dev = device->device;
@ -888,6 +1044,67 @@ hv_rf_on_device_add(struct hv_device *device, void *additl_info,
dev_info->link_state = rndis_dev->link_status;
net_dev->num_channel = 1;
if (net_dev->nvsp_version < NVSP_PROTOCOL_VERSION_5 || nchan == 1)
return (0);
memset(&rsscaps, 0, rsscaps_size);
ret = hv_rf_query_device(rndis_dev,
RNDIS_OID_GEN_RSS_CAPABILITIES,
&rsscaps, &rsscaps_size);
if ((ret != 0) || (rsscaps.num_recv_que < 2)) {
device_printf(dev, "hv_rf_query_device failed or "
"rsscaps.num_recv_que < 2 \n");
goto out;
}
device_printf(dev, "channel, offered %u, requested %d\n",
rsscaps.num_recv_que, nchan);
if (nchan > rsscaps.num_recv_que)
nchan = rsscaps.num_recv_que;
net_dev->num_channel = nchan;
if (net_dev->num_channel == 1) {
device_printf(dev, "net_dev->num_channel == 1 under VRSS\n");
goto out;
}
/* request host to create sub channels */
init_pkt = &net_dev->channel_init_packet;
memset(init_pkt, 0, sizeof(nvsp_msg));
init_pkt->hdr.msg_type = nvsp_msg5_type_subchannel;
init_pkt->msgs.vers_5_msgs.subchannel_request.op =
NVSP_SUBCHANNE_ALLOCATE;
init_pkt->msgs.vers_5_msgs.subchannel_request.num_subchannels =
net_dev->num_channel - 1;
ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret != 0) {
device_printf(dev, "Fail to allocate subchannel\n");
goto out;
}
sema_wait(&net_dev->channel_init_sema);
if (init_pkt->msgs.vers_5_msgs.subchn_complete.status !=
nvsp_status_success) {
ret = ENODEV;
device_printf(dev, "sub channel complete error\n");
goto out;
}
net_dev->num_channel = 1 +
init_pkt->msgs.vers_5_msgs.subchn_complete.num_subchannels;
ret = hv_rf_set_rss_param(rndis_dev, net_dev->num_channel);
out:
if (ret)
net_dev->num_channel = 1;
return (ret);
}
@ -942,7 +1159,8 @@ hv_rf_on_close(struct hv_device *device)
* RNDIS filter on send request completion callback
*/
static void
hv_rf_on_send_request_completion(void *context)
hv_rf_on_send_request_completion(struct hv_vmbus_channel *chan __unused,
void *context __unused)
{
}
@ -950,7 +1168,8 @@ hv_rf_on_send_request_completion(void *context)
* RNDIS filter on send request (halt only) completion callback
*/
static void
hv_rf_on_send_request_halt_completion(void *context)
hv_rf_on_send_request_halt_completion(struct hv_vmbus_channel *chan __unused,
void *context)
{
rndis_request *request = context;


@ -63,17 +63,32 @@ typedef struct rndis_request_ {
struct sema wait_sema;
/*
* Fixme: We assumed a fixed size response here. If we do ever
* need to handle a bigger response, we can either define a max
* response message or add a response buffer variable above this field
* The max response size is sizeof(rndis_msg) + PAGE_SIZE.
*
* XXX
* This is ugly and should be cleaned up once we busdma-fy
* RNDIS request bits.
*/
rndis_msg response_msg;
uint8_t buf_resp[PAGE_SIZE];
/* Simplify allocation by having a netvsc packet inline */
netvsc_packet pkt;
hv_vmbus_page_buffer buffer;
/* Fixme: We assumed a fixed size request here. */
/*
* The max request size is sizeof(rndis_msg) + PAGE_SIZE.
*
* NOTE:
* This is required for the large request like RSS settings.
*
* XXX
* This is ugly and should be cleaned up once we busdma-fy
* RNDIS request bits.
*/
rndis_msg request_msg;
uint8_t buf_req[PAGE_SIZE];
/* Fixme: Poor man's semaphore. */
uint32_t halt_complete_flag;
} rndis_request;


@ -277,7 +277,7 @@ vmbus_channel_process_offer(hv_vmbus_channel *new_channel)
}
}
static void
void
vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
{
KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));