HyperV socket implementation for FreeBSD

This change adds the Hyper-V socket feature to FreeBSD. A new socket address
family, AF_HYPERV, and its kernel support are added.

Submitted by:	Wei Hu <weh@microsoft.com>
Reviewed by:	Dexuan Cui <decui@microsoft.com>
Relnotes:	yes
Sponsored by:	Microsoft
Differential Revision:	https://reviews.freebsd.org/D24061
This commit is contained in:
Wei Hu 2020-05-20 11:03:59 +00:00
parent b5ba8a0f32
commit a560f3ebd7
13 changed files with 2586 additions and 19 deletions

View File

@ -133,6 +133,7 @@ dev/hwpmc/hwpmc_core.c optional hwpmc
dev/hwpmc/hwpmc_uncore.c optional hwpmc
dev/hwpmc/hwpmc_tsc.c optional hwpmc
dev/hwpmc/hwpmc_x86.c optional hwpmc
dev/hyperv/hvsock/hv_sock.c optional hyperv
dev/hyperv/input/hv_kbd.c optional hyperv
dev/hyperv/input/hv_kbdc.c optional hyperv
dev/hyperv/pcib/vmbus_pcib.c optional hyperv pci

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,122 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Microsoft Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _HVSOCK_H
#define _HVSOCK_H
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/queue.h>
#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/vmbus.h>
/*
 * HyperV Socket Protocols
 */
#define	HYPERV_SOCK_PROTO_TRANS		1	/* Transport protocol */

/*
 * Special port values.  Both expand to the all-ones unsigned int.  The
 * expansion is parenthesized so that the unary minus always stays attached
 * to its operand regardless of the expression the macro is used in.
 */
#define	HVADDR_PORT_ANY			(-1U)
#define	HVADDR_PORT_UNKNOWN		(-1U)

/*
 * Bit flags naming the bound and/or connected socket lists.
 * NOTE(review): presumably the 'unsigned char' argument of
 * hvs_remove_socket_from_list() -- confirm against hv_sock.c.
 */
#define	HVS_LIST_BOUND			0x01
#define	HVS_LIST_CONNECTED		0x02
#define	HVS_LIST_ALL			(HVS_LIST_BOUND | HVS_LIST_CONNECTED)
/*
 * Socket address used with Hyper-V sockets.
 *
 * The leading sa_len/sa_family members mirror the generic socket address
 * header; hvs_port carries the port and hvs_zero is trailing padding.
 *
 * NOTE(review): hvs_zero is sized as sizeof(struct sockaddr) minus the sizes
 * of the other three members.  That arithmetic does not account for any
 * alignment padding the compiler may insert in front of hvs_port, so
 * sizeof(struct sockaddr_hvs) may differ from sizeof(struct sockaddr) --
 * confirm before relying on the two sizes being equal.
 */
struct sockaddr_hvs {
unsigned char sa_len;
sa_family_t sa_family;
unsigned int hvs_port;
unsigned char hvs_zero[sizeof(struct sockaddr) -
sizeof(sa_family_t) -
sizeof(unsigned char) -
sizeof(unsigned int)];
};
/*
 * Packed vmpipe protocol header: two 32-bit words with no padding.
 * NOTE(review): field meanings (packet type / payload size in bytes) are
 * inferred from the names -- confirm against the sender/receiver code.
 */
struct vmpipe_proto_header {
uint32_t vmpipe_pkt_type;
uint32_t vmpipe_data_size;
} __packed;
/*
 * Packed header of a Hyper-V socket packet: the vmbus channel packet header
 * immediately followed by the vmpipe protocol header.
 */
struct hvs_pkt_header {
struct vmbus_chanpkt_hdr chan_pkt_hdr;
struct vmpipe_proto_header vmpipe_pkt_hdr;
} __packed;
/*
 * Per-socket control block for Hyper-V sockets.  It is reached from a
 * struct socket through so_pcb (see so2hvspcb()) and points back at the
 * owning socket through 'so'.
 */
struct hvs_pcb {
struct socket *so; /* Pointer to socket */
/* Local and remote socket addresses */
struct sockaddr_hvs local_addr;
struct sockaddr_hvs remote_addr;
/* Service GUIDs; NOTE(review): VM-side and host-side ids, per the names */
struct hyperv_guid vm_srv_id;
struct hyperv_guid host_srv_id;
/* vmbus channel associated with this socket */
struct vmbus_channel *chan;
/* Current packet header on rx ring */
struct hvs_pkt_header hvs_pkt;
/* Available data in receive br in current packet */
uint32_t recv_data_len;
/* offset in the packet */
uint32_t recv_data_off;
/* NOTE(review): presumably set once the ring buffers are set up -- confirm */
bool rb_init;
/* Link lists for global bound and connected sockets */
LIST_ENTRY(hvs_pcb) bound_next;
LIST_ENTRY(hvs_pcb) connected_next;
};
/* Map a socket to its Hyper-V socket control block (stored in so_pcb). */
#define	so2hvspcb(so)		((struct hvs_pcb *)((so)->so_pcb))
/* Map a Hyper-V socket control block back to its owning socket. */
#define	hsvpcb2so(hvspcb)	((struct socket *)((hvspcb)->so))
/*
 * Prototypes of the Hyper-V socket routines.
 * NOTE(review): the definitions are not visible from this header;
 * presumably they live in hv_sock.c -- confirm.
 */
/* Address helper */
void hvs_addr_init(struct sockaddr_hvs *, const struct hyperv_guid *);
/* Transport initialization and socket teardown */
void hvs_trans_init(void);
void hvs_trans_close(struct socket *);
void hvs_trans_detach(struct socket *);
void hvs_trans_abort(struct socket *);
/* Per-socket operations; the int-returning ones presumably return an errno */
int hvs_trans_attach(struct socket *, int, struct thread *);
int hvs_trans_bind(struct socket *, struct sockaddr *, struct thread *);
int hvs_trans_listen(struct socket *, int, struct thread *);
int hvs_trans_accept(struct socket *, struct sockaddr **);
int hvs_trans_connect(struct socket *,
struct sockaddr *, struct thread *);
int hvs_trans_peeraddr(struct socket *, struct sockaddr **);
int hvs_trans_sockaddr(struct socket *, struct sockaddr **);
int hvs_trans_soreceive(struct socket *, struct sockaddr **,
struct uio *, struct mbuf **, struct mbuf **, int *);
int hvs_trans_sosend(struct socket *, struct sockaddr *, struct uio *,
struct mbuf *, struct mbuf *, int, struct thread *);
int hvs_trans_disconnect(struct socket *);
int hvs_trans_shutdown(struct socket *);
/* Transport-wide lock helpers */
int hvs_trans_lock(void);
void hvs_trans_unlock(void);
/* The unsigned char argument presumably takes HVS_LIST_* flags -- confirm */
void hvs_remove_socket_from_list(struct socket *, unsigned char);
#endif /* _HVSOCK_H */

View File

@ -31,6 +31,7 @@
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/_iovec.h>
/*
* VMBUS version is 32 bit, upper 16 bit for major_number and lower
@ -130,6 +131,7 @@ struct task;
struct taskqueue;
typedef void (*vmbus_chan_callback_t)(struct vmbus_channel *, void *);
typedef int (*vmbus_br_copy_callback_t)(void *, int, void *);
static __inline struct vmbus_channel *
vmbus_get_channel(device_t dev)
@ -205,6 +207,14 @@ int vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen,
int vmbus_chan_recv_pkt(struct vmbus_channel *chan,
struct vmbus_chanpkt_hdr *pkt, int *pktlen);
int vmbus_chan_recv_idxadv(struct vmbus_channel *chan,
uint32_t advance);
int vmbus_chan_recv_peek(struct vmbus_channel *chan,
void *data, int data_len, uint32_t advance);
int vmbus_chan_recv_peek_call(struct vmbus_channel *chan,
int data_len, uint32_t skip,
vmbus_br_copy_callback_t cb, void *cbarg);
int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type,
uint16_t flags, void *data, int dlen, uint64_t xactid);
int vmbus_chan_send_sglist(struct vmbus_channel *chan,
@ -213,13 +223,30 @@ int vmbus_chan_send_sglist(struct vmbus_channel *chan,
int vmbus_chan_send_prplist(struct vmbus_channel *chan,
struct vmbus_gpa_range *prp, int prp_cnt, void *data,
int dlen, uint64_t xactid);
int vmbus_chan_iov_send(struct vmbus_channel *chan,
const struct iovec iov[], int iovlen,
vmbus_br_copy_callback_t cb, void *cbarg);
uint32_t vmbus_chan_write_available(struct vmbus_channel *chan);
uint32_t vmbus_chan_read_available(struct vmbus_channel *chan);
bool vmbus_chan_write_signal(struct vmbus_channel *chan,
int32_t min_signal_size);
void vmbus_chan_set_pending_send_size(struct vmbus_channel *chan,
uint32_t size);
uint32_t vmbus_chan_id(const struct vmbus_channel *chan);
uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan);
bool vmbus_chan_is_primary(const struct vmbus_channel *chan);
bool vmbus_chan_is_revoked(const struct vmbus_channel *chan);
const struct hyperv_guid *
vmbus_chan_guid_inst(const struct vmbus_channel *chan);
bool vmbus_chan_is_hvs(const struct vmbus_channel *chan);
bool vmbus_chan_is_hvs_conn_from_host(
const struct vmbus_channel *chan);
int vmbus_req_tl_connect(struct hyperv_guid *,
struct hyperv_guid *);
struct hyperv_guid *
vmbus_chan_guid_type(struct vmbus_channel *chan);
struct hyperv_guid *
vmbus_chan_guid_inst(struct vmbus_channel *chan);
int vmbus_chan_prplist_nelem(int br_size, int prpcnt_max,
int dlen_max);
bool vmbus_chan_rx_empty(const struct vmbus_channel *chan);

View File

@ -365,12 +365,48 @@ vmbus_gpadl_alloc(struct vmbus_softc *sc)
uint32_t gpadl;
again:
gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
gpadl = atomic_fetchadd_int(&sc->vmbus_gpadl, 1);
if (gpadl == 0)
goto again;
return (gpadl);
}
/*
 * Used for Hyper-V socket when guest client connects to host.
 *
 * Builds a VMBUS_CHANMSG_TYPE_TL_CONN request carrying the guest endpoint
 * id and the host service id, and posts it through a message hypercall
 * without waiting for a result.
 *
 * Returns ENXIO when there is no vmbus softc or no message hypercall
 * context could be obtained; otherwise returns whatever
 * vmbus_msghc_exec_noresult() returned (0 on success).
 */
int
vmbus_req_tl_connect(struct hyperv_guid *guest_srv_id,
    struct hyperv_guid *host_srv_id)
{
	struct vmbus_chanmsg_tl_connect *req;
	struct vmbus_msghc *mh;
	struct vmbus_softc *sc;
	int error;

	sc = vmbus_get_softc();
	if (sc == NULL)
		return (ENXIO);

	mh = vmbus_msghc_get(sc, sizeof(*req));
	if (mh == NULL) {
		device_printf(sc->vmbus_dev,
		    "can not get msg hypercall for tl connect\n");
		return (ENXIO);
	}

	/* Fill in the request in the hypercall's data area. */
	req = vmbus_msghc_dataptr(mh);
	req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_TL_CONN;
	req->guest_endpoint_id = *guest_srv_id;
	req->host_service_id = *host_srv_id;

	error = vmbus_msghc_exec_noresult(mh);
	/* The hypercall context is released whether or not the post worked. */
	vmbus_msghc_put(sc, mh);

	if (error != 0) {
		device_printf(sc->vmbus_dev,
		    "tl connect msg hypercall failed\n");
	}
	return (error);
}
static int
vmbus_connect(struct vmbus_softc *sc, uint32_t version)
{

View File

@ -52,18 +52,23 @@ static int
vmbus_br_sysctl_state(SYSCTL_HANDLER_ARGS)
{
const struct vmbus_br *br = arg1;
uint32_t rindex, windex, imask, ravail, wavail;
uint32_t rindex, windex, imask, psndsz, fvalue, ravail, wavail;
uint64_t intrcnt;
char state[256];
intrcnt = br->vbr_intrcnt;
rindex = br->vbr_rindex;
windex = br->vbr_windex;
imask = br->vbr_imask;
psndsz = br->vbr_psndsz;
fvalue = br->vbr_fvalue;
wavail = VMBUS_BR_WAVAIL(rindex, windex, br->vbr_dsize);
ravail = br->vbr_dsize - wavail;
snprintf(state, sizeof(state),
"rindex:%u windex:%u imask:%u ravail:%u wavail:%u",
rindex, windex, imask, ravail, wavail);
"intrcnt:%lu rindex:%u windex:%u imask:%u psndsz:%u fvalue:%u "
"ravail:%u wavail:%u",
intrcnt, rindex, windex, imask, psndsz, fvalue, ravail, wavail);
return sysctl_handle_string(oidp, state, sizeof(state), req);
}
@ -76,9 +81,11 @@ vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS)
#define BR_STATE_RIDX 0
#define BR_STATE_WIDX 1
#define BR_STATE_IMSK 2
#define BR_STATE_RSPC 3
#define BR_STATE_WSPC 4
#define BR_STATE_MAX 5
#define BR_STATE_PSSZ 3
#define BR_STATE_FVAL 4
#define BR_STATE_RSPC 5
#define BR_STATE_WSPC 6
#define BR_STATE_MAX 7
const struct vmbus_br *br = arg1;
uint32_t rindex, windex, wavail, state[BR_STATE_MAX];
@ -90,6 +97,8 @@ vmbus_br_sysctl_state_bin(SYSCTL_HANDLER_ARGS)
state[BR_STATE_RIDX] = rindex;
state[BR_STATE_WIDX] = windex;
state[BR_STATE_IMSK] = br->vbr_imask;
state[BR_STATE_PSSZ] = br->vbr_psndsz;
state[BR_STATE_FVAL] = br->vbr_fvalue;
state[BR_STATE_WSPC] = wavail;
state[BR_STATE_RSPC] = br->vbr_dsize - wavail;
@ -139,6 +148,12 @@ vmbus_rxbr_avail(const struct vmbus_rxbr *rbr)
VMBUS_BR_WAVAIL(rindex, windex, rbr->rxbr_dsize));
}
/*
 * Exported wrapper around vmbus_rxbr_avail(): returns whatever that helper
 * reports for the RX bufring.
 */
uint32_t
vmbus_rxbr_available(const struct vmbus_rxbr *rbr)
{
	uint32_t avail;

	avail = vmbus_rxbr_avail(rbr);
	return (avail);
}
uint32_t
vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr)
{
@ -178,6 +193,40 @@ vmbus_rxbr_setup(struct vmbus_rxbr *rbr, void *buf, int blen)
vmbus_br_setup(&rbr->rxbr, buf, blen);
}
/*
 * Decide whether the host must be signaled after the guest consumed
 * 'bytes_read' bytes from the RX bufring.
 *
 * Returns true only when all of the following hold:
 *  - the pending-send-size feature bit is set on this ring,
 *  - the ring's pending send size is non-zero,
 *  - the writable space before this read did not exceed that size, and
 *  - the writable space now does exceed it.
 *
 * 'bytes_read' is expected to have been consumed already: canwrite_size
 * is computed from the current ring state and 'bytes_read' is subtracted
 * to recover the pre-read value.
 */
static __inline boolean_t
vmbus_rxbr_need_signal(const struct vmbus_rxbr *rbr, uint32_t bytes_read)
{
uint32_t pending_snd_sz, canwrite_size;
/* No need to signal if host doesn't want us to */
if (!rbr->rxbr_fpsndsz)
return false;
/* Barrier before sampling the pending send size */
mb();
pending_snd_sz = rbr->rxbr_psndsz;
/* No need to signal if host sets pending_snd_sz to 0 */
if (!pending_snd_sz)
return false;
/* Barrier before sampling the ring indices via vmbus_rxbr_avail() */
mb();
/* Space the writer can use now, i.e. after the read */
canwrite_size = rbr->rxbr_dsize - vmbus_rxbr_avail(rbr);
/* No need to signal if br already has enough space before read */
if (canwrite_size - bytes_read > pending_snd_sz)
return false;
/*
* No need to signal if still doesn't have enough space
* asked by host
*/
if (canwrite_size <= pending_snd_sz)
return false;
return true;
}
void
vmbus_txbr_init(struct vmbus_txbr *tbr)
{
@ -194,6 +243,23 @@ void
vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen)
{
vmbus_br_setup(&tbr->txbr, buf, blen);
/* Set feature bit enabling flow control */
tbr->txbr_fpsndsz = 1;
}
/*
 * Return the current interrupt mask word of the TX bufring.  A full memory
 * barrier is issued before the field is read.
 */
uint32_t
vmbus_txbr_get_imask(const struct vmbus_txbr *tbr)
{
	uint32_t imask;

	mb();
	imask = tbr->txbr_imask;
	return (imask);
}
/*
 * Store 'size' into the pending send size field of the TX bufring.
 */
void
vmbus_txbr_set_pending_snd_sz(struct vmbus_txbr *tbr, uint32_t size)
{

	tbr->txbr_psndsz = size;
}
/*
@ -260,6 +326,116 @@ vmbus_txbr_copyto(const struct vmbus_txbr *tbr, uint32_t windex,
return VMBUS_BR_IDXINC(windex, cplen, br_dsize);
}
/*
 * Let the callback 'cb' fill 'cplen' bytes of the TX bufring data area
 * starting at offset 'windex'.
 *
 * When the region runs past the end of the data area, the callback is
 * invoked twice: once for the tail of the area and, if that succeeded,
 * once more for the remainder at the start of the area.
 *
 * The callback's result (0 on success) is stored in '*ret'.  The return
 * value is the write index advanced by 'cplen', computed even if the
 * callback failed.
 */
static __inline uint32_t
vmbus_txbr_copyto_call(const struct vmbus_txbr *tbr, uint32_t windex,
    uint32_t cplen, vmbus_br_copy_callback_t cb, void *cbarg, int *ret)
{
	uint8_t *ring = tbr->txbr_data;
	uint32_t ring_size = tbr->txbr_dsize;
	uint32_t tail_room = ring_size - windex;
	int rc;

	if (cplen <= tail_room) {
		/* Contiguous region: a single callback covers it. */
		rc = cb((void *)(ring + windex), cplen, cbarg);
	} else {
		/* Wrap-around: tail first, then the head of the ring. */
		rc = cb((void *)(ring + windex), tail_room, cbarg);
		if (rc == 0)
			rc = cb((void *)ring, cplen - tail_room, cbarg);
	}

	*ret = rc;
	return (VMBUS_BR_IDXINC(windex, cplen, ring_size));
}
/*
 * Exported wrapper around vmbus_txbr_avail(): returns whatever that helper
 * reports for the TX bufring.
 */
uint32_t
vmbus_txbr_available(const struct vmbus_txbr *tbr)
{
	uint32_t avail;

	avail = vmbus_txbr_avail(tbr);
	return (avail);
}
/*
 * Write a scattered channel packet to the TX bufring, letting a caller
 * supplied callback fill in the parts that have no source buffer.
 *
 * For each iovec entry:
 *  - a non-NULL iov_base is copied into the ring with vmbus_txbr_copyto();
 *  - a NULL iov_base with a non-NULL 'cb' makes 'cb' fill iov_len bytes
 *    directly in the ring (see vmbus_txbr_copyto_call()).
 * After the entries, a 64-bit word holding the packet's starting write
 * index in its upper 32 bits is appended, and only then is the ring's
 * write index updated.
 *
 * Returns EAGAIN if the ring's available space is not strictly larger
 * than the packet plus the trailing 64-bit word, the callback's non-zero
 * result if the callback fails (the write index is left unchanged), or 0
 * on success.  On success, and if 'need_sig' is not NULL, '*need_sig' is
 * set from vmbus_txbr_need_signal().
 *
 * NOTE:
 * Not holding lock when calling user provided callback routine; the spin
 * lock is taken only around the trailing offset write and the write index
 * update.  Caller should hold lock to serialize ring buffer accesses.
 *
 * NOTE(review): an entry with NULL iov_base while 'cb' is NULL is counted
 * in the space check but neither copied nor skipped over in the ring --
 * confirm callers never pass that combination.
 */
int
vmbus_txbr_write_call(struct vmbus_txbr *tbr,
const struct iovec iov[], int iovlen,
vmbus_br_copy_callback_t cb, void *cbarg,
boolean_t *need_sig)
{
uint32_t old_windex, windex, total;
uint64_t save_windex;
int i;
int cb_ret = 0;
/* Total bytes needed: all iovec lengths plus the trailing offset word */
total = 0;
for (i = 0; i < iovlen; i++)
total += iov[i].iov_len;
total += sizeof(save_windex);
/*
* NOTE:
* If this write is going to make br_windex same as br_rindex,
* i.e. the available space for write is same as the write size,
* we can't do it then, since br_windex == br_rindex means that
* the bufring is empty.
*/
if (vmbus_txbr_avail(tbr) <= total) {
return (EAGAIN);
}
/* Save br_windex for later use */
old_windex = tbr->txbr_windex;
/*
* Copy the scattered channel packet to the TX bufring.
*/
windex = old_windex;
for (i = 0; i < iovlen; i++) {
if (iov[i].iov_base != NULL) {
windex = vmbus_txbr_copyto(tbr, windex,
iov[i].iov_base, iov[i].iov_len);
} else if (cb != NULL) {
windex = vmbus_txbr_copyto_call(tbr, windex,
iov[i].iov_len, cb, cbarg, &cb_ret);
/*
* If callback fails, return without updating
* write index.
*/
if (cb_ret)
return (cb_ret);
}
}
mtx_lock_spin(&tbr->txbr_lock);
/*
* Set the offset of the current channel packet.
*/
save_windex = ((uint64_t)old_windex) << 32;
windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
sizeof(save_windex));
/*
* Update the write index _after_ the channel packet
* is copied.
*/
__compiler_membar();
tbr->txbr_windex = windex;
mtx_unlock_spin(&tbr->txbr_lock);
/* Report whether the host needs to be signaled for this write */
if (need_sig)
*need_sig = vmbus_txbr_need_signal(tbr, old_windex);
return (0);
}
/*
* Write scattered channel packet to TX bufring.
*
@ -346,6 +522,27 @@ vmbus_rxbr_copyfrom(const struct vmbus_rxbr *rbr, uint32_t rindex,
return VMBUS_BR_IDXINC(rindex, cplen, br_dsize);
}
/*
 * Hand 'cplen' bytes of the RX bufring data area, starting at offset
 * 'rindex', to the callback 'cb'.
 *
 * When the region runs past the end of the data area, the callback is
 * invoked twice: once for the tail of the area and, if that succeeded,
 * once more for the remainder at the start of the area.
 *
 * Returns the callback's result (0 on success).  Note that, unlike
 * vmbus_rxbr_copyfrom(), this does not return an advanced read index.
 */
static __inline uint32_t
vmbus_rxbr_copyfrom_call(const struct vmbus_rxbr *rbr, uint32_t rindex,
    int cplen, vmbus_br_copy_callback_t cb, void *cbarg)
{
	uint8_t *ring = rbr->rxbr_data;
	uint32_t ring_size = rbr->rxbr_dsize;
	uint32_t tail_len = ring_size - rindex;
	int rc;

	if (cplen <= tail_len) {
		/* Contiguous region: a single callback covers it. */
		rc = cb((void *)(ring + rindex), cplen, cbarg);
	} else {
		/* Wrap-around: tail first, then the head of the ring. */
		rc = cb((void *)(ring + rindex), tail_len, cbarg);
		if (rc == 0)
			rc = cb((void *)ring, cplen - tail_len, cbarg);
	}

	return (rc);
}
int
vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen)
{
@ -366,6 +563,121 @@ vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen)
return (0);
}
/*
 * Hand 'dlen' bytes of the RX bufring, located 'skip' bytes past the
 * current read index, to the callback 'cb' without moving the read index.
 *
 * Returns EAGAIN when the ring holds fewer than skip + dlen bytes plus
 * the 64-bit channel packet offset; otherwise returns the callback's
 * result.
 *
 * NOTE:
 * The spin lock only covers the space check and the computation of the
 * starting offset; it is dropped before the callback runs.  Caller should
 * hold lock to serialize ring buffer accesses.
 */
int
vmbus_rxbr_peek_call(struct vmbus_rxbr *rbr, int dlen, uint32_t skip,
    vmbus_br_copy_callback_t cb, void *cbarg)
{
	uint32_t ring_size = rbr->rxbr_dsize;
	uint32_t start;

	mtx_lock_spin(&rbr->rxbr_lock);
	if (vmbus_rxbr_avail(rbr) < skip + dlen + sizeof(uint64_t)) {
		/* Not enough data in the ring yet. */
		mtx_unlock_spin(&rbr->rxbr_lock);
		return (EAGAIN);
	}
	start = VMBUS_BR_IDXINC(rbr->rxbr_rindex, skip, ring_size);
	mtx_unlock_spin(&rbr->rxbr_lock);

	return (vmbus_rxbr_copyfrom_call(rbr, start, dlen, cb, cbarg));
}
/*
 * Optionally advance the RX bufring read index, then copy 'dlen' bytes
 * starting at the (possibly updated) read index into 'data' without
 * consuming them.
 *
 * When idx_adv > 0 the read index moves forward by idx_adv plus the
 * 64-bit previous-write-offset word that follows a channel packet.
 *
 * Returns EAGAIN, with the ring and 'data' untouched, when the ring
 * holds fewer than idx_adv + 8 + dlen bytes; returns 0 otherwise.  If
 * 'need_sig' is not NULL it is set from vmbus_rxbr_need_signal() when the
 * index was advanced, and to false when idx_adv is 0.
 *
 * NOTE:
 * We assume idx_adv == sizeof(channel packet).
 */
int
vmbus_rxbr_idxadv_peek(struct vmbus_rxbr *rbr, void *data, int dlen,
uint32_t idx_adv, boolean_t *need_sig)
{
uint32_t rindex, br_dsize = rbr->rxbr_dsize;
mtx_lock_spin(&rbr->rxbr_lock);
/*
* Make sure it has enough data to read.
*/
if (vmbus_rxbr_avail(rbr) < idx_adv + sizeof(uint64_t) + dlen) {
mtx_unlock_spin(&rbr->rxbr_lock);
return (EAGAIN);
}
if (idx_adv > 0) {
/*
* Advance the read index first, including the channel's 64bit
* previous write offset.
*/
rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex,
idx_adv + sizeof(uint64_t), br_dsize);
__compiler_membar();
rbr->rxbr_rindex = rindex;
}
/* Peek: copy from the current read index; the index is not moved here */
vmbus_rxbr_copyfrom(rbr, rbr->rxbr_rindex, data, dlen);
mtx_unlock_spin(&rbr->rxbr_lock);
if (need_sig) {
/* Only an actual advance frees ring space and can require a signal */
if (idx_adv > 0)
*need_sig =
vmbus_rxbr_need_signal(rbr, idx_adv +
sizeof(uint64_t));
else
*need_sig = false;
}
return (0);
}
/*
 * Advance the RX bufring read index by 'idx_adv' bytes plus the 64-bit
 * previous-write-offset word, without copying any data out.
 *
 * Returns EAGAIN, leaving the ring untouched, when the ring holds fewer
 * than idx_adv + 8 bytes; returns 0 otherwise.  If 'need_sig' is not
 * NULL it is set from vmbus_rxbr_need_signal().
 */
int
vmbus_rxbr_idxadv(struct vmbus_rxbr *rbr, uint32_t idx_adv,
    boolean_t *need_sig)
{
	uint32_t ring_size = rbr->rxbr_dsize;
	uint32_t next_rindex;

	mtx_lock_spin(&rbr->rxbr_lock);

	/* The ring must contain everything we are about to skip. */
	if (vmbus_rxbr_avail(rbr) < idx_adv + sizeof(uint64_t)) {
		mtx_unlock_spin(&rbr->rxbr_lock);
		return (EAGAIN);
	}

	/* Compute the new index first, then publish it. */
	next_rindex = VMBUS_BR_IDXINC(rbr->rxbr_rindex,
	    idx_adv + sizeof(uint64_t), ring_size);
	__compiler_membar();
	rbr->rxbr_rindex = next_rindex;

	mtx_unlock_spin(&rbr->rxbr_lock);

	if (need_sig != NULL) {
		*need_sig = vmbus_rxbr_need_signal(rbr,
		    idx_adv + sizeof(uint64_t));
	}
	return (0);
}
/*
* NOTE:
* We assume (dlen + skip) == sizeof(channel packet).

View File

@ -44,6 +44,10 @@ struct vmbus_br {
#define vbr_windex vbr->br_windex
#define vbr_rindex vbr->br_rindex
#define vbr_imask vbr->br_imask
#define vbr_psndsz vbr->br_pending_snd_sz
#define vbr_fpsndsz vbr->br_feature_bits.feat_pending_snd_sz
#define vbr_fvalue vbr->br_feature_bits.value
#define vbr_intrcnt vbr->br_g2h_intr_cnt
#define vbr_data vbr->br_data
struct vmbus_rxbr {
@ -54,6 +58,10 @@ struct vmbus_rxbr {
#define rxbr_windex rxbr.vbr_windex
#define rxbr_rindex rxbr.vbr_rindex
#define rxbr_imask rxbr.vbr_imask
#define rxbr_psndsz rxbr.vbr_psndsz
#define rxbr_fpsndsz rxbr.vbr_fpsndsz
#define rxbr_fvalue rxbr.vbr_fvalue
#define rxbr_intrcnt rxbr.vbr_intrcnt
#define rxbr_data rxbr.vbr_data
#define rxbr_dsize rxbr.vbr_dsize
@ -65,6 +73,10 @@ struct vmbus_txbr {
#define txbr_windex txbr.vbr_windex
#define txbr_rindex txbr.vbr_rindex
#define txbr_imask txbr.vbr_imask
#define txbr_psndsz txbr.vbr_psndsz
#define txbr_fpsndsz txbr.vbr_fpsndsz
#define txbr_fvalue txbr.vbr_fvalue
#define txbr_intrcnt txbr.vbr_intrcnt
#define txbr_data txbr.vbr_data
#define txbr_dsize txbr.vbr_dsize
@ -118,13 +130,28 @@ void vmbus_rxbr_setup(struct vmbus_rxbr *rbr, void *buf, int blen);
int vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen);
int vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen,
uint32_t skip);
int vmbus_rxbr_idxadv(struct vmbus_rxbr *rbr, uint32_t idx_adv,
boolean_t *need_sig);
int vmbus_rxbr_idxadv_peek(struct vmbus_rxbr *rbr, void *data,
int dlen, uint32_t idx_adv, boolean_t *need_sig);
int vmbus_rxbr_peek_call(struct vmbus_rxbr *rbr, int dlen,
uint32_t skip, vmbus_br_copy_callback_t cb, void *cbarg);
void vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr);
uint32_t vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr);
uint32_t vmbus_rxbr_available(const struct vmbus_rxbr *rbr);
void vmbus_txbr_init(struct vmbus_txbr *tbr);
void vmbus_txbr_deinit(struct vmbus_txbr *tbr);
void vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen);
int vmbus_txbr_write(struct vmbus_txbr *tbr,
const struct iovec iov[], int iovlen, boolean_t *need_sig);
int vmbus_txbr_write_call(struct vmbus_txbr *tbr,
const struct iovec iov[], int iovlen,
vmbus_br_copy_callback_t cb, void *cbarg,
boolean_t *need_sig);
uint32_t vmbus_txbr_available(const struct vmbus_txbr *tbr);
uint32_t vmbus_txbr_get_imask(const struct vmbus_txbr *tbr);
void vmbus_txbr_set_pending_snd_sz(struct vmbus_txbr *tbr,
uint32_t size);
#endif /* _VMBUS_BRVAR_H_ */

View File

@ -127,10 +127,11 @@ vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = {
};
/*
* Notify host that there are data pending on our TX bufring.
* Notify host that there are data pending on our TX bufring or
* we have put some data on the TX bufring.
*/
static __inline void
vmbus_chan_signal_tx(const struct vmbus_channel *chan)
vmbus_chan_signal(const struct vmbus_channel *chan)
{
atomic_set_long(chan->ch_evtflag, chan->ch_evtflag_mask);
if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF)
@ -139,6 +140,22 @@ vmbus_chan_signal_tx(const struct vmbus_channel *chan)
hypercall_signal_event(chan->ch_monprm_dma.hv_paddr);
}
/*
 * Signal the host on behalf of the TX bufring: bump the TX ring's
 * guest-to-host interrupt counter, then signal the channel.
 */
static __inline void
vmbus_chan_signal_tx(struct vmbus_channel *chan)
{

	chan->ch_txbr.txbr_intrcnt += 1;
	vmbus_chan_signal(chan);
}
/*
 * Signal the host on behalf of the RX bufring: bump the RX ring's
 * guest-to-host interrupt counter, then signal the channel.
 */
static __inline void
vmbus_chan_signal_rx(struct vmbus_channel *chan)
{

	chan->ch_rxbr.rxbr_intrcnt += 1;
	vmbus_chan_signal(chan);
}
static void
vmbus_chan_ins_prilist(struct vmbus_softc *sc, struct vmbus_channel *chan)
{
@ -1012,6 +1029,59 @@ vmbus_chan_intr_drain(struct vmbus_channel *chan)
taskqueue_drain(chan->ch_tq, &chan->ch_task);
}
/*
 * Return vmbus_txbr_available() for the channel's TX bufring.
 */
uint32_t
vmbus_chan_write_available(struct vmbus_channel *chan)
{
	struct vmbus_txbr *tbr = &chan->ch_txbr;

	return (vmbus_txbr_available(tbr));
}
/*
 * Signal the reader of the channel's TX bufring if appropriate.
 *
 * With a non-negative 'min_signal_size', nothing is done while the ring's
 * writable space is larger than that threshold; a negative value disables
 * the threshold check.  Past that check, the reader is signaled only if
 * the TX ring's interrupt mask is clear.
 *
 * Returns true if a signal was sent, false otherwise.
 */
bool
vmbus_chan_write_signal(struct vmbus_channel *chan,
    int32_t min_signal_size)
{

	if (min_signal_size >= 0 &&
	    vmbus_chan_write_available(chan) > min_signal_size)
		return (false);

	/* Interrupt mask set: do not signal. */
	if (vmbus_txbr_get_imask(&chan->ch_txbr))
		return (false);

	/* txbr imask is not set, signal the reader */
	vmbus_chan_signal_tx(chan);
	return (true);
}
/*
 * Set the pending send size on the channel's TX bufring.  A NULL 'chan'
 * is silently ignored.
 */
void
vmbus_chan_set_pending_send_size(struct vmbus_channel *chan,
    uint32_t size)
{

	if (!chan)
		return;
	vmbus_txbr_set_pending_snd_sz(&chan->ch_txbr, size);
}
/*
 * Write the scattered packet described by 'iov' to the channel's TX
 * bufring through vmbus_txbr_write_call(), and signal the host when that
 * routine reports a signal is needed.
 *
 * An 'iovlen' of 0 is a successful no-op.  Otherwise the return value is
 * that of vmbus_txbr_write_call().
 */
int
vmbus_chan_iov_send(struct vmbus_channel *chan,
    const struct iovec iov[], int iovlen,
    vmbus_br_copy_callback_t cb, void *cbarg)
{
	boolean_t need_signal;
	int rc;

	if (iovlen == 0)
		return (0);

	rc = vmbus_txbr_write_call(&chan->ch_txbr, iov, iovlen,
	    cb, cbarg, &need_signal);
	if (rc)
		return (rc);

	/* need_signal is only meaningful after a successful write. */
	if (need_signal)
		vmbus_chan_signal_tx(chan);
	return (rc);
}
int
vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags,
void *data, int dlen, uint64_t xactid)
@ -1211,6 +1281,78 @@ vmbus_chan_recv_pkt(struct vmbus_channel *chan,
return (0);
}
/*
 * Return vmbus_rxbr_available() for the channel's RX bufring.
 */
uint32_t
vmbus_chan_read_available(struct vmbus_channel *chan)
{
	struct vmbus_rxbr *rbr = &chan->ch_rxbr;

	return (vmbus_rxbr_available(rbr));
}
/*
 * This routine does:
 * - Advance the channel read index by 'advance' bytes.
 * - Copy 'data_len' bytes into the buffer pointed to by 'data'.
 *
 * Returns EINVAL if 'data' is NULL or 'data_len' is not positive.
 * Otherwise returns the result of vmbus_rxbr_idxadv_peek(): 0 on success,
 * or EAGAIN if the ring does not hold enough data, in which case the
 * buffer pointed to by 'data' is intact and the channel read index is not
 * advanced at all.  The host is signaled when the ring code asks for it.
 */
int
vmbus_chan_recv_peek(struct vmbus_channel *chan,
    void *data, int data_len, uint32_t advance)
{
	boolean_t need_signal;
	int rc;

	if (data == NULL || data_len <= 0)
		return (EINVAL);

	rc = vmbus_rxbr_idxadv_peek(&chan->ch_rxbr,
	    data, data_len, advance, &need_signal);
	if (rc)
		return (rc);

	/* need_signal is only meaningful after a successful call. */
	if (need_signal)
		vmbus_chan_signal_rx(chan);
	return (rc);
}
/*
 * This routine does:
 * - Advance the channel read index by 'advance' bytes.
 *
 * Returns EINVAL if 'advance' is 0; otherwise returns the result of
 * vmbus_rxbr_idxadv().  The host is signaled when the ring code asks for
 * it.
 */
int
vmbus_chan_recv_idxadv(struct vmbus_channel *chan, uint32_t advance)
{
	boolean_t need_signal;
	int rc;

	if (advance == 0)
		return (EINVAL);

	rc = vmbus_rxbr_idxadv(&chan->ch_rxbr, advance, &need_signal);
	if (rc)
		return (rc);

	/* need_signal is only meaningful after a successful call. */
	if (need_signal)
		vmbus_chan_signal_rx(chan);
	return (rc);
}
/*
 * Pass 'data_len' bytes of the channel's RX bufring, located 'skip' bytes
 * past the read index, to the callback 'cb' via vmbus_rxbr_peek_call().
 *
 * Returns EINVAL if 'chan' or 'cb' is NULL or 'data_len' is not positive;
 * otherwise returns the result of vmbus_rxbr_peek_call().
 *
 * Caller should hold its own lock to serialize the ring buffer
 * copy.
 */
int
vmbus_chan_recv_peek_call(struct vmbus_channel *chan, int data_len,
    uint32_t skip, vmbus_br_copy_callback_t cb, void *cbarg)
{
	int rc;

	if (!chan || data_len <= 0 || cb == NULL)
		return (EINVAL);

	rc = vmbus_rxbr_peek_call(&chan->ch_rxbr, data_len, skip,
	    cb, cbarg);
	return (rc);
}
static void
vmbus_chan_task(void *xchan, int pending __unused)
{
@ -1732,6 +1874,25 @@ vmbus_chan_msgproc_choffer(struct vmbus_softc *sc,
1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN);
}
if (offer->chm_chflags & VMBUS_CHAN_TLNPI_PROVIDER_OFFER) {
/* This is HyperV socket channel */
chan->ch_is_hvs = true;
/* The first byte != 0 means the host initiated connection. */
chan->ch_hvs_conn_from_host =
offer->chm_udata.pipe.user_def[0];
if (bootverbose) {
device_printf(sc->vmbus_dev,
"chan%u is hyperv socket channel "
"connected %s host\n",
chan->ch_id,
(chan->ch_hvs_conn_from_host != 0) ?
"from" : "to");
}
} else {
chan->ch_is_hvs = false;
}
/*
* Setup event flag.
*/
@ -2047,8 +2208,31 @@ vmbus_chan_is_primary(const struct vmbus_channel *chan)
return false;
}
const struct hyperv_guid *
vmbus_chan_guid_inst(const struct vmbus_channel *chan)
/*
 * Report whether the channel is flagged as a Hyper-V socket channel.
 */
bool
vmbus_chan_is_hvs(const struct vmbus_channel *chan)
{
	bool is_hvs = chan->ch_is_hvs;

	return (is_hvs);
}
/*
 * Report whether this Hyper-V socket channel has a non-zero
 * ch_hvs_conn_from_host, i.e. the connection was initiated by the host.
 * Asserts that the channel is a Hyper-V socket channel.
 */
bool
vmbus_chan_is_hvs_conn_from_host(const struct vmbus_channel *chan)
{

	KASSERT(vmbus_chan_is_hvs(chan) == true,
	    ("Not a HyperV Socket channel %u", chan->ch_id));

	return (chan->ch_hvs_conn_from_host != 0);
}
/*
 * Return a pointer to the channel's type GUID.  The pointer refers to
 * storage inside 'chan'.
 */
struct hyperv_guid *
vmbus_chan_guid_type(struct vmbus_channel *chan)
{
	struct hyperv_guid *guid = &chan->ch_guid_type;

	return (guid);
}
/*
 * Return a pointer to the channel's instance GUID.  The pointer refers to
 * storage inside 'chan'.
 */
struct hyperv_guid *
vmbus_chan_guid_inst(struct vmbus_channel *chan)
{
	struct hyperv_guid *guid = &chan->ch_guid_inst;

	return (guid);
}

View File

@ -149,6 +149,12 @@ struct vmbus_channel {
int ch_refs;
/*
* These are for HyperV socket channel only
*/
bool ch_is_hvs;
uint8_t ch_hvs_conn_from_host;
struct sysctl_ctx_list ch_sysctl_ctx;
} __aligned(CACHE_LINE_SIZE);

View File

@ -127,7 +127,54 @@ struct vmbus_bufring {
*/
volatile uint32_t br_imask;
uint8_t br_rsvd[4084];
/*
* WS2012/Win8 and later versions of Hyper-V implement interrupt
* driven flow management. The feature bit feat_pending_snd_sz
* is set by the host on the host->guest buffer ring, and by the
* guest on the guest->host buffer ring.
*
* The meaning of the feature bit is a bit complex in that it has
* semantics that apply to both buffer rings. If the guest sets
* the feature bit in the guest->host buffer ring, the guest is
* telling the host that:
* 1) It will set the br_pending_snd_sz field in the guest->host buffer
* ring when it is waiting for space to become available, and
* 2) It will read the pending_send_sz field in the host->guest
* ring buffer and interrupt the host when it frees enough space
*
* Similarly, if the host sets the feature bit in the host->guest
* ring buffer, the host is telling the guest that:
* 1) It will set the pending_send_sz field in the host->guest ring
* buffer when it is waiting for space to become available, and
* 2) It will read the pending_send_sz field in the guest->host
* ring buffer and interrupt the guest when it frees enough space
*
* If either the guest or host does not set the feature bit that it
* owns, that guest or host must do polling if it encounters a full
* ring buffer, and not signal the other end with an interrupt.
*/
volatile uint32_t br_pending_snd_sz;
uint32_t br_rsvd1[12];
union {
struct {
uint32_t feat_pending_snd_sz:1;
};
uint32_t value;
} br_feature_bits;
/* Padding to PAGE_SIZE */
uint8_t br_rsvd2[4020];
/*
* Total guest-to-host interrupt count.
* - For the rx ring, this counts how many times the guest signaled the
* host because this rx ring changed from full to not full.
*
* - For the tx ring, this counts how many times the guest signaled the
* host because this tx ring changed from empty to non-empty.
*/
uint64_t br_g2h_intr_cnt;
uint8_t br_data[];
} __packed;
CTASSERT(sizeof(struct vmbus_bufring) == PAGE_SIZE);
@ -196,7 +243,14 @@ struct vmbus_chanpkt_prplist {
#define VMBUS_CHANMSG_TYPE_CONNECT 14 /* REQ */
#define VMBUS_CHANMSG_TYPE_CONNECT_RESP 15 /* RESP */
#define VMBUS_CHANMSG_TYPE_DISCONNECT 16 /* REQ */
#define VMBUS_CHANMSG_TYPE_MAX 22
#define VMBUS_CHANMSG_TYPE_17 17
#define VMBUS_CHANMSG_TYPE_18 18
#define VMBUS_CHANMSG_TYPE_19 19
#define VMBUS_CHANMSG_TYPE_20 20
#define VMBUS_CHANMSG_TYPE_TL_CONN 21 /* REQ */
#define VMBUS_CHANMSG_TYPE_22 22
#define VMBUS_CHANMSG_TYPE_TL_RESULT 23 /* RESP */
#define VMBUS_CHANMSG_TYPE_MAX 24
struct vmbus_chanmsg_hdr {
uint32_t chm_type; /* VMBUS_CHANMSG_TYPE_ */
@ -229,6 +283,15 @@ struct vmbus_chanmsg_disconnect {
struct vmbus_chanmsg_hdr chm_hdr;
} __packed;
/* VMBUS_CHANMSG_TYPE_TL_CONN */
/*
 * Hyper-V socket guest connect request.
 * Packed channel message: the common channel message header followed by
 * the guest endpoint GUID and the host service GUID.
 */
struct vmbus_chanmsg_tl_connect {
struct vmbus_chanmsg_hdr chm_hdr;
struct hyperv_guid guest_endpoint_id;
struct hyperv_guid host_service_id;
} __packed;
/* VMBUS_CHANMSG_TYPE_CHOPEN */
struct vmbus_chanmsg_chopen {
struct vmbus_chanmsg_hdr chm_hdr;
@ -310,6 +373,12 @@ struct vmbus_chanmsg_chrescind {
uint32_t chm_chanid;
} __packed;
/* Size of the user defined data buffer for non-pipe offers */
#define VMBUS_CHANMSG_CHOFFER_UDATA_SIZE 120
/* Size of the user defined data buffer for pipe offers. */
#define VMBUS_CHANMSG_CHOFFER_UDATA_PIPE_SIZE 116
/* VMBUS_CHANMSG_TYPE_CHOFFER */
struct vmbus_chanmsg_choffer {
struct vmbus_chanmsg_hdr chm_hdr;
@ -320,7 +389,26 @@ struct vmbus_chanmsg_choffer {
uint32_t chm_svrctx_sz;
uint16_t chm_chflags;
uint16_t chm_mmio_sz; /* unit: MB */
uint8_t chm_udata[120];
union {
/* Non-pipes */
struct {
uint8_t user_def[VMBUS_CHANMSG_CHOFFER_UDATA_SIZE];
} std;
/*
* Pipes:
* For integrated pipe protocol, which is implemented on
* top of standard user-defined data. Pipe clients have
* VMBUS_CHANMSG_CHOFFER_UDATA_PIPE_SIZE bytes left for
* their own user.
*/
struct {
uint32_t pipe_mode;
uint8_t
user_def[VMBUS_CHANMSG_CHOFFER_UDATA_PIPE_SIZE];
} pipe;
} chm_udata;
uint16_t chm_subidx;
uint16_t chm_rsvd;
uint32_t chm_chanid;
@ -331,6 +419,9 @@ struct vmbus_chanmsg_choffer {
} __packed;
CTASSERT(sizeof(struct vmbus_chanmsg_choffer) <= VMBUS_MSG_DSIZE_MAX);
/* Server Flag */
#define VMBUS_CHAN_TLNPI_PROVIDER_OFFER 0x2000
#define VMBUS_CHOFFER_FLAG1_HASMNF 0x01
#endif /* !_VMBUS_REG_H_ */

View File

@ -1,5 +1,5 @@
# $FreeBSD$
SUBDIR = vmbus netvsc storvsc utilities
SUBDIR = vmbus netvsc storvsc utilities hvsock
.include <bsd.subdir.mk>

View File

@ -0,0 +1,13 @@
# $FreeBSD$
# Kernel module build for the Hyper-V socket driver (hv_sock).
# Sources are looked up in sys/dev/hyperv/hvsock.
.PATH: ${SRCTOP}/sys/dev/hyperv/hvsock
KMOD= hv_sock
SRCS= hv_sock.c
SRCS+= hv_sock.h
# Header search path: shared Hyper-V headers, vmbus internals, and hvsock.
CFLAGS+= -I${SRCTOP}/sys/dev/hyperv/include \
-I${SRCTOP}/sys/dev/hyperv/vmbus \
-I${SRCTOP}/sys/dev/hyperv/hvsock
.include <bsd.kmod.mk>

View File

@ -265,7 +265,8 @@ struct accept_filter_arg {
#define AF_IEEE80211 37 /* IEEE 802.11 protocol */
#define AF_INET_SDP 40 /* OFED Socket Direct Protocol ipv4 */
#define AF_INET6_SDP 42 /* OFED Socket Direct Protocol ipv6 */
#define AF_MAX 42
#define AF_HYPERV 43 /* HyperV sockets */
#define AF_MAX 43
/*
* When allocating a new AF_ constant, please only allocate
* even numbered constants for FreeBSD until 134 as odd numbered AF_
@ -273,7 +274,6 @@ struct accept_filter_arg {
*/
#define AF_VENDOR00 39
#define AF_VENDOR01 41
#define AF_VENDOR02 43
#define AF_VENDOR03 45
#define AF_VENDOR04 47
#define AF_VENDOR05 49