Import Hyper-V paravirtualized drivers from the projects/hyperv branch into head.

Approved by:	re@ (hrs)
Obtained from:	Microsoft, NetApp, and Citrix.
This commit is contained in:
Peter Grehan 2013-09-13 18:47:58 +00:00
commit ab7fb3bca7
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=255524
28 changed files with 12800 additions and 0 deletions

View File

@ -340,3 +340,7 @@ device vtnet # VirtIO Ethernet device
device virtio_blk # VirtIO Block device
device virtio_scsi # VirtIO SCSI device
device virtio_balloon # VirtIO Memory Balloon device
# HyperV drivers
device hyperv # HyperV drivers

View File

@ -221,6 +221,18 @@ dev/hwpmc/hwpmc_uncore.c optional hwpmc
dev/hwpmc/hwpmc_piv.c optional hwpmc
dev/hwpmc/hwpmc_tsc.c optional hwpmc
dev/hwpmc/hwpmc_x86.c optional hwpmc
dev/hyperv/netvsc/hv_net_vsc.c optional hyperv
dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c optional hyperv
dev/hyperv/netvsc/hv_rndis_filter.c optional hyperv
dev/hyperv/stordisengage/hv_ata_pci_disengage.c optional hyperv
dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c optional hyperv
dev/hyperv/utilities/hv_util.c optional hyperv
dev/hyperv/vmbus/hv_channel.c optional hyperv
dev/hyperv/vmbus/hv_channel_mgmt.c optional hyperv
dev/hyperv/vmbus/hv_connection.c optional hyperv
dev/hyperv/vmbus/hv_hv.c optional hyperv
dev/hyperv/vmbus/hv_ring_buffer.c optional hyperv
dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c optional hyperv
dev/kbd/kbd.c optional atkbd | sc | ukbd
dev/lindev/full.c optional lindev
dev/lindev/lindev.c optional lindev

View File

@ -0,0 +1,796 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* HyperV definitions for messages that are sent between instances of the
* Channel Management Library in separate partitions, or in some cases,
* back to itself.
*/
#ifndef __HYPERV_H__
#define __HYPERV_H__
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/queue.h>
#include <sys/malloc.h>
#include <sys/kthread.h>
#include <sys/taskqueue.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/sema.h>
#include <sys/mutex.h>
#include <sys/bus.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <amd64/include/xen/synch_bitops.h>
#include <amd64/include/atomic.h>
/* Hyper-V wire structures use 8-bit booleans; give them a distinct type. */
typedef uint8_t hv_bool_uint8_t;
/* HRESULT-style status codes exchanged with the host */
#define HV_S_OK 0x00000000
#define HV_E_FAIL 0x80004005
#define HV_ERROR_NOT_SUPPORTED 0x80070032
#define HV_ERROR_MACHINE_LOCKED 0x800704F7
/*
 * A revision number of vmbus that is used for ensuring both ends on a
 * partition are using compatible versions.
 */
#define HV_VMBUS_REVISION_NUMBER 13
/*
 * Make maximum size of pipe payload of 16K.
 *
 * Note: the original used sizeof(BYTE); BYTE is a Windows SDK type that is
 * not defined by any FreeBSD header, so any use of this macro failed to
 * compile.  uint8_t is the equivalent one-byte type.
 */
#define HV_MAX_PIPE_DATA_PAYLOAD (sizeof(uint8_t) * 16384)
/*
 * Define pipe_mode values
 */
#define HV_VMBUS_PIPE_TYPE_BYTE 0x00000000
#define HV_VMBUS_PIPE_TYPE_MESSAGE 0x00000004
/*
 * The size of the user defined data buffer for non-pipe offers
 */
#define HV_MAX_USER_DEFINED_BYTES 120
/*
 * The size of the user defined data buffer for pipe offers
 */
#define HV_MAX_PIPE_USER_DEFINED_BYTES 116
/* Limits on page/scatter-gather entries carried in a single VMBus packet */
#define HV_MAX_PAGE_BUFFER_COUNT 16
#define HV_MAX_MULTIPAGE_BUFFER_COUNT 32
/*
 * Round value up to a multiple of align (align must be a power of two).
 * The conditional form skips the addition when the value is already
 * aligned, which also avoids overflow in that case.
 */
#define HV_ALIGN_UP(value, align) \
(((value) & (align-1)) ? \
(((value) + (align-1)) & ~(align-1) ) : (value))
/* Round value down to a multiple of align (align must be a power of two). */
#define HV_ALIGN_DOWN(value, align) ( (value) & ~(align-1) )
/* Number of pages spanned by the byte range [addr, addr + len). */
#define HV_NUM_PAGES_SPANNED(addr, len) \
((HV_ALIGN_UP(addr+len, PAGE_SIZE) - \
HV_ALIGN_DOWN(addr, PAGE_SIZE)) >> PAGE_SHIFT )
/* 128-bit GUID identifying a VMBus interface type or instance. */
typedef struct hv_guid {
unsigned char data[16];
} __packed hv_guid;
/*
 * At the center of the Channel Management library is
 * the Channel Offer. This struct contains the
 * fundamental information about an offer.
 *
 * NOTE: this is host-defined wire format; the layout (and __packed)
 * must not be changed.
 */
typedef struct hv_vmbus_channel_offer {
hv_guid interface_type;
hv_guid interface_instance;
uint64_t interrupt_latency_in_100ns_units;
uint32_t interface_revision;
uint32_t server_context_area_size; /* in bytes */
uint16_t channel_flags;
uint16_t mmio_megabytes; /* in bytes * 1024 * 1024 */
union
{
/*
 * Non-pipes: The user has HV_MAX_USER_DEFINED_BYTES bytes.
 */
struct {
uint8_t user_defined[HV_MAX_USER_DEFINED_BYTES];
} __packed standard;
/*
 * Pipes: The following structure is an integrated pipe protocol, which
 * is implemented on top of standard user-defined data. pipe
 * clients have HV_MAX_PIPE_USER_DEFINED_BYTES left for their
 * own use.
 */
struct {
uint32_t pipe_mode;
uint8_t user_defined[HV_MAX_PIPE_USER_DEFINED_BYTES];
} __packed pipe;
} u;
uint32_t padding;
} __packed hv_vmbus_channel_offer;
/* Handle naming a Guest Physical Address Descriptor List (GPADL). */
typedef uint32_t hv_gpadl_handle;
/* Header preceding every packet placed in a channel ring buffer. */
typedef struct {
uint16_t type;
uint16_t data_offset8;
uint16_t length8;
uint16_t flags;
uint64_t transaction_id;
} __packed hv_vm_packet_descriptor;
typedef uint32_t hv_previous_packet_offset;
typedef struct {
hv_previous_packet_offset previous_packet_start_offset;
hv_vm_packet_descriptor descriptor;
} __packed hv_vm_packet_header;
/* One (offset, count) range within a transfer page set. */
typedef struct {
uint32_t byte_count;
uint32_t byte_offset;
} __packed hv_vm_transfer_page;
/* Header for data sent via a pre-negotiated transfer page set. */
typedef struct {
hv_vm_packet_descriptor d;
uint16_t transfer_page_set_id;
hv_bool_uint8_t sender_owns_set;
uint8_t reserved;
uint32_t range_count;
hv_vm_transfer_page ranges[1];
} __packed hv_vm_transfer_page_packet_header;
typedef struct {
hv_vm_packet_descriptor d;
uint32_t gpadl;
uint32_t reserved;
} __packed hv_vm_gpadl_packet_header;
typedef struct {
hv_vm_packet_descriptor d;
uint32_t gpadl;
uint16_t transfer_page_set_id;
uint16_t reserved;
} __packed hv_vm_add_remove_transfer_page_set;
/*
 * This structure defines a range in guest
 * physical space that can be made
 * to look virtually contiguous.
 */
typedef struct {
uint32_t byte_count;
uint32_t byte_offset;
/* variable-length trailer of page frame numbers (wire format) */
uint64_t pfn_array[0];
} __packed hv_gpa_range;
/*
 * This is the format for an Establish Gpadl packet, which contains a handle
 * by which this GPADL will be known and a set of GPA ranges associated with
 * it. This can be converted to a MDL by the guest OS. If there are multiple
 * GPA ranges, then the resulting MDL will be "chained," representing multiple
 * VA ranges.
 */
typedef struct {
hv_vm_packet_descriptor d;
uint32_t gpadl;
uint32_t range_count;
hv_gpa_range range[1];
} __packed hv_vm_establish_gpadl;
/*
 * This is the format for a Teardown Gpadl packet, which indicates that the
 * GPADL handle in the Establish Gpadl packet will never be referenced again.
 */
typedef struct {
hv_vm_packet_descriptor d;
uint32_t gpadl;
/* for alignment to a 8-byte boundary */
uint32_t reserved;
} __packed hv_vm_teardown_gpadl;
/*
 * This is the format for a GPA-Direct packet, which contains a set of GPA
 * ranges, in addition to commands and/or data.
 */
typedef struct {
hv_vm_packet_descriptor d;
uint32_t reserved;
uint32_t range_count;
hv_gpa_range range[1];
} __packed hv_vm_data_gpa_direct;
/*
 * This is the format for a Additional data Packet.
 */
typedef struct {
hv_vm_packet_descriptor d;
uint64_t total_bytes;
uint32_t byte_offset;
uint32_t byte_count;
uint8_t data[1];
} __packed hv_vm_additional_data;
/* Union of all packet headers, sized for the largest possible one. */
typedef union {
hv_vm_packet_descriptor simple_header;
hv_vm_transfer_page_packet_header transfer_page_header;
hv_vm_gpadl_packet_header gpadl_header;
hv_vm_add_remove_transfer_page_set add_remove_transfer_page_header;
hv_vm_establish_gpadl establish_gpadl_header;
hv_vm_teardown_gpadl teardown_gpadl_header;
hv_vm_data_gpa_direct data_gpa_direct_header;
} __packed hv_vm_packet_largest_possible_header;
/* Values for hv_vm_packet_descriptor.type (host-defined). */
typedef enum {
HV_VMBUS_PACKET_TYPE_INVALID = 0x0,
HV_VMBUS_PACKET_TYPES_SYNCH = 0x1,
HV_VMBUS_PACKET_TYPE_ADD_TRANSFER_PAGE_SET = 0x2,
HV_VMBUS_PACKET_TYPE_REMOVE_TRANSFER_PAGE_SET = 0x3,
HV_VMBUS_PACKET_TYPE_ESTABLISH_GPADL = 0x4,
HV_VMBUS_PACKET_TYPE_TEAR_DOWN_GPADL = 0x5,
HV_VMBUS_PACKET_TYPE_DATA_IN_BAND = 0x6,
HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES = 0x7,
HV_VMBUS_PACKET_TYPE_DATA_USING_GPADL = 0x8,
HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT = 0x9,
HV_VMBUS_PACKET_TYPE_CANCEL_REQUEST = 0xa,
HV_VMBUS_PACKET_TYPE_COMPLETION = 0xb,
HV_VMBUS_PACKET_TYPE_DATA_USING_ADDITIONAL_PACKETS = 0xc,
HV_VMBUS_PACKET_TYPE_ADDITIONAL_DATA = 0xd
} hv_vmbus_packet_type;
/* Set in hv_vm_packet_descriptor.flags to request a completion packet. */
#define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED 1
/*
 * Version 1 messages
 */
/* Channel management message types carried in SynIC messages. */
typedef enum {
HV_CHANNEL_MESSAGE_INVALID = 0,
HV_CHANNEL_MESSAGE_OFFER_CHANNEL = 1,
HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER = 2,
HV_CHANNEL_MESSAGE_REQUEST_OFFERS = 3,
HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED = 4,
HV_CHANNEL_MESSAGE_OPEN_CHANNEL = 5,
HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT = 6,
HV_CHANNEL_MESSAGE_CLOSE_CHANNEL = 7,
HV_CHANNEL_MESSAGEL_GPADL_HEADER = 8,
HV_CHANNEL_MESSAGE_GPADL_BODY = 9,
HV_CHANNEL_MESSAGE_GPADL_CREATED = 10,
HV_CHANNEL_MESSAGE_GPADL_TEARDOWN = 11,
HV_CHANNEL_MESSAGE_GPADL_TORNDOWN = 12,
HV_CHANNEL_MESSAGE_REL_ID_RELEASED = 13,
HV_CHANNEL_MESSAGE_INITIATED_CONTACT = 14,
HV_CHANNEL_MESSAGE_VERSION_RESPONSE = 15,
HV_CHANNEL_MESSAGE_UNLOAD = 16,
#ifdef HV_VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD
HV_CHANNEL_MESSAGE_VIEW_RANGE_ADD = 17,
HV_CHANNEL_MESSAGE_VIEW_RANGE_REMOVE = 18,
#endif
HV_CHANNEL_MESSAGE_COUNT
} hv_vmbus_channel_msg_type;
/* Common header for all channel management messages. */
typedef struct {
hv_vmbus_channel_msg_type message_type;
uint32_t padding;
} __packed hv_vmbus_channel_msg_header;
/*
 * Query VMBus Version parameters
 */
typedef struct {
hv_vmbus_channel_msg_header header;
uint32_t version;
} __packed hv_vmbus_channel_query_vmbus_version;
/*
 * VMBus Version Supported parameters
 */
typedef struct {
hv_vmbus_channel_msg_header header;
hv_bool_uint8_t version_supported;
} __packed hv_vmbus_channel_version_supported;
/*
 * Channel Offer parameters
 */
typedef struct {
hv_vmbus_channel_msg_header header;
hv_vmbus_channel_offer offer;
uint32_t child_rel_id;
uint8_t monitor_id;
hv_bool_uint8_t monitor_allocated;
} __packed hv_vmbus_channel_offer_channel;
/*
 * Rescind Offer parameters
 */
typedef struct
{
hv_vmbus_channel_msg_header header;
uint32_t child_rel_id;
} __packed hv_vmbus_channel_rescind_offer;
/*
 * Request Offer -- no parameters, SynIC message contains the partition ID
 *
 * Set Snoop -- no parameters, SynIC message contains the partition ID
 *
 * Clear Snoop -- no parameters, SynIC message contains the partition ID
 *
 * All Offers Delivered -- no parameters, SynIC message contains the
 * partition ID
 *
 * Flush Client -- no parameters, SynIC message contains the partition ID
 */
/*
 * Open Channel parameters
 */
typedef struct
{
hv_vmbus_channel_msg_header header;
/*
 * Identifies the specific VMBus channel that is being opened.
 */
uint32_t child_rel_id;
/*
 * ID making a particular open request at a channel offer unique.
 */
uint32_t open_id;
/*
 * GPADL for the channel's ring buffer.
 */
hv_gpadl_handle ring_buffer_gpadl_handle;
/*
 * GPADL for the channel's server context save area.
 */
hv_gpadl_handle server_context_area_gpadl_handle;
/*
 * The upstream ring buffer begins at offset zero in the memory described
 * by ring_buffer_gpadl_handle. The downstream ring buffer follows it at
 * this offset (in pages).
 */
uint32_t downstream_ring_buffer_page_offset;
/*
 * User-specific data to be passed along to the server endpoint.
 */
uint8_t user_data[HV_MAX_USER_DEFINED_BYTES];
} __packed hv_vmbus_channel_open_channel;
/* NT-style status code returned by the host. */
typedef uint32_t hv_nt_status;
/*
 * Open Channel Result parameters
 */
typedef struct
{
hv_vmbus_channel_msg_header header;
uint32_t child_rel_id;
uint32_t open_id;
hv_nt_status status;
} __packed hv_vmbus_channel_open_result;
/*
 * Close channel parameters
 */
typedef struct
{
hv_vmbus_channel_msg_header header;
uint32_t child_rel_id;
} __packed hv_vmbus_channel_close_channel;
/*
 * Channel Message GPADL
 */
#define HV_GPADL_TYPE_RING_BUFFER 1
#define HV_GPADL_TYPE_SERVER_SAVE_AREA 2
#define HV_GPADL_TYPE_TRANSACTION 8
/*
 * The number of PFNs in a GPADL message is defined by the number of pages
 * that would be spanned by byte_count and byte_offset. If the implied number
 * of PFNs won't fit in this packet, there will be a follow-up packet that
 * contains more
 */
typedef struct {
hv_vmbus_channel_msg_header header;
uint32_t child_rel_id;
uint32_t gpadl;
uint16_t range_buf_len;
uint16_t range_count;
hv_gpa_range range[0];
} __packed hv_vmbus_channel_gpadl_header;
/*
 * This is the follow-up packet that contains more PFNs
 */
typedef struct {
hv_vmbus_channel_msg_header header;
uint32_t message_number;
uint32_t gpadl;
uint64_t pfn[0];
} __packed hv_vmbus_channel_gpadl_body;
/* Host's acknowledgement that a GPADL was created. */
typedef struct {
hv_vmbus_channel_msg_header header;
uint32_t child_rel_id;
uint32_t gpadl;
uint32_t creation_status;
} __packed hv_vmbus_channel_gpadl_created;
/* Guest request to tear a GPADL down. */
typedef struct {
hv_vmbus_channel_msg_header header;
uint32_t child_rel_id;
uint32_t gpadl;
} __packed hv_vmbus_channel_gpadl_teardown;
/* Host's acknowledgement that a GPADL was torn down. */
typedef struct {
hv_vmbus_channel_msg_header header;
uint32_t gpadl;
} __packed hv_vmbus_channel_gpadl_torndown;
typedef struct {
hv_vmbus_channel_msg_header header;
uint32_t child_rel_id;
} __packed hv_vmbus_channel_relid_released;
/* Initial contact message: version requested plus shared page addresses. */
typedef struct {
hv_vmbus_channel_msg_header header;
uint32_t vmbus_version_requested;
uint32_t padding2;
uint64_t interrupt_page;
uint64_t monitor_page_1;
uint64_t monitor_page_2;
} __packed hv_vmbus_channel_initiate_contact;
typedef struct {
hv_vmbus_channel_msg_header header;
hv_bool_uint8_t version_supported;
} __packed hv_vmbus_channel_version_response;
/* Unload has no payload beyond the common header. */
typedef hv_vmbus_channel_msg_header hv_vmbus_channel_unload;
/* Length of an Ethernet MAC address in bytes. */
#define HW_MACADDR_LEN 6
/*
 * Fixme: Added to quiet "typeof" errors involving hv_vmbus.h when
 * the including C file was compiled with "-std=c99".
 */
#ifndef typeof
#define typeof __typeof
#endif
#ifndef NULL
#define NULL (void *)0
#endif
/* Opaque handle used for work queues and similar objects. */
typedef void *hv_vmbus_handle;
/* Recover a containing structure pointer from a member pointer. */
#ifndef CONTAINING_RECORD
#define CONTAINING_RECORD(address, type, field) ((type *)( \
(uint8_t *)(address) - \
(uint8_t *)(&((type *)0)->field)))
#endif /* CONTAINING_RECORD */
/* Linux-style equivalent of CONTAINING_RECORD (GCC statement expression). */
#define container_of(ptr, type, member) ({ \
__typeof__( ((type *)0)->member ) *__mptr = (ptr); \
(type *)( (char *)__mptr - offsetof(type,member) );})
/* Instance-variable indices for the vmbus newbus attachment. */
enum {
HV_VMBUS_IVAR_TYPE,
HV_VMBUS_IVAR_INSTANCE,
HV_VMBUS_IVAR_NODE,
HV_VMBUS_IVAR_DEVCTX
};
/* Generate vmbus_get_<var>() accessors via the newbus helper macro. */
#define HV_VMBUS_ACCESSOR(var, ivar, type) \
__BUS_ACCESSOR(vmbus, var, HV_VMBUS, ivar, type)
HV_VMBUS_ACCESSOR(type, TYPE, const char *)
HV_VMBUS_ACCESSOR(devctx, DEVCTX, struct hv_device *)
/*
 * Common defines for Hyper-V ICs
 */
#define HV_ICMSGTYPE_NEGOTIATE 0
#define HV_ICMSGTYPE_HEARTBEAT 1
#define HV_ICMSGTYPE_KVPEXCHANGE 2
#define HV_ICMSGTYPE_SHUTDOWN 3
#define HV_ICMSGTYPE_TIMESYNC 4
#define HV_ICMSGTYPE_VSS 5
#define HV_ICMSGHDRFLAG_TRANSACTION 1
#define HV_ICMSGHDRFLAG_REQUEST 2
#define HV_ICMSGHDRFLAG_RESPONSE 4
/* Pipe framing header preceding IC messages. */
typedef struct hv_vmbus_pipe_hdr {
uint32_t flags;
uint32_t msgsize;
} __packed hv_vmbus_pipe_hdr;
/* Major/minor version pair used in IC negotiation. */
typedef struct hv_vmbus_ic_version {
uint16_t major;
uint16_t minor;
} __packed hv_vmbus_ic_version;
/* Common header of every IC (integration component) message. */
typedef struct hv_vmbus_icmsg_hdr {
hv_vmbus_ic_version icverframe;
uint16_t icmsgtype;
hv_vmbus_ic_version icvermsg;
uint16_t icmsgsize;
uint32_t status;
uint8_t ictransaction_id;
uint8_t icflags;
uint8_t reserved[2];
} __packed hv_vmbus_icmsg_hdr;
typedef struct hv_vmbus_icmsg_negotiate {
uint16_t icframe_vercnt;
uint16_t icmsg_vercnt;
uint32_t reserved;
hv_vmbus_ic_version icversion_data[1]; /* any size array */
} __packed hv_vmbus_icmsg_negotiate;
/* Payload of a shutdown IC request from the host. */
typedef struct hv_vmbus_shutdown_msg_data {
uint32_t reason_code;
uint32_t timeout_seconds;
uint32_t flags;
uint8_t display_message[2048];
} __packed hv_vmbus_shutdown_msg_data;
/* Payload of a heartbeat IC message. */
typedef struct hv_vmbus_heartbeat_msg_data {
uint64_t seq_num;
uint32_t reserved[8];
} __packed hv_vmbus_heartbeat_msg_data;
/*
 * Shared ring buffer control page.  The control fields occupy the first
 * page; ring data begins immediately after it.
 */
typedef struct {
/*
 * offset in bytes from the start of ring data below
 */
volatile uint32_t write_index;
/*
 * offset in bytes from the start of ring data below
 */
volatile uint32_t read_index;
/*
 * NOTE: The interrupt_mask field is used only for channels, but
 * vmbus connection also uses this data structure
 */
volatile uint32_t interrupt_mask;
/* pad it to PAGE_SIZE so that data starts on a page */
uint8_t reserved[4084];
/*
 * WARNING: Ring data starts here + ring_data_start_offset
 * !!! DO NOT place any fields below this !!!
 */
uint8_t buffer[0]; /* doubles as interrupt mask */
} __packed hv_vmbus_ring_buffer;
/* Single-page external data reference for send_packet_pagebuffer(). */
typedef struct {
int length;
int offset;
uint64_t pfn;
} __packed hv_vmbus_page_buffer;
/* Multi-page external data reference for send_packet_multipagebuffer(). */
typedef struct {
int length;
int offset;
uint64_t pfn_array[HV_MAX_MULTIPAGE_BUFFER_COUNT];
} __packed hv_vmbus_multipage_buffer;
/* Per-direction bookkeeping for one mapped ring buffer. */
typedef struct {
hv_vmbus_ring_buffer* ring_buffer;
uint32_t ring_size; /* Include the shared header */
struct mtx ring_lock;
uint32_t ring_data_size; /* ring_size */
uint32_t ring_data_start_offset;
} hv_vmbus_ring_buffer_info;
/* Callback invoked when data arrives on a channel. */
typedef void (*hv_vmbus_pfn_channel_callback)(void *context);
/* Lifecycle states of a VMBus channel. */
typedef enum {
HV_CHANNEL_OFFER_STATE,
HV_CHANNEL_OPENING_STATE,
HV_CHANNEL_OPEN_STATE,
HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE,
} hv_vmbus_channel_state;
/* In-guest representation of one VMBus channel. */
typedef struct hv_vmbus_channel {
TAILQ_ENTRY(hv_vmbus_channel) list_entry;
struct hv_device* device;
hv_vmbus_channel_state state;
hv_vmbus_channel_offer_channel offer_msg;
/*
 * These are based on the offer_msg.monitor_id.
 * Save it here for easy access.
 */
uint8_t monitor_group;
uint8_t monitor_bit;
uint32_t ring_buffer_gpadl_handle;
/*
 * Allocated memory for ring buffer
 */
void* ring_buffer_pages;
uint32_t ring_buffer_page_count;
/*
 * send to parent
 */
hv_vmbus_ring_buffer_info outbound;
/*
 * receive from parent
 */
hv_vmbus_ring_buffer_info inbound;
struct mtx inbound_lock;
hv_vmbus_handle control_work_queue;
hv_vmbus_pfn_channel_callback on_channel_callback;
void* channel_callback_context;
} hv_vmbus_channel;
/* A device instance enumerated on the VMBus. */
typedef struct hv_device {
hv_guid class_id;
hv_guid device_id;
device_t device;
hv_vmbus_channel* channel;
} hv_device;
/*
 * Channel API exported to VSC drivers (netvsc, storvsc, utilities).
 * All functions return 0 on success or an HV_/errno-style error code.
 */
/* Receive one packet, stripping the packet descriptor. */
int hv_vmbus_channel_recv_packet(
hv_vmbus_channel* channel,
void* buffer,
uint32_t buffer_len,
uint32_t* buffer_actual_len,
uint64_t* request_id);
/* Receive one packet including its raw descriptor. */
int hv_vmbus_channel_recv_packet_raw(
hv_vmbus_channel* channel,
void* buffer,
uint32_t buffer_len,
uint32_t* buffer_actual_len,
uint64_t* request_id);
/* Open a channel: allocate/establish ring buffers and register callback. */
int hv_vmbus_channel_open(
hv_vmbus_channel* channel,
uint32_t send_ring_buffer_size,
uint32_t recv_ring_buffer_size,
void* user_data,
uint32_t user_data_len,
hv_vmbus_pfn_channel_callback
pfn_on_channel_callback,
void* context);
void hv_vmbus_channel_close(hv_vmbus_channel *channel);
/* Send an in-band packet. */
int hv_vmbus_channel_send_packet(
hv_vmbus_channel* channel,
void* buffer,
uint32_t buffer_len,
uint64_t request_id,
hv_vmbus_packet_type type,
uint32_t flags);
/* Send a packet referencing external single-page buffers (GPA direct). */
int hv_vmbus_channel_send_packet_pagebuffer(
hv_vmbus_channel* channel,
hv_vmbus_page_buffer page_buffers[],
uint32_t page_count,
void* buffer,
uint32_t buffer_len,
uint64_t request_id);
/* Send a packet referencing one external multi-page buffer. */
int hv_vmbus_channel_send_packet_multipagebuffer(
hv_vmbus_channel* channel,
hv_vmbus_multipage_buffer* multi_page_buffer,
void* buffer,
uint32_t buffer_len,
uint64_t request_id);
/* Establish a GPADL over contig_buffer; returns handle in *gpadl_handle. */
int hv_vmbus_channel_establish_gpadl(
hv_vmbus_channel* channel,
/* must be phys and virt contiguous */
void* contig_buffer,
/* page-size multiple */
uint32_t size,
uint32_t* gpadl_handle);
/* Tear down a previously established GPADL (note: "gpdal" typo is part of the public name). */
int hv_vmbus_channel_teardown_gpdal(
hv_vmbus_channel* channel,
uint32_t gpadl_handle);
/*
 * Work abstraction defines
 */
typedef struct hv_work_queue {
struct taskqueue* queue;
struct proc* proc;
struct sema* work_sema;
} hv_work_queue;
/* One queued callback + context, backed by a taskqueue task. */
typedef struct hv_work_item {
struct task work;
void (*callback)(void *);
void* context;
hv_work_queue* wq;
} hv_work_item;
struct hv_work_queue* hv_work_queue_create(char* name);
void hv_work_queue_close(struct hv_work_queue* wq);
/* Enqueue callback(context) onto wq; returns 0 on success. */
int hv_queue_work_item(
hv_work_queue* wq,
void (*callback)(void *),
void* context);
/**
 * @brief Get physical address from virtual
 *
 * Combines the physical translation from vtophys() with the in-page
 * offset of the virtual address.
 */
static inline unsigned long
hv_get_phys_addr(void *virt)
{
	return (vtophys(virt) | ((vm_offset_t)virt & PAGE_MASK));
}
#endif /* __HYPERV_H__ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,995 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* HyperV vmbus (virtual machine bus) network VSC (virtual services client)
* header file
*
* (Updated from unencumbered NvspProtocol.h)
*/
#ifndef __HV_NET_VSC_H__
#define __HV_NET_VSC_H__
#include <sys/types.h>
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <dev/hyperv/include/hyperv.h>
#define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF)
/* NVSP protocol versions: high 16 bits major, low 16 bits minor. */
#define NVSP_PROTOCOL_VERSION_1 2
#define NVSP_PROTOCOL_VERSION_2 0x30002
#define NVSP_MIN_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_1)
#define NVSP_MAX_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_2)
#define NVSP_PROTOCOL_VERSION_CURRENT NVSP_PROTOCOL_VERSION_2
/* Operational status values reported by the VSP */
#define NVSP_OPERATIONAL_STATUS_OK (0x00000000)
#define NVSP_OPERATIONAL_STATUS_DEGRADED (0x00000001)
#define NVSP_OPERATIONAL_STATUS_NONRECOVERABLE (0x00000002)
#define NVSP_OPERATIONAL_STATUS_NO_CONTACT (0x00000003)
#define NVSP_OPERATIONAL_STATUS_LOST_COMMUNICATION (0x00000004)
/*
 * Maximum number of transfer pages (packets) the VSP will use on a receive
 */
#define NVSP_MAX_PACKETS_PER_RECEIVE 375
/* NVSP message type codes; values above 100 depend on declaration order. */
typedef enum nvsp_msg_type_ {
nvsp_msg_type_none = 0,
/*
 * Init Messages
 */
nvsp_msg_type_init = 1,
nvsp_msg_type_init_complete = 2,
nvsp_version_msg_start = 100,
/*
 * Version 1 Messages
 */
nvsp_msg_1_type_send_ndis_vers = nvsp_version_msg_start,
nvsp_msg_1_type_send_rx_buf,
nvsp_msg_1_type_send_rx_buf_complete,
nvsp_msg_1_type_revoke_rx_buf,
nvsp_msg_1_type_send_send_buf,
nvsp_msg_1_type_send_send_buf_complete,
nvsp_msg_1_type_revoke_send_buf,
nvsp_msg_1_type_send_rndis_pkt,
nvsp_msg_1_type_send_rndis_pkt_complete,
/*
 * Version 2 Messages
 */
nvsp_msg_2_type_send_chimney_delegated_buf,
nvsp_msg_2_type_send_chimney_delegated_buf_complete,
nvsp_msg_2_type_revoke_chimney_delegated_buf,
nvsp_msg_2_type_resume_chimney_rx_indication,
nvsp_msg_2_type_terminate_chimney,
nvsp_msg_2_type_terminate_chimney_complete,
nvsp_msg_2_type_indicate_chimney_event,
nvsp_msg_2_type_send_chimney_packet,
nvsp_msg_2_type_send_chimney_packet_complete,
nvsp_msg_2_type_post_chimney_rx_request,
nvsp_msg_2_type_post_chimney_rx_request_complete,
nvsp_msg_2_type_alloc_rx_buf,
nvsp_msg_2_type_alloc_rx_buf_complete,
nvsp_msg_2_type_free_rx_buf,
nvsp_msg_2_send_vmq_rndis_pkt,
nvsp_msg_2_send_vmq_rndis_pkt_complete,
nvsp_msg_2_type_send_ndis_config,
nvsp_msg_2_type_alloc_chimney_handle,
nvsp_msg_2_type_alloc_chimney_handle_complete,
} nvsp_msg_type;
/* Status codes carried in NVSP completion messages. */
typedef enum nvsp_status_ {
nvsp_status_none = 0,
nvsp_status_success,
nvsp_status_failure,
/* Deprecated */
nvsp_status_prot_vers_range_too_new,
/* Deprecated */
nvsp_status_prot_vers_range_too_old,
nvsp_status_invalid_rndis_pkt,
nvsp_status_busy,
nvsp_status_max,
} nvsp_status;
/* Common header of every NVSP message. */
typedef struct nvsp_msg_hdr_ {
uint32_t msg_type;
} __packed nvsp_msg_hdr;
/*
 * Init Messages
 */
/*
 * This message is used by the VSC to initialize the channel
 * after the channels has been opened. This message should
 * never include anything other than versioning (i.e. this
 * message will be the same forever).
 *
 * Forever is a long time. The values have been redefined
 * in Win7 to indicate major and minor protocol version
 * number.
 */
typedef struct nvsp_msg_init_ {
union {
struct {
uint16_t minor_protocol_version;
uint16_t major_protocol_version;
} s;
/* Formerly min_protocol_version */
uint32_t protocol_version;
} p1;
/* Formerly max_protocol_version */
uint32_t protocol_version_2;
} __packed nvsp_msg_init;
/*
 * This message is used by the VSP to complete the initialization
 * of the channel. This message should never include anything other
 * than versioning (i.e. this message will be the same forever).
 */
typedef struct nvsp_msg_init_complete_ {
/* Deprecated */
uint32_t negotiated_prot_vers;
uint32_t max_mdl_chain_len;
uint32_t status;
} __packed nvsp_msg_init_complete;
/* Union of the two init-phase message bodies. */
typedef union nvsp_msg_init_uber_ {
nvsp_msg_init init;
nvsp_msg_init_complete init_compl;
} __packed nvsp_msg_init_uber;
/*
 * Version 1 Messages
 */
/*
 * This message is used by the VSC to send the NDIS version
 * to the VSP. The VSP can use this information when handling
 * OIDs sent by the VSC.
 */
typedef struct nvsp_1_msg_send_ndis_version_ {
uint32_t ndis_major_vers;
/* Deprecated */
uint32_t ndis_minor_vers;
} __packed nvsp_1_msg_send_ndis_version;
/*
 * This message is used by the VSC to send a receive buffer
 * to the VSP. The VSP can then use the receive buffer to
 * send data to the VSC.
 */
typedef struct nvsp_1_msg_send_rx_buf_ {
uint32_t gpadl_handle;
uint16_t id;
} __packed nvsp_1_msg_send_rx_buf;
/* Describes one suballocation section within the receive buffer. */
typedef struct nvsp_1_rx_buf_section_ {
uint32_t offset;
uint32_t sub_allocation_size;
uint32_t num_sub_allocations;
uint32_t end_offset;
} __packed nvsp_1_rx_buf_section;
/*
 * This message is used by the VSP to acknowledge a receive
 * buffer send by the VSC. This message must be sent by the
 * VSP before the VSP uses the receive buffer.
 */
typedef struct nvsp_1_msg_send_rx_buf_complete_ {
uint32_t status;
uint32_t num_sections;
/*
 * The receive buffer is split into two parts, a large
 * suballocation section and a small suballocation
 * section. These sections are then suballocated by a
 * certain size.
 *
 * For example, the following break up of the receive
 * buffer has 6 large suballocations and 10 small
 * suballocations.
 *
 * | Large Section | | Small Section |
 * ------------------------------------------------------------
 * | | | | | | | | | | | | | | | | | |
 * | |
 * LargeOffset SmallOffset
 */
nvsp_1_rx_buf_section sections[1];
} __packed nvsp_1_msg_send_rx_buf_complete;
/*
 * This message is sent by the VSC to revoke the receive buffer.
 * After the VSP completes this transaction, the VSP should never
 * use the receive buffer again.
 */
typedef struct nvsp_1_msg_revoke_rx_buf_ {
uint16_t id;
} __packed nvsp_1_msg_revoke_rx_buf;
/*
 * This message is used by the VSC to send a send buffer
 * to the VSP. The VSC can then use the send buffer to
 * send data to the VSP.
 */
typedef struct nvsp_1_msg_send_send_buf_ {
uint32_t gpadl_handle;
uint16_t id;
} __packed nvsp_1_msg_send_send_buf;
/*
 * This message is used by the VSP to acknowledge a send
 * buffer sent by the VSC. This message must be sent by the
 * VSP before the VSP uses the sent buffer.
 */
typedef struct nvsp_1_msg_send_send_buf_complete_ {
uint32_t status;
/*
 * The VSC gets to choose the size of the send buffer and
 * the VSP gets to choose the sections size of the buffer.
 * This was done to enable dynamic reconfigurations when
 * the cost of GPA-direct buffers decreases.
 */
uint32_t section_size;
} __packed nvsp_1_msg_send_send_buf_complete;
/*
 * This message is sent by the VSC to revoke the send buffer.
 * After the VSP completes this transaction, the vsp should never
 * use the send buffer again.
 */
typedef struct nvsp_1_msg_revoke_send_buf_ {
uint16_t id;
} __packed nvsp_1_msg_revoke_send_buf;
/*
 * This message is used by both the VSP and the VSC to send
 * an RNDIS message to the opposite channel endpoint.
 */
typedef struct nvsp_1_msg_send_rndis_pkt_ {
/*
 * This field is specified by RNDIS. They assume there's
 * two different channels of communication. However,
 * the Network VSP only has one. Therefore, the channel
 * travels with the RNDIS packet.
 */
uint32_t chan_type;
/*
 * This field is used to send part or all of the data
 * through a send buffer. This values specifies an
 * index into the send buffer. If the index is
 * 0xFFFFFFFF, then the send buffer is not being used
 * and all of the data was sent through other VMBus
 * mechanisms.
 */
uint32_t send_buf_section_idx;
uint32_t send_buf_section_size;
} __packed nvsp_1_msg_send_rndis_pkt;
/*
 * This message is used by both the VSP and the VSC to complete
 * a RNDIS message to the opposite channel endpoint. At this
 * point, the initiator of this message cannot use any resources
 * associated with the original RNDIS packet.
 */
typedef struct nvsp_1_msg_send_rndis_pkt_complete_ {
uint32_t status;
} __packed nvsp_1_msg_send_rndis_pkt_complete;
/*
 * Version 2 Messages
 */
/*
 * Capability bits advertised by the VSC in the NDIS configuration
 * message below.  (NOTE(review): original comment was copy-pasted from
 * the "send NDIS version" message above.)
 */
typedef struct nvsp_2_netvsc_capabilities_ {
union {
uint64_t as_uint64;
struct {
uint64_t vmq : 1;
uint64_t chimney : 1;
uint64_t sriov : 1;
uint64_t ieee8021q : 1;
uint64_t correlationid : 1;
uint64_t teaming : 1;
} u2;
} u1;
} __packed nvsp_2_netvsc_capabilities;
/* NDIS configuration (MTU + capabilities) sent by the VSC. */
typedef struct nvsp_2_msg_send_ndis_config_ {
uint32_t mtu;
uint32_t reserved;
nvsp_2_netvsc_capabilities capabilities;
} __packed nvsp_2_msg_send_ndis_config;
/*
 * NvspMessage2TypeSendChimneyDelegatedBuffer
 */
typedef struct nvsp_2_msg_send_chimney_buf_
{
/*
 * On WIN7 beta, delegated_obj_max_size is defined as a uint32_t
 * Since WIN7 RC, it was split into two uint16_t. To have the same
 * struct layout, delegated_obj_max_size shall be the first field.
 */
uint16_t delegated_obj_max_size;
/*
 * The revision # of chimney protocol used between NVSC and NVSP.
 *
 * This revision is NOT related to the chimney revision between
 * NDIS protocol and miniport drivers.
 */
uint16_t revision;
uint32_t gpadl_handle;
} __packed nvsp_2_msg_send_chimney_buf;
/* Unsupported chimney revision 0 (only present in WIN7 beta) */
#define NVSP_CHIMNEY_REVISION_0 0
/* WIN7 Beta Chimney QFE */
#define NVSP_CHIMNEY_REVISION_1 1
/* The chimney revision since WIN7 RC */
#define NVSP_CHIMNEY_REVISION_2 2
/*
 * NvspMessage2TypeSendChimneyDelegatedBufferComplete
 */
typedef struct nvsp_2_msg_send_chimney_buf_complete_ {
uint32_t status;
/*
 * Maximum number outstanding sends and pre-posted receives.
 *
 * NVSC should not post more than SendQuota/ReceiveQuota packets.
 * Otherwise, it can block the non-chimney path for an indefinite
 * amount of time.
 * (since chimney sends/receives are affected by the remote peer).
 *
 * Note: NVSP enforces the quota restrictions on a per-VMBCHANNEL
 * basis. It doesn't enforce the restriction separately for chimney
 * send/receive. If NVSC doesn't voluntarily enforce "SendQuota",
 * it may kill its own network connectivity.
 */
uint32_t send_quota;
uint32_t rx_quota;
} __packed nvsp_2_msg_send_chimney_buf_complete;
/*
 * NvspMessage2TypeRevokeChimneyDelegatedBuffer
 */
typedef struct nvsp_2_msg_revoke_chimney_buf_ {
uint32_t gpadl_handle;
} __packed nvsp_2_msg_revoke_chimney_buf;
/* Object types for chimney handle allocation */
#define NVSP_CHIMNEY_OBJECT_TYPE_NEIGHBOR 0
#define NVSP_CHIMNEY_OBJECT_TYPE_PATH4 1
#define NVSP_CHIMNEY_OBJECT_TYPE_PATH6 2
#define NVSP_CHIMNEY_OBJECT_TYPE_TCP 3
/*
 * NvspMessage2TypeAllocateChimneyHandle
 */
typedef struct nvsp_2_msg_alloc_chimney_handle_ {
uint64_t vsc_context;
uint32_t object_type;
} __packed nvsp_2_msg_alloc_chimney_handle;
/*
 * NvspMessage2TypeAllocateChimneyHandleComplete
 */
typedef struct nvsp_2_msg_alloc_chimney_handle_complete_ {
uint32_t vsp_handle;
} __packed nvsp_2_msg_alloc_chimney_handle_complete;
/*
 * NvspMessage2TypeResumeChimneyRXIndication
 */
typedef struct nvsp_2_msg_resume_chimney_rx_indication {
/*
 * Handle identifying the offloaded connection
 */
uint32_t vsp_tcp_handle;
} __packed nvsp_2_msg_resume_chimney_rx_indication;
#define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_FIRST_STAGE	(0x01u)
#define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_RESERVED	(~(0x01u))

/*
 * NvspMessage2TypeTerminateChimney
 * Requests upload/termination of a previously offloaded object.
 */
typedef struct nvsp_2_msg_terminate_chimney_ {
	/*
	 * Handle identifying the offloaded object
	 */
	uint32_t	vsp_handle;

	/*
	 * Terminate Offload Flags
	 * Bit 0:
	 *	When set to 0, terminate the offload at the destination NIC
	 * Bit 1-31:  Reserved, shall be zero
	 */
	uint32_t	flags;

	union {
		/*
		 * This field is valid only when bit 0 of flags is clear.
		 * It specifies the index into the premapped delegated
		 * object buffer.  The buffer was sent through the
		 * NvspMessage2TypeSendChimneyDelegatedBuffer
		 * message at initialization time.
		 *
		 * NVSP will write the delegated state into the delegated
		 * buffer upon upload completion.
		 */
		uint32_t	index;

		/*
		 * This field is valid only when bit 0 of flags is set.
		 *
		 * The seqence number of the most recently accepted RX
		 * indication when VSC sets its TCP context into
		 * "terminating" state.
		 *
		 * This allows NVSP to determines if there are any in-flight
		 * RX indications for which the acceptance state is still
		 * undefined.
		 */
		uint64_t	last_accepted_rx_seq_no;
	} f0;
} __packed nvsp_2_msg_terminate_chimney;

#define NVSP_TERMINATE_CHIMNEY_COMPLETE_FLAG_DATA_CORRUPTED	0x0000001u

/*
 * NvspMessage2TypeTerminateChimneyComplete
 */
typedef struct nvsp_2_msg_terminate_chimney_complete_ {
	uint64_t	vsc_context;
	uint32_t	flags;
} __packed nvsp_2_msg_terminate_chimney_complete;

/*
 * NvspMessage2TypeIndicateChimneyEvent
 */
typedef struct nvsp_2_msg_indicate_chimney_event_ {
	/*
	 * When VscTcpContext is 0, event_type is an NDIS_STATUS event code
	 * Otherwise, EventType is an TCP connection event (defined in
	 * NdisTcpOffloadEventHandler chimney DDK document).
	 */
	uint32_t	event_type;

	/*
	 * When VscTcpContext is 0, EventType is an NDIS_STATUS event code
	 * Otherwise, EventType is an TCP connection event specific information
	 * (defined in NdisTcpOffloadEventHandler chimney DDK document).
	 */
	uint32_t	event_specific_info;

	/*
	 * If not 0, the event is per-TCP connection event.  This field
	 * contains the VSC's TCP context.
	 * If 0, the event indication is global.
	 */
	uint64_t	vsc_tcp_context;
} __packed nvsp_2_msg_indicate_chimney_event;

#define NVSP_1_CHIMNEY_SEND_INVALID_OOB_INDEX		0xffffu
#define NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX	0xffffu

/*
 * NvspMessage2TypeSendChimneyPacket
 */
typedef struct nvsp_2_msg_send_chimney_pkt_ {
	/*
	 * Identify the TCP connection for which this chimney send is
	 */
	uint32_t	vsp_tcp_handle;

	/*
	 * This field is used to send part or all of the data
	 * through a send buffer.  This values specifies an
	 * index into the send buffer.  If the index is
	 * 0xFFFF, then the send buffer is not being used
	 * and all of the data was sent through other VMBus
	 * mechanisms.
	 */
	uint16_t	send_buf_section_index;
	uint16_t	send_buf_section_size;

	/*
	 * OOB Data Index
	 * This an index to the OOB data buffer.  If the index is 0xFFFFFFFF,
	 * then there is no OOB data.
	 *
	 * This field shall be always 0xFFFFFFFF for now.  It is reserved for
	 * the future.
	 *
	 * NOTE(review): this field is only 16 bits wide, so the 0xFFFFFFFF
	 * sentinel described above cannot fit; the "no OOB data" value is
	 * presumably NVSP_1_CHIMNEY_SEND_INVALID_OOB_INDEX (0xffff) --
	 * confirm against the NVSP protocol definition before relying on it.
	 */
	uint16_t	oob_data_index;

	/*
	 * DisconnectFlags = 0
	 * Normal chimney send.  See MiniportTcpOffloadSend for details.
	 *
	 * DisconnectFlags = TCP_DISCONNECT_GRACEFUL_CLOSE (0x01)
	 * Graceful disconnect.  See MiniportTcpOffloadDisconnect for details.
	 *
	 * DisconnectFlags = TCP_DISCONNECT_ABORTIVE_CLOSE (0x02)
	 * Abortive disconnect.  See MiniportTcpOffloadDisconnect for details.
	 */
	uint16_t	disconnect_flags;

	uint32_t	seq_no;
} __packed nvsp_2_msg_send_chimney_pkt;

/*
 * NvspMessage2TypeSendChimneyPacketComplete
 */
typedef struct nvsp_2_msg_send_chimney_pkt_complete_ {
	/*
	 * The NDIS_STATUS for the chimney send
	 */
	uint32_t	status;

	/*
	 * Number of bytes that have been sent to the peer (and ACKed by the peer).
	 */
	uint32_t	bytes_transferred;
} __packed nvsp_2_msg_send_chimney_pkt_complete;
#define NVSP_1_CHIMNEY_RECV_FLAG_NO_PUSH	0x0001u
#define NVSP_1_CHIMNEY_RECV_INVALID_OOB_INDEX	0xffffu

/*
 * NvspMessage2TypePostChimneyRecvRequest
 * Pre-posts a receive request on an offloaded TCP connection.
 */
typedef struct nvsp_2_msg_post_chimney_rx_request_ {
	/*
	 * Identify the TCP connection which this chimney receive request
	 * is for.
	 */
	uint32_t	vsp_tcp_handle;

	/*
	 * OOB Data Index
	 * This an index to the OOB data buffer.  If the index is 0xFFFFFFFF,
	 * then there is no OOB data.
	 *
	 * This field shall be always 0xFFFFFFFF for now.  It is reserved for
	 * the future.
	 */
	uint32_t	oob_data_index;

	/*
	 * Bit 0
	 *	When it is set, this is a "no-push" receive.
	 *	When it is clear, this is a "push" receive.
	 *
	 * Bit 1-15:  Reserved and shall be zero
	 */
	uint16_t	flags;

	/*
	 * For debugging and diagnoses purpose.
	 * The SeqNo is per TCP connection and starts from 0.
	 */
	uint32_t	seq_no;
} __packed nvsp_2_msg_post_chimney_rx_request;

/*
 * NvspMessage2TypePostChimneyRecvRequestComplete
 */
typedef struct nvsp_2_msg_post_chimney_rx_request_complete_ {
	/*
	 * The NDIS_STATUS for the chimney send
	 */
	uint32_t	status;

	/*
	 * Number of bytes that have been sent to the peer (and ACKed by
	 * the peer).
	 */
	uint32_t	bytes_xferred;
} __packed nvsp_2_msg_post_chimney_rx_request_complete;

/*
 * NvspMessage2TypeAllocateReceiveBuffer
 * Asks the VSP to map a VM shared-memory receive buffer.
 */
typedef struct nvsp_2_msg_alloc_rx_buf_ {
	/*
	 * Allocation ID to match the allocation request and response
	 */
	uint32_t	allocation_id;

	/*
	 * Length of the VM shared memory receive buffer that needs to
	 * be allocated
	 */
	uint32_t	length;
} __packed nvsp_2_msg_alloc_rx_buf;

/*
 * NvspMessage2TypeAllocateReceiveBufferComplete
 */
typedef struct nvsp_2_msg_alloc_rx_buf_complete_ {
	/*
	 * The NDIS_STATUS code for buffer allocation
	 */
	uint32_t	status;

	/*
	 * Allocation ID from NVSP_2_MESSAGE_ALLOCATE_RECEIVE_BUFFER
	 */
	uint32_t	allocation_id;

	/*
	 * GPADL handle for the allocated receive buffer
	 */
	uint32_t	gpadl_handle;

	/*
	 * Receive buffer ID that is further used in
	 * NvspMessage2SendVmqRndisPacket
	 */
	uint64_t	rx_buf_id;
} __packed nvsp_2_msg_alloc_rx_buf_complete;

/*
 * NvspMessage2TypeFreeReceiveBuffer
 */
typedef struct nvsp_2_msg_free_rx_buf_ {
	/*
	 * Receive buffer ID previous returned in
	 * NvspMessage2TypeAllocateReceiveBufferComplete message
	 */
	uint64_t	rx_buf_id;
} __packed nvsp_2_msg_free_rx_buf;
/*
 * This structure is used in defining the buffers in
 * NVSP_2_MESSAGE_SEND_VMQ_RNDIS_PACKET structure
 */
typedef struct nvsp_xfer_page_range_ {
	/*
	 * Specifies the ID of the receive buffer that has the buffer.  This
	 * ID can be the general receive buffer ID specified in
	 * NvspMessage1TypeSendReceiveBuffer or it can be the shared memory
	 * receive buffer ID allocated by the VSC and specified in
	 * NvspMessage2TypeAllocateReceiveBufferComplete message
	 */
	uint64_t	xfer_page_set_id;

	/*
	 * Number of bytes
	 */
	uint32_t	byte_count;

	/*
	 * Offset in bytes from the beginning of the buffer
	 */
	uint32_t	byte_offset;
} __packed nvsp_xfer_page_range;

/*
 * NvspMessage2SendVmqRndisPacket
 */
typedef struct nvsp_2_msg_send_vmq_rndis_pkt_ {
	/*
	 * This field is specified by RNIDS.  They assume there's
	 * two different channels of communication.  However,
	 * the Network VSP only has one.  Therefore, the channel
	 * travels with the RNDIS packet.  It must be RMC_DATA
	 */
	uint32_t		channel_type;

	/*
	 * Only the Range element corresponding to the RNDIS header of
	 * the first RNDIS message in the multiple RNDIS messages sent
	 * in one NVSP message.  Information about the data portions as well
	 * as the subsequent RNDIS messages in the same NVSP message are
	 * embedded in the RNDIS header itself
	 */
	nvsp_xfer_page_range	range;
} __packed nvsp_2_msg_send_vmq_rndis_pkt;

/*
 * This message is used by the VSC to complete
 * a RNDIS VMQ message to the VSP.  At this point,
 * the initiator of this message can use any resources
 * associated with the original RNDIS VMQ packet.
 */
typedef struct nvsp_2_msg_send_vmq_rndis_pkt_complete_
{
	uint32_t	status;
} __packed nvsp_2_msg_send_vmq_rndis_pkt_complete;

/*
 * Union of all NVSP protocol version 1 message bodies.
 */
typedef union nvsp_1_msg_uber_ {
	nvsp_1_msg_send_ndis_version		send_ndis_vers;
	nvsp_1_msg_send_rx_buf			send_rx_buf;
	nvsp_1_msg_send_rx_buf_complete		send_rx_buf_complete;
	nvsp_1_msg_revoke_rx_buf		revoke_rx_buf;
	nvsp_1_msg_send_send_buf		send_send_buf;
	nvsp_1_msg_send_send_buf_complete	send_send_buf_complete;
	nvsp_1_msg_revoke_send_buf		revoke_send_buf;
	nvsp_1_msg_send_rndis_pkt		send_rndis_pkt;
	nvsp_1_msg_send_rndis_pkt_complete	send_rndis_pkt_complete;
} __packed nvsp_1_msg_uber;

/*
 * Union of all NVSP protocol version 2 message bodies.
 */
typedef union nvsp_2_msg_uber_ {
	nvsp_2_msg_send_ndis_config		send_ndis_config;
	nvsp_2_msg_send_chimney_buf		send_chimney_buf;
	nvsp_2_msg_send_chimney_buf_complete	send_chimney_buf_complete;
	nvsp_2_msg_revoke_chimney_buf		revoke_chimney_buf;
	nvsp_2_msg_resume_chimney_rx_indication	resume_chimney_rx_indication;
	nvsp_2_msg_terminate_chimney		terminate_chimney;
	nvsp_2_msg_terminate_chimney_complete	terminate_chimney_complete;
	nvsp_2_msg_indicate_chimney_event	indicate_chimney_event;
	nvsp_2_msg_send_chimney_pkt		send_chimney_packet;
	nvsp_2_msg_send_chimney_pkt_complete	send_chimney_packet_complete;
	nvsp_2_msg_post_chimney_rx_request	post_chimney_rx_request;
	nvsp_2_msg_post_chimney_rx_request_complete
						post_chimney_rx_request_complete;
	nvsp_2_msg_alloc_rx_buf			alloc_rx_buffer;
	nvsp_2_msg_alloc_rx_buf_complete	alloc_rx_buffer_complete;
	nvsp_2_msg_free_rx_buf			free_rx_buffer;
	nvsp_2_msg_send_vmq_rndis_pkt		send_vmq_rndis_pkt;
	nvsp_2_msg_send_vmq_rndis_pkt_complete	send_vmq_rndis_pkt_complete;
	nvsp_2_msg_alloc_chimney_handle		alloc_chimney_handle;
	nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete;
} __packed nvsp_2_msg_uber;

/*
 * Union of the message bodies of every supported protocol version.
 */
typedef union nvsp_all_msgs_ {
	nvsp_msg_init_uber	init_msgs;
	nvsp_1_msg_uber		vers_1_msgs;
	nvsp_2_msg_uber		vers_2_msgs;
} __packed nvsp_all_msgs;

/*
 * ALL Messages
 * Wire format of every NVSP message: a common header followed by the
 * version-specific body.
 */
typedef struct nvsp_msg_ {
	nvsp_msg_hdr	hdr;
	nvsp_all_msgs	msgs;
} __packed nvsp_msg;
/*
* The following arguably belongs in a separate header file
*/
/*
* Defines
*/
#define NETVSC_SEND_BUFFER_SIZE (64*1024) /* 64K */
#define NETVSC_SEND_BUFFER_ID 0xface
#define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024) /* 1MB */
#define NETVSC_RECEIVE_BUFFER_ID 0xcafe
#define NETVSC_RECEIVE_SG_COUNT 1
/* Preallocated receive packets */
#define NETVSC_RECEIVE_PACKETLIST_COUNT 256
/*
* Maximum MTU we permit to be configured for a netvsc interface.
* When the code was developed, a max MTU of 12232 was tested and
* proven to work. 9K is a reasonable maximum for an Ethernet.
*/
#define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024)
/*
* Data types
*/
/*
* Per netvsc channel-specific
*/
typedef struct netvsc_dev_ {
	/* Back-pointer to the owning VMBus device */
	struct hv_device		*dev;
	int				num_outstanding_sends;

	/* List of free preallocated NETVSC_PACKET to represent RX packet */
	STAILQ_HEAD(PQ, netvsc_packet_)	myrx_packet_list;
	struct mtx			rx_pkt_list_lock;

	/* Send buffer allocated by us but manages by NetVSP */
	void				*send_buf;
	uint32_t			send_buf_size;
	uint32_t			send_buf_gpadl_handle;
	uint32_t			send_section_size;

	/* Receive buffer allocated by us but managed by NetVSP */
	void				*rx_buf;
	uint32_t			rx_buf_size;
	uint32_t			rx_buf_gpadl_handle;
	uint32_t			rx_section_count;
	nvsp_1_rx_buf_section		*rx_sections;

	/* Used for NetVSP initialization protocol */
	struct sema			channel_init_sema;
	nvsp_msg			channel_init_packet;

	nvsp_msg			revoke_packet;
	/*uint8_t			hw_mac_addr[HW_MACADDR_LEN];*/

	/* Holds rndis device info */
	void				*extension;

	/* Set when the device is being torn down */
	hv_bool_uint8_t			destroy;
	/* Negotiated NVSP version */
	uint32_t			nvsp_version;
} netvsc_dev;

/* Completion callback type shared by the TX and RX paths */
typedef void (*pfn_on_send_rx_completion)(void *);

#define NETVSC_DEVICE_RING_BUFFER_SIZE	(64 * PAGE_SIZE)
#define NETVSC_PACKET_MAXPAGE		16

typedef struct xfer_page_packet_ {
	/*
	 * This needs to be here because the network RX code casts
	 * an instantiation of this structure to a netvsc_packet.
	 */
	STAILQ_ENTRY(netvsc_packet_) mylist_entry;

	uint32_t count;
} xfer_page_packet;

typedef struct netvsc_packet_ {
	/*
	 * List used when enqueued on &net_dev->rx_packet_list,
	 * and when enqueued within the netvsc code
	 */
	STAILQ_ENTRY(netvsc_packet_)	mylist_entry;

	struct hv_device		*device;
	hv_bool_uint8_t			is_data_pkt;	/* One byte */
	/* 802.1Q tag, consumed by the TX path and produced by RX */
	uint16_t			vlan_tci;
	xfer_page_packet		*xfer_page_pkt;

	/* Completion */
	union {
		struct {
			uint64_t			rx_completion_tid;
			void				*rx_completion_context;
			/* This is no longer used */
			pfn_on_send_rx_completion	on_rx_completion;
		} rx;
		struct {
			uint64_t			send_completion_tid;
			void				*send_completion_context;
			/* Still used in netvsc and filter code */
			pfn_on_send_rx_completion	on_send_completion;
		} send;
	} compl;

	void			*extension;
	uint32_t		tot_data_buf_len;
	uint32_t		page_buf_count;
	hv_vmbus_page_buffer	page_buffers[NETVSC_PACKET_MAXPAGE];
} netvsc_packet;

/*
 * MAC address and link state reported back by hv_rf_on_device_add().
 * Note: the attach code treats link_state == 0 as "link up".
 */
typedef struct {
	uint8_t		mac_addr[6];	/* Assumption unsigned long */
	hv_bool_uint8_t	link_state;
} netvsc_device_info;

/*
 * Device-specific softc structure
 */
typedef struct hn_softc {
	struct ifnet		*hn_ifp;
	struct arpcom		arpcom;
	device_t		hn_dev;
	uint8_t			hn_unit;
	int			hn_carrier;
	int			hn_if_flags;
	struct mtx		hn_lock;
	int			hn_initdone;
	struct hv_device	*hn_dev_obj;
	netvsc_dev		*net_dev;
} hn_softc_t;
/*
* Externs
*/
extern int hv_promisc_mode;
extern void netvsc_linkstatus_callback(struct hv_device *device_obj,
uint32_t status);
extern int netvsc_recv(struct hv_device *device_obj, netvsc_packet *packet);
extern void netvsc_xmit_completion(void *context);
extern void hv_nv_on_receive_completion(void *context);
extern netvsc_dev *hv_nv_on_device_add(struct hv_device *device, void *additional_info);
extern int hv_nv_on_device_remove(struct hv_device *device,
boolean_t destroy_channel);
extern int hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt);
#endif /* __HV_NET_VSC_H__ */

View File

@ -0,0 +1,948 @@
/*-
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 2004-2006 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/bpf.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/if.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/if_ether.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <machine/frame.h>
#include <machine/vmparam.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <sys/mutex.h>
#include <sys/errno.h>
#include <sys/types.h>
#include <machine/atomic.h>
#include <machine/intr_machdep.h>
#include <dev/hyperv/include/hyperv.h>
#include "hv_net_vsc.h"
#include "hv_rndis.h"
#include "hv_rndis_filter.h"
/* Short for Hyper-V network interface */
#define NETVSC_DEVNAME "hn"
/*
* It looks like offset 0 of buf is reserved to hold the softc pointer.
* The sc pointer evidently not needed, and is not presently populated.
* The packet offset is where the netvsc_packet starts in the buffer.
*/
#define HV_NV_SC_PTR_OFFSET_IN_BUF 0
#define HV_NV_PACKET_OFFSET_IN_BUF 16
/*
* Data types
*/
/* Global driver state: records whether one-time init has run. */
struct hv_netvsc_driver_context {
	uint32_t	drv_inited;
};
/*
* Be aware that this sleepable mutex will exhibit WITNESS errors when
* certain TCP and ARP code paths are taken. This appears to be a
* well-known condition, as all other drivers checked use a sleeping
* mutex to protect their transmit paths.
* Also Be aware that mutexes do not play well with semaphores, and there
* is a conflicting semaphore in a certain channel code path.
*/
#define NV_LOCK_INIT(_sc, _name) \
mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF)
#define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock)
#define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED)
#define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock)
#define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock)
/*
* Globals
*/
int hv_promisc_mode = 0; /* normal mode by default */
/* The one and only one */
static struct hv_netvsc_driver_context g_netvsc_drv;
/*
* Forward declarations
*/
static void hn_stop(hn_softc_t *sc);
static void hn_ifinit_locked(hn_softc_t *sc);
static void hn_ifinit(void *xsc);
static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
static int hn_start_locked(struct ifnet *ifp);
static void hn_start(struct ifnet *ifp);
/*
* NetVsc driver initialization
* Note: Filter init is no longer required
*/
static int
netvsc_drv_init(void)
{
	/* Nothing to do: RNDIS filter initialization is no longer needed. */
	return (0);
}
/*
* NetVsc global initialization entry point
*/
static void
netvsc_init(void)
{
	printf("Netvsc initializing... ");

	/*
	 * XXXKYS: cleanup initialization
	 */
	/*
	 * NOTE(review): while the kernel is still cold (cold != 0) this
	 * takes the "Already initialized!" branch even though no
	 * initialization has actually happened, so the message is
	 * misleading in that case.
	 */
	if (!cold && !g_netvsc_drv.drv_inited) {
		g_netvsc_drv.drv_inited = 1;
		netvsc_drv_init();
	} else {
		printf("Already initialized!\n");
	}
}
/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
/*
 * Hyper-V synthetic network device class GUID, stored in the
 * byte order used on the VMBus wire.
 */
static const hv_guid g_net_vsc_device_type = {
	.data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
		0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
};
/*
 * Standard probe entry point.
 *
 * Compares the VMBus child's type GUID against the synthetic network
 * device class GUID and claims the device on a match.
 */
static int
netvsc_probe(device_t dev)
{
	const char *type_guid;

	type_guid = vmbus_get_type(dev);
	if (memcmp(type_guid, &g_net_vsc_device_type.data,
	    sizeof(hv_guid)) != 0)
		return (ENXIO);

	device_set_desc(dev, "Synthetic Network Interface");
	printf("Netvsc probe... DONE \n");
	return (0);
}
/*
* Standard attach entry point.
*
* Called when the driver is loaded. It allocates needed resources,
* and initializes the "hardware" and software.
*/
static int
netvsc_attach(device_t dev)
{
	struct hv_device *device_ctx = vmbus_get_devctx(dev);
	netvsc_device_info device_info;
	hn_softc_t *sc;
	int unit = device_get_unit(dev);
	struct ifnet *ifp;
	int ret;

	/* One-time global init (no-op after the first call). */
	netvsc_init();

	sc = device_get_softc(dev);
	if (sc == NULL) {
		return (ENOMEM);
	}

	bzero(sc, sizeof(hn_softc_t));
	sc->hn_unit = unit;
	sc->hn_dev = dev;

	NV_LOCK_INIT(sc, "NetVSCLock");

	sc->hn_dev_obj = device_ctx;

	/* Allocate the ifnet and wire it to the softc. */
	ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = sc;

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_dunit = unit;
	ifp->if_dname = NETVSC_DEVNAME;

	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = hn_ioctl;
	ifp->if_start = hn_start;
	ifp->if_init = hn_ifinit;
	/* needed by hv_rf_on_device_add() code */
	ifp->if_mtu = ETHERMTU;
	IFQ_SET_MAXLEN(&ifp->if_snd, 512);
	ifp->if_snd.ifq_drv_maxlen = 511;
	IFQ_SET_READY(&ifp->if_snd);

	/*
	 * Tell upper layers that we support full VLAN capability.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;

	/*
	 * Bring up the RNDIS device; on success device_info carries the
	 * MAC address and link state for this interface.
	 */
	ret = hv_rf_on_device_add(device_ctx, &device_info);
	if (ret != 0) {
		if_free(ifp);

		return (ret);
	}
	/* link_state == 0 means the link is up. */
	if (device_info.link_state == 0) {
		sc->hn_carrier = 1;
	}

	ether_ifattach(ifp, device_info.mac_addr);

	return (0);
}
/*
* Standard detach entry point
*/
static int
netvsc_detach(device_t dev)
{
	struct hv_device *hv_device = vmbus_get_devctx(dev);

	printf("netvsc_detach\n");

	/*
	 * XXXKYS:  Need to clean up all our
	 * driver state; this is the driver
	 * unloading.
	 */

	/*
	 * XXXKYS:  Need to stop outgoing traffic and unregister
	 * the netdevice.
	 */

	/*
	 * NOTE(review): only the RNDIS/channel side is torn down here.
	 * The ifnet is never ether_ifdetach()'d or freed and the softc
	 * lock is not destroyed -- resources leak on driver unload.
	 */
	hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL);

	return (0);
}
/*
* Standard shutdown entry point
*/
static int
netvsc_shutdown(device_t dev)
{
	/* No shutdown-time work is required for this device. */
	return (0);
}
/*
 * Send completion processing
 *
 * Releases the transmit resources for one packet: the containing
 * allocation (which begins HV_NV_PACKET_OFFSET_IN_BUF bytes before the
 * netvsc_packet itself) and, when present, the originating mbuf chain
 * stashed in send_completion_tid.
 */
void
netvsc_xmit_completion(void *context)
{
	netvsc_packet *pkt = (netvsc_packet *)context;
	struct mbuf *chain;
	uint8_t *base;

	chain = (struct mbuf *)pkt->compl.send.send_completion_tid;
	base = ((uint8_t *)pkt) - HV_NV_PACKET_OFFSET_IN_BUF;

	free(base, M_DEVBUF);

	if (chain != NULL)
		m_freem(chain);
}
/*
 * Start a transmit of one or more packets
 *
 * Drains the interface send queue: each dequeued mbuf chain is wrapped
 * in a freshly allocated netvsc_packet and handed to the RNDIS filter.
 * Must be called with the softc lock held (see hn_start()).
 *
 * Fixes over the previous revision:
 *  - On the "too many fragments" and "allocation failed" error paths the
 *    code called m_freem(m), but 'm' is the loop iterator and is always
 *    NULL at that point, so the dequeued chain (m_head) leaked.  Free
 *    m_head instead.
 *  - mc_head is reset to NULL after being freed so a stale pointer can
 *    never be freed twice if bpf detaches between iterations.
 *  - Removed the unused local 'xlen'.
 */
static int
hn_start_locked(struct ifnet *ifp)
{
	hn_softc_t *sc = ifp->if_softc;
	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
	uint8_t *buf;
	netvsc_packet *packet;
	struct mbuf *m_head, *m;
	struct mbuf *mc_head = NULL;
	int i;
	int num_frags;
	int len;
	int rppi_size;
	int retries = 0;
	int ret = 0;

	while (!IFQ_DRV_IS_EMPTY(&sc->hn_ifp->if_snd)) {
		IFQ_DRV_DEQUEUE(&sc->hn_ifp->if_snd, m_head);
		if (m_head == NULL) {
			break;
		}

		len = 0;
		num_frags = 0;

		/* Walk the mbuf list computing total length and num frags */
		for (m = m_head; m != NULL; m = m->m_next) {
			if (m->m_len != 0) {
				num_frags++;
				len += m->m_len;
			}
		}

		/*
		 * Reserve the number of pages requested.  Currently,
		 * one page is reserved for the message in the RNDIS
		 * filter packet
		 */
		num_frags += HV_RF_NUM_TX_RESERVED_PAGE_BUFS;

		/* If exceeds # page_buffers in netvsc_packet */
		if (num_frags > NETVSC_PACKET_MAXPAGE) {
			/*
			 * Free the dequeued chain; the iterator 'm' is
			 * NULL here, so freeing it would leak m_head.
			 */
			m_freem(m_head);

			return (EINVAL);
		}

		rppi_size = 0;
		if (m_head->m_flags & M_VLANTAG) {
			rppi_size = sizeof(rndis_per_packet_info) +
			    sizeof(ndis_8021q_info);
		}

		/*
		 * Allocate a buffer with space for a netvsc packet plus a
		 * number of reserved areas.  First comes a (currently 16
		 * bytes, currently unused) reserved data area.  Second is
		 * the netvsc_packet, which includes (currently 4) page
		 * buffers.  Third (optional) is a rndis_per_packet_info
		 * struct, but only if a VLAN tag should be inserted into the
		 * Ethernet frame by the Hyper-V infrastructure.  Fourth is
		 * an area reserved for an rndis_filter_packet struct.
		 * Changed malloc to M_NOWAIT to avoid sleep under spin lock.
		 * No longer reserving extra space for page buffers, as they
		 * are already part of the netvsc_packet.
		 */
		buf = malloc(HV_NV_PACKET_OFFSET_IN_BUF +
		    sizeof(netvsc_packet) + rppi_size +
		    sizeof(rndis_filter_packet),
		    M_DEVBUF, M_ZERO | M_NOWAIT);
		if (buf == NULL) {
			/* As above: free the chain head, not the iterator */
			m_freem(m_head);

			return (ENOMEM);
		}

		packet = (netvsc_packet *)(buf + HV_NV_PACKET_OFFSET_IN_BUF);
		/*
		 * NOTE(review): this stores the constant 0, not a softc
		 * pointer; offset 0 of buf is reserved but unpopulated.
		 */
		*(vm_offset_t *)buf = HV_NV_SC_PTR_OFFSET_IN_BUF;

		/*
		 * extension points to the area reserved for the
		 * rndis_filter_packet, which is placed just after
		 * the netvsc_packet (and rppi struct, if present;
		 * length is updated later).
		 */
		packet->extension = packet + 1;

		/* Set up the rndis header */
		packet->page_buf_count = num_frags;

		/* Initialize it from the mbuf */
		packet->tot_data_buf_len = len;

		/*
		 * If the Hyper-V infrastructure needs to embed a VLAN tag,
		 * initialize netvsc_packet and rppi struct values as needed.
		 */
		if (rppi_size) {
			/* Lower layers need the VLAN TCI */
			packet->vlan_tci = m_head->m_pkthdr.ether_vtag;
		}

		/*
		 * Fill the page buffers with mbuf info starting at index
		 * HV_RF_NUM_TX_RESERVED_PAGE_BUFS.
		 */
		i = HV_RF_NUM_TX_RESERVED_PAGE_BUFS;
		for (m = m_head; m != NULL; m = m->m_next) {
			if (m->m_len) {
				vm_offset_t paddr =
				    vtophys(mtod(m, vm_offset_t));
				packet->page_buffers[i].pfn =
				    paddr >> PAGE_SHIFT;
				packet->page_buffers[i].offset =
				    paddr & (PAGE_SIZE - 1);
				packet->page_buffers[i].length = m->m_len;
				i++;
			}
		}

		/*
		 * If bpf, copy the mbuf chain.  This is less expensive than
		 * it appears; the mbuf clusters are not copied, only their
		 * reference counts are incremented.
		 * Needed to avoid a race condition where the completion
		 * callback is invoked, freeing the mbuf chain, before the
		 * bpf_mtap code has a chance to run.
		 */
		if (ifp->if_bpf) {
			mc_head = m_copypacket(m_head, M_DONTWAIT);
		}
retry_send:
		/* Set the completion routine */
		packet->compl.send.on_send_completion = netvsc_xmit_completion;
		packet->compl.send.send_completion_context = packet;
		packet->compl.send.send_completion_tid = (uint64_t)m_head;

		/* Removed critical_enter(), does not appear necessary */
		ret = hv_rf_on_send(device_ctx, packet);

		if (ret == 0) {
			ifp->if_opackets++;
			/* if bpf && mc_head, call bpf_mtap code */
			if (mc_head) {
				ETHER_BPF_MTAP(ifp, mc_head);
			}
		} else {
			retries++;
			if (retries < 4) {
				goto retry_send;
			}

			IF_PREPEND(&ifp->if_snd, m_head);
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;

			/*
			 * Null the mbuf pointer so the completion function
			 * does not free the mbuf chain.  We just pushed the
			 * mbuf chain back on the if_snd queue.
			 */
			packet->compl.send.send_completion_tid = 0;

			/*
			 * Release the resources since we will not get any
			 * send completion
			 */
			netvsc_xmit_completion(packet);
		}

		/* if bpf && mc_head, free the mbuf chain copy */
		if (mc_head) {
			m_freem(mc_head);
			/* Prevent a stale pointer from surviving into the
			 * next iteration (double free if bpf detaches). */
			mc_head = NULL;
		}
	}

	return (ret);
}
/*
 * Link up/down notification
 *
 * status == 1 marks the carrier up; any other value marks it down.
 * Silently ignores devices whose softc has not been set up.
 */
void
netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status)
{
	hn_softc_t *sc = device_get_softc(device_obj->device);

	if (sc == NULL)
		return;

	sc->hn_carrier = (status == 1) ? 1 : 0;
}
/*
* Append the specified data to the indicated mbuf chain,
* Extend the mbuf chain if the new data does not fit in
* existing space.
*
* This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
* There should be an equivalent in the kernel mbuf code,
* but there does not appear to be one yet.
*
* Differs from m_append() in that additional mbufs are
* allocated with cluster size MJUMPAGESIZE, and filled
* accordingly.
*
* Return 1 if able to complete the job; otherwise 0.
*/
static int
hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
	struct mbuf *m, *n;
	int remainder, space;

	/* Seek to the last mbuf in the chain. */
	for (m = m0; m->m_next != NULL; m = m->m_next)
		;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
		m->m_len += space;
		cp += space;
		remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
		if (n == NULL)
			break;
		n->m_len = min(MJUMPAGESIZE, remainder);
		bcopy(cp, mtod(n, caddr_t), n->m_len);
		cp += n->m_len;
		remainder -= n->m_len;
		/* Link the new cluster onto the chain tail. */
		m->m_next = n;
		m = n;
	}
	/* Account for the appended bytes in the packet header, if any. */
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;
	return (remainder == 0);
}
/*
 * Called when we receive a data packet from the "wire" on the
 * specified device
 *
 * Copies the received data out of the shared receive buffer into a
 * fresh mbuf chain and hands it to the network stack.
 *
 * Note: This is no longer used as a callback
 *
 * Fix over the previous revision: sc->hn_ifp was dereferenced in the
 * declaration initializers BEFORE the sc == NULL check, which made the
 * check useless; the dereference now happens only after the check.
 * (The redundant second assignment via sc->arpcom.ac_ifp -- the same
 * pointer, set in netvsc_attach() -- was dropped.)
 */
int
netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet)
{
	hn_softc_t *sc = (hn_softc_t *)device_get_softc(device_ctx->device);
	struct mbuf *m_new;
	struct ifnet *ifp;
	int size;
	int i;

	if (sc == NULL) {
		return (0); /* TODO: KYS how can this be! */
	}

	ifp = sc->hn_ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		return (0);
	}

	/*
	 * Bail out if packet contains more data than configured MTU.
	 */
	if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) {
		return (0);
	}

	/*
	 * Get an mbuf with a cluster.  For packets 2K or less,
	 * get a standard 2K cluster.  For anything larger, get a
	 * 4K cluster.  Any buffers larger than 4K can cause problems
	 * if looped around to the Hyper-V TX channel, so avoid them.
	 */
	size = MCLBYTES;

	if (packet->tot_data_buf_len > MCLBYTES) {
		/* 4096 */
		size = MJUMPAGESIZE;
	}

	m_new = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size);

	if (m_new == NULL)
		return (0);

	/*
	 * Remove trailing junk from RX data buffer.
	 * Fixme:  This will not work for multiple Hyper-V RX buffers.
	 * Fortunately, the channel gathers all RX data into one buffer.
	 *
	 * L2 frame length, with L2 header, not including CRC
	 */
	packet->page_buffers[0].length = packet->tot_data_buf_len;

	/*
	 * Copy the received packet to one or more mbufs.
	 * The copy is required since the memory pointed to by netvsc_packet
	 * cannot be deallocated
	 */
	for (i=0; i < packet->page_buf_count; i++) {
		/* Shift virtual page number to form virtual page address */
		uint8_t *vaddr = (uint8_t *)
		    (packet->page_buffers[i].pfn << PAGE_SHIFT);

		hv_m_append(m_new, packet->page_buffers[i].length,
		    vaddr + packet->page_buffers[i].offset);
	}

	m_new->m_pkthdr.rcvif = ifp;

	/* Propagate any hardware-stripped VLAN tag to the stack. */
	if ((packet->vlan_tci != 0) &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) {
		m_new->m_pkthdr.ether_vtag = packet->vlan_tci;
		m_new->m_flags |= M_VLANTAG;
	}

	/*
	 * Note:  Moved RX completion back to hv_nv_on_receive() so all
	 * messages (not just data messages) will trigger a response.
	 */
	ifp->if_ipackets++;

	/* We're not holding the lock here, so don't release it */
	(*ifp->if_input)(ifp, m_new);

	return (0);
}
/*
* Standard ioctl entry point. Called when the user wants to configure
* the interface.
*/
static int
hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	hn_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	netvsc_device_info device_info;
	struct hv_device *hn_dev;
	int mask, error = 0;

	switch(cmd) {

	case SIOCSIFADDR:
	case SIOCGIFADDR:
		error = ether_ioctl(ifp, cmd, data);
		break;
	case SIOCSIFMTU:
		hn_dev = vmbus_get_devctx(sc->hn_dev);

		NV_LOCK(sc);

		/*
		 * NOTE(review): only an upper bound is enforced here;
		 * there is no minimum-MTU validation -- confirm whether
		 * the stack rejects undersized values before this point.
		 */
		if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) {
			error = EINVAL;
			NV_UNLOCK(sc);
			break;
		}
		/* Obtain and record requested MTU */
		ifp->if_mtu = ifr->ifr_mtu;

		/*
		 * We must remove and add back the device to cause the new
		 * MTU to take effect.  This includes tearing down, but not
		 * deleting the channel, then bringing it back up.
		 */
		error = hv_rf_on_device_remove(hn_dev, HV_RF_NV_RETAIN_CHANNEL);
		if (error) {
			NV_UNLOCK(sc);
			break;
		}
		error = hv_rf_on_device_add(hn_dev, &device_info);
		if (error) {
			NV_UNLOCK(sc);
			break;
		}

		/* Restart the interface with the new MTU in effect. */
		hn_ifinit_locked(sc);

		NV_UNLOCK(sc);
		break;
	case SIOCSIFFLAGS:
		NV_LOCK(sc);
		if (ifp->if_flags & IFF_UP) {
			/*
			 * If only the state of the PROMISC flag changed,
			 * then just use the 'set promisc mode' command
			 * instead of reinitializing the entire NIC.  Doing
			 * a full re-init means reloading the firmware and
			 * waiting for it to start up, which may take a
			 * second or two.
			 */
#ifdef notyet
			/* Fixme:  Promiscuous mode? */
			/* No promiscuous mode with Xen */
			if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
			    ifp->if_flags & IFF_PROMISC &&
			    !(sc->hn_if_flags & IFF_PROMISC)) {
				/* do something here for Hyper-V */
				;
/*				XN_SETBIT(sc, XN_RX_MODE,		*/
/*				    XN_RXMODE_RX_PROMISC);		*/
			} else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
			    !(ifp->if_flags & IFF_PROMISC) &&
			    sc->hn_if_flags & IFF_PROMISC) {
				/* do something here for Hyper-V */
				;
/*				XN_CLRBIT(sc, XN_RX_MODE,		*/
/*				    XN_RXMODE_RX_PROMISC);		*/
			} else
#endif
				hn_ifinit_locked(sc);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				hn_stop(sc);
			}
		}
		/* Remember the flags for the next delta computation. */
		sc->hn_if_flags = ifp->if_flags;
		NV_UNLOCK(sc);
		error = 0;
		break;
	case SIOCSIFCAP:
		/*
		 * Toggle the checksum capability flag only; no hardware
		 * (re)programming is performed here.
		 */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_HWCSUM) {
			if (IFCAP_HWCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_HWCSUM;
			} else {
				ifp->if_capenable |= IFCAP_HWCSUM;
			}
		}
		error = 0;
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
#ifdef notyet
		/* Fixme:  Multicast mode? */
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			NV_LOCK(sc);
			netvsc_setmulti(sc);
			NV_UNLOCK(sc);
			error = 0;
		}
#endif
		/* FALLTHROUGH */
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = EINVAL;
		break;
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	return (error);
}
/*
 * Stop the interface: mark it not-running, clear the init flag, and
 * close the RNDIS filter.  Called with the NV lock held.
 */
static void
hn_stop(hn_softc_t *sc)
{
	struct ifnet *ifp;
	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);

	NV_LOCK_ASSERT(sc);
	ifp = sc->hn_ifp;

	printf(" Closing Device ...\n");

	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
	sc->hn_initdone = 0;

	/*
	 * The result used to be stored in a never-read local; failure of
	 * the close is intentionally ignored here.
	 */
	(void) hv_rf_on_close(device_ctx);
}
/*
 * FreeBSD transmit entry point: serialize on the NV lock and hand off
 * to the locked transmit path.
 */
static void
hn_start(struct ifnet *ifp)
{
	hn_softc_t *softc = ifp->if_softc;

	NV_LOCK(softc);
	hn_start_locked(ifp);
	NV_UNLOCK(softc);
}
/*
 * Bring the interface up.  Must be called with the NV lock held; does
 * nothing if the interface is already running.
 */
static void
hn_ifinit_locked(hn_softc_t *sc)
{
	struct hv_device *dev = vmbus_get_devctx(sc->hn_dev);
	struct ifnet *ifp = sc->hn_ifp;

	NV_LOCK_ASSERT(sc);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
		return;

	hv_promisc_mode = 1;

	if (hv_rf_on_open(dev) != 0)
		return;

	sc->hn_initdone = 1;
	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
}
/*
 * if_init entry point: acquire the NV lock and initialize.
 */
static void
hn_ifinit(void *xsc)
{
	hn_softc_t *softc = xsc;

	NV_LOCK(softc);
	hn_ifinit_locked(softc);
	NV_UNLOCK(softc);
}
#ifdef LATER
/*
 * Transmit watchdog handler.  Currently compiled out (LATER is never
 * defined).  Logs the timeout, reinitializes the interface, and counts
 * an output error.
 */
static void
hn_watchdog(struct ifnet *ifp)
{
	hn_softc_t *sc;
	sc = ifp->if_softc;

	printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit);
	hn_ifinit(sc);    /* XXX is a plain re-init the right recovery? */
	ifp->if_oerrors++;
}
#endif
/* newbus method table for the hn(4) network device. */
static device_method_t netvsc_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, netvsc_probe),
	DEVMETHOD(device_attach, netvsc_attach),
	DEVMETHOD(device_detach, netvsc_detach),
	DEVMETHOD(device_shutdown, netvsc_shutdown),

	{ 0, 0 }	/* terminator */
};
static driver_t netvsc_driver = {
	NETVSC_DEVNAME,
	netvsc_methods,
	sizeof(hn_softc_t)
};

static devclass_t netvsc_devclass;

/* Attach hn(4) instances to the vmbus bus. */
DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);

/* Run netvsc_init() at boot, once kernel threads can be created. */
SYSINIT(netvsc_initx, SI_SUB_KTHREAD_IDLE, SI_ORDER_MIDDLE + 1, netvsc_init,
    NULL);

View File

@ -0,0 +1,911 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __HV_RNDIS_H__
#define __HV_RNDIS_H__

/*
 * NDIS protocol version numbers
 */
#define NDIS_VERSION_5_0			0x00050000
#define NDIS_VERSION_5_1			0x00050001
#define NDIS_VERSION_6_0			0x00060000
#define NDIS_VERSION				(NDIS_VERSION_5_1)

/*
 * Status codes.
 *
 * NOTE(review): the values follow the Windows NTSTATUS severity
 * convention visible in this table — top nibble 0x4 = informational,
 * 0x8 = warning, 0xC = error.
 */
#define STATUS_SUCCESS				(0x00000000L)
#define STATUS_UNSUCCESSFUL			(0xC0000001L)
#define STATUS_PENDING				(0x00000103L)
#define STATUS_INSUFFICIENT_RESOURCES		(0xC000009AL)
#define STATUS_BUFFER_OVERFLOW			(0x80000005L)
#define STATUS_NOT_SUPPORTED			(0xC00000BBL)

#define RNDIS_STATUS_SUCCESS			(STATUS_SUCCESS)
#define RNDIS_STATUS_PENDING			(STATUS_PENDING)
#define RNDIS_STATUS_NOT_RECOGNIZED		(0x00010001L)
#define RNDIS_STATUS_NOT_COPIED			(0x00010002L)
#define RNDIS_STATUS_NOT_ACCEPTED		(0x00010003L)
#define RNDIS_STATUS_CALL_ACTIVE		(0x00010007L)

#define RNDIS_STATUS_ONLINE			(0x40010003L)
#define RNDIS_STATUS_RESET_START		(0x40010004L)
#define RNDIS_STATUS_RESET_END			(0x40010005L)
#define RNDIS_STATUS_RING_STATUS		(0x40010006L)
#define RNDIS_STATUS_CLOSED			(0x40010007L)
#define RNDIS_STATUS_WAN_LINE_UP		(0x40010008L)
#define RNDIS_STATUS_WAN_LINE_DOWN		(0x40010009L)
#define RNDIS_STATUS_WAN_FRAGMENT		(0x4001000AL)
#define RNDIS_STATUS_MEDIA_CONNECT		(0x4001000BL)
#define RNDIS_STATUS_MEDIA_DISCONNECT		(0x4001000CL)
#define RNDIS_STATUS_HARDWARE_LINE_UP		(0x4001000DL)
#define RNDIS_STATUS_HARDWARE_LINE_DOWN		(0x4001000EL)
#define RNDIS_STATUS_INTERFACE_UP		(0x4001000FL)
#define RNDIS_STATUS_INTERFACE_DOWN		(0x40010010L)
#define RNDIS_STATUS_MEDIA_BUSY			(0x40010011L)
#define RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION	(0x40010012L)
#define RNDIS_STATUS_WW_INDICATION	RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION
#define RNDIS_STATUS_LINK_SPEED_CHANGE		(0x40010013L)

#define RNDIS_STATUS_NOT_RESETTABLE		(0x80010001L)
#define RNDIS_STATUS_SOFT_ERRORS		(0x80010003L)
#define RNDIS_STATUS_HARD_ERRORS		(0x80010004L)
#define RNDIS_STATUS_BUFFER_OVERFLOW		(STATUS_BUFFER_OVERFLOW)

#define RNDIS_STATUS_FAILURE			(STATUS_UNSUCCESSFUL)
#define RNDIS_STATUS_RESOURCES			(STATUS_INSUFFICIENT_RESOURCES)
#define RNDIS_STATUS_CLOSING			(0xC0010002L)
#define RNDIS_STATUS_BAD_VERSION		(0xC0010004L)
#define RNDIS_STATUS_BAD_CHARACTERISTICS	(0xC0010005L)
#define RNDIS_STATUS_ADAPTER_NOT_FOUND		(0xC0010006L)
#define RNDIS_STATUS_OPEN_FAILED		(0xC0010007L)
#define RNDIS_STATUS_DEVICE_FAILED		(0xC0010008L)
#define RNDIS_STATUS_MULTICAST_FULL		(0xC0010009L)
#define RNDIS_STATUS_MULTICAST_EXISTS		(0xC001000AL)
#define RNDIS_STATUS_MULTICAST_NOT_FOUND	(0xC001000BL)
#define RNDIS_STATUS_REQUEST_ABORTED		(0xC001000CL)
#define RNDIS_STATUS_RESET_IN_PROGRESS		(0xC001000DL)
#define RNDIS_STATUS_CLOSING_INDICATING		(0xC001000EL)
#define RNDIS_STATUS_NOT_SUPPORTED		(STATUS_NOT_SUPPORTED)
#define RNDIS_STATUS_INVALID_PACKET		(0xC001000FL)
#define RNDIS_STATUS_OPEN_LIST_FULL		(0xC0010010L)
#define RNDIS_STATUS_ADAPTER_NOT_READY		(0xC0010011L)
#define RNDIS_STATUS_ADAPTER_NOT_OPEN		(0xC0010012L)
#define RNDIS_STATUS_NOT_INDICATING		(0xC0010013L)
#define RNDIS_STATUS_INVALID_LENGTH		(0xC0010014L)
#define RNDIS_STATUS_INVALID_DATA		(0xC0010015L)
#define RNDIS_STATUS_BUFFER_TOO_SHORT		(0xC0010016L)
#define RNDIS_STATUS_INVALID_OID		(0xC0010017L)
#define RNDIS_STATUS_ADAPTER_REMOVED		(0xC0010018L)
#define RNDIS_STATUS_UNSUPPORTED_MEDIA		(0xC0010019L)
#define RNDIS_STATUS_GROUP_ADDRESS_IN_USE	(0xC001001AL)
#define RNDIS_STATUS_FILE_NOT_FOUND		(0xC001001BL)
#define RNDIS_STATUS_ERROR_READING_FILE		(0xC001001CL)
#define RNDIS_STATUS_ALREADY_MAPPED		(0xC001001DL)
#define RNDIS_STATUS_RESOURCE_CONFLICT		(0xC001001EL)
#define RNDIS_STATUS_NO_CABLE			(0xC001001FL)

#define RNDIS_STATUS_INVALID_SAP		(0xC0010020L)
#define RNDIS_STATUS_SAP_IN_USE			(0xC0010021L)
#define RNDIS_STATUS_INVALID_ADDRESS		(0xC0010022L)
#define RNDIS_STATUS_VC_NOT_ACTIVATED		(0xC0010023L)
#define RNDIS_STATUS_DEST_OUT_OF_ORDER		(0xC0010024L)
#define RNDIS_STATUS_VC_NOT_AVAILABLE		(0xC0010025L)
#define RNDIS_STATUS_CELLRATE_NOT_AVAILABLE	(0xC0010026L)
#define RNDIS_STATUS_INCOMPATABLE_QOS		(0xC0010027L)
#define RNDIS_STATUS_AAL_PARAMS_UNSUPPORTED	(0xC0010028L)
#define RNDIS_STATUS_NO_ROUTE_TO_DESTINATION	(0xC0010029L)
#define RNDIS_STATUS_TOKEN_RING_OPEN_ERROR	(0xC0011000L)

/*
 * Object Identifiers used by NdisRequest Query/Set Information
 */

/*
 * General Objects
 */
#define RNDIS_OID_GEN_SUPPORTED_LIST		0x00010101
#define RNDIS_OID_GEN_HARDWARE_STATUS		0x00010102
#define RNDIS_OID_GEN_MEDIA_SUPPORTED		0x00010103
#define RNDIS_OID_GEN_MEDIA_IN_USE		0x00010104
#define RNDIS_OID_GEN_MAXIMUM_LOOKAHEAD		0x00010105
#define RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE	0x00010106
#define RNDIS_OID_GEN_LINK_SPEED		0x00010107
#define RNDIS_OID_GEN_TRANSMIT_BUFFER_SPACE	0x00010108
#define RNDIS_OID_GEN_RECEIVE_BUFFER_SPACE	0x00010109
#define RNDIS_OID_GEN_TRANSMIT_BLOCK_SIZE	0x0001010A
#define RNDIS_OID_GEN_RECEIVE_BLOCK_SIZE	0x0001010B
#define RNDIS_OID_GEN_VENDOR_ID			0x0001010C
#define RNDIS_OID_GEN_VENDOR_DESCRIPTION	0x0001010D
#define RNDIS_OID_GEN_CURRENT_PACKET_FILTER	0x0001010E
#define RNDIS_OID_GEN_CURRENT_LOOKAHEAD		0x0001010F
#define RNDIS_OID_GEN_DRIVER_VERSION		0x00010110
#define RNDIS_OID_GEN_MAXIMUM_TOTAL_SIZE	0x00010111
#define RNDIS_OID_GEN_PROTOCOL_OPTIONS		0x00010112
#define RNDIS_OID_GEN_MAC_OPTIONS		0x00010113
#define RNDIS_OID_GEN_MEDIA_CONNECT_STATUS	0x00010114
#define RNDIS_OID_GEN_MAXIMUM_SEND_PACKETS	0x00010115
#define RNDIS_OID_GEN_VENDOR_DRIVER_VERSION	0x00010116
#define RNDIS_OID_GEN_NETWORK_LAYER_ADDRESSES	0x00010118
#define RNDIS_OID_GEN_TRANSPORT_HEADER_OFFSET	0x00010119
#define RNDIS_OID_GEN_MACHINE_NAME		0x0001021A
#define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER	0x0001021B

/* General statistics OIDs */
#define RNDIS_OID_GEN_XMIT_OK			0x00020101
#define RNDIS_OID_GEN_RCV_OK			0x00020102
#define RNDIS_OID_GEN_XMIT_ERROR		0x00020103
#define RNDIS_OID_GEN_RCV_ERROR			0x00020104
#define RNDIS_OID_GEN_RCV_NO_BUFFER		0x00020105

#define RNDIS_OID_GEN_DIRECTED_BYTES_XMIT	0x00020201
#define RNDIS_OID_GEN_DIRECTED_FRAMES_XMIT	0x00020202
#define RNDIS_OID_GEN_MULTICAST_BYTES_XMIT	0x00020203
#define RNDIS_OID_GEN_MULTICAST_FRAMES_XMIT	0x00020204
#define RNDIS_OID_GEN_BROADCAST_BYTES_XMIT	0x00020205
#define RNDIS_OID_GEN_BROADCAST_FRAMES_XMIT	0x00020206
#define RNDIS_OID_GEN_DIRECTED_BYTES_RCV	0x00020207
#define RNDIS_OID_GEN_DIRECTED_FRAMES_RCV	0x00020208
#define RNDIS_OID_GEN_MULTICAST_BYTES_RCV	0x00020209
#define RNDIS_OID_GEN_MULTICAST_FRAMES_RCV	0x0002020A
#define RNDIS_OID_GEN_BROADCAST_BYTES_RCV	0x0002020B
#define RNDIS_OID_GEN_BROADCAST_FRAMES_RCV	0x0002020C
#define RNDIS_OID_GEN_RCV_CRC_ERROR		0x0002020D
#define RNDIS_OID_GEN_TRANSMIT_QUEUE_LENGTH	0x0002020E
#define RNDIS_OID_GEN_GET_TIME_CAPS		0x0002020F
#define RNDIS_OID_GEN_GET_NETCARD_TIME		0x00020210

/*
 * These are connection-oriented general OIDs.
 * These replace the above OIDs for connection-oriented media.
 */
#define RNDIS_OID_GEN_CO_SUPPORTED_LIST		0x00010101
#define RNDIS_OID_GEN_CO_HARDWARE_STATUS	0x00010102
#define RNDIS_OID_GEN_CO_MEDIA_SUPPORTED	0x00010103
#define RNDIS_OID_GEN_CO_MEDIA_IN_USE		0x00010104
#define RNDIS_OID_GEN_CO_LINK_SPEED		0x00010105
#define RNDIS_OID_GEN_CO_VENDOR_ID		0x00010106
#define RNDIS_OID_GEN_CO_VENDOR_DESCRIPTION	0x00010107
#define RNDIS_OID_GEN_CO_DRIVER_VERSION		0x00010108
#define RNDIS_OID_GEN_CO_PROTOCOL_OPTIONS	0x00010109
#define RNDIS_OID_GEN_CO_MAC_OPTIONS		0x0001010A
#define RNDIS_OID_GEN_CO_MEDIA_CONNECT_STATUS	0x0001010B
#define RNDIS_OID_GEN_CO_VENDOR_DRIVER_VERSION	0x0001010C
#define RNDIS_OID_GEN_CO_MINIMUM_LINK_SPEED	0x0001010D

#define RNDIS_OID_GEN_CO_GET_TIME_CAPS		0x00010201
#define RNDIS_OID_GEN_CO_GET_NETCARD_TIME	0x00010202

/*
 * These are connection-oriented statistics OIDs.
 */
#define RNDIS_OID_GEN_CO_XMIT_PDUS_OK		0x00020101
#define RNDIS_OID_GEN_CO_RCV_PDUS_OK		0x00020102
#define RNDIS_OID_GEN_CO_XMIT_PDUS_ERROR	0x00020103
#define RNDIS_OID_GEN_CO_RCV_PDUS_ERROR		0x00020104
#define RNDIS_OID_GEN_CO_RCV_PDUS_NO_BUFFER	0x00020105

#define RNDIS_OID_GEN_CO_RCV_CRC_ERROR		0x00020201
#define RNDIS_OID_GEN_CO_TRANSMIT_QUEUE_LENGTH	0x00020202
#define RNDIS_OID_GEN_CO_BYTES_XMIT		0x00020203
#define RNDIS_OID_GEN_CO_BYTES_RCV		0x00020204
#define RNDIS_OID_GEN_CO_BYTES_XMIT_OUTSTANDING	0x00020205
#define RNDIS_OID_GEN_CO_NETCARD_LOAD		0x00020206

/*
 * These are objects for Connection-oriented media call-managers.
 */
#define RNDIS_OID_CO_ADD_PVC			0xFF000001
#define RNDIS_OID_CO_DELETE_PVC			0xFF000002
#define RNDIS_OID_CO_GET_CALL_INFORMATION	0xFF000003
#define RNDIS_OID_CO_ADD_ADDRESS		0xFF000004
#define RNDIS_OID_CO_DELETE_ADDRESS		0xFF000005
#define RNDIS_OID_CO_GET_ADDRESSES		0xFF000006
#define RNDIS_OID_CO_ADDRESS_CHANGE		0xFF000007
#define RNDIS_OID_CO_SIGNALING_ENABLED		0xFF000008
#define RNDIS_OID_CO_SIGNALING_DISABLED		0xFF000009

/*
 * 802.3 Objects (Ethernet)
 */
#define RNDIS_OID_802_3_PERMANENT_ADDRESS	0x01010101
#define RNDIS_OID_802_3_CURRENT_ADDRESS		0x01010102
#define RNDIS_OID_802_3_MULTICAST_LIST		0x01010103
#define RNDIS_OID_802_3_MAXIMUM_LIST_SIZE	0x01010104
#define RNDIS_OID_802_3_MAC_OPTIONS		0x01010105

/*
 * 802.3 MAC options and statistics OIDs
 */
#define NDIS_802_3_MAC_OPTION_PRIORITY		0x00000001

#define RNDIS_OID_802_3_RCV_ERROR_ALIGNMENT	0x01020101
#define RNDIS_OID_802_3_XMIT_ONE_COLLISION	0x01020102
#define RNDIS_OID_802_3_XMIT_MORE_COLLISIONS	0x01020103

#define RNDIS_OID_802_3_XMIT_DEFERRED		0x01020201
#define RNDIS_OID_802_3_XMIT_MAX_COLLISIONS	0x01020202
#define RNDIS_OID_802_3_RCV_OVERRUN		0x01020203
#define RNDIS_OID_802_3_XMIT_UNDERRUN		0x01020204
#define RNDIS_OID_802_3_XMIT_HEARTBEAT_FAILURE	0x01020205
#define RNDIS_OID_802_3_XMIT_TIMES_CRS_LOST	0x01020206
#define RNDIS_OID_802_3_XMIT_LATE_COLLISIONS	0x01020207

/*
 * RNDIS MP custom OID for test
 */
#define OID_RNDISMP_GET_RECEIVE_BUFFERS		0xFFA0C90D // Query only

/*
 * Remote NDIS message types
 */
#define REMOTE_NDIS_PACKET_MSG			0x00000001
#define REMOTE_NDIS_INITIALIZE_MSG		0x00000002
#define REMOTE_NDIS_HALT_MSG			0x00000003
#define REMOTE_NDIS_QUERY_MSG			0x00000004
#define REMOTE_NDIS_SET_MSG			0x00000005
#define REMOTE_NDIS_RESET_MSG			0x00000006
#define REMOTE_NDIS_INDICATE_STATUS_MSG		0x00000007
#define REMOTE_NDIS_KEEPALIVE_MSG		0x00000008

#define REMOTE_CONDIS_MP_CREATE_VC_MSG		0x00008001
#define REMOTE_CONDIS_MP_DELETE_VC_MSG		0x00008002
#define REMOTE_CONDIS_MP_ACTIVATE_VC_MSG	0x00008005
#define REMOTE_CONDIS_MP_DEACTIVATE_VC_MSG	0x00008006
#define REMOTE_CONDIS_INDICATE_STATUS_MSG	0x00008007

/*
 * Remote NDIS message completion types
 * (request type with the high bit set)
 */
#define REMOTE_NDIS_INITIALIZE_CMPLT		0x80000002
#define REMOTE_NDIS_QUERY_CMPLT			0x80000004
#define REMOTE_NDIS_SET_CMPLT			0x80000005
#define REMOTE_NDIS_RESET_CMPLT			0x80000006
#define REMOTE_NDIS_KEEPALIVE_CMPLT		0x80000008

#define REMOTE_CONDIS_MP_CREATE_VC_CMPLT	0x80008001
#define REMOTE_CONDIS_MP_DELETE_VC_CMPLT	0x80008002
#define REMOTE_CONDIS_MP_ACTIVATE_VC_CMPLT	0x80008005
#define REMOTE_CONDIS_MP_DEACTIVATE_VC_CMPLT	0x80008006

/*
 * Reserved message type for private communication between lower-layer
 * host driver and remote device, if necessary.
 */
#define REMOTE_NDIS_BUS_MSG			0xff000001

/*
 * Defines for DeviceFlags in rndis_initialize_complete
 */
#define RNDIS_DF_CONNECTIONLESS			0x00000001
#define RNDIS_DF_CONNECTION_ORIENTED		0x00000002
#define RNDIS_DF_RAW_DATA			0x00000004

/*
 * Remote NDIS medium types.
 */
#define RNDIS_MEDIUM_802_3			0x00000000
#define RNDIS_MEDIUM_802_5			0x00000001
#define RNDIS_MEDIUM_FDDI			0x00000002
#define RNDIS_MEDIUM_WAN			0x00000003
#define RNDIS_MEDIUM_LOCAL_TALK			0x00000004
#define RNDIS_MEDIUM_ARCNET_RAW			0x00000006
#define RNDIS_MEDIUM_ARCNET_878_2		0x00000007
#define RNDIS_MEDIUM_ATM			0x00000008
#define RNDIS_MEDIUM_WIRELESS_WAN		0x00000009
#define RNDIS_MEDIUM_IRDA			0x0000000a
#define RNDIS_MEDIUM_CO_WAN			0x0000000b
/* Not a real medium, defined as an upper bound */
#define RNDIS_MEDIUM_MAX			0x0000000d

/*
 * Remote NDIS medium connection states.
 */
#define RNDIS_MEDIA_STATE_CONNECTED		0x00000000
#define RNDIS_MEDIA_STATE_DISCONNECTED		0x00000001

/*
 * Remote NDIS version numbers
 */
#define RNDIS_MAJOR_VERSION			0x00000001
#define RNDIS_MINOR_VERSION			0x00000000
/*
 * NdisInitialize message
 */
typedef struct rndis_initialize_request_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	uint32_t	major_version;
	uint32_t	minor_version;
	/* Largest message, in bytes, the host can receive */
	uint32_t	max_xfer_size;
} rndis_initialize_request;

/*
 * Response to NdisInitialize
 */
typedef struct rndis_initialize_complete_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS status */
	uint32_t	status;
	uint32_t	major_version;
	uint32_t	minor_version;
	/* RNDIS_DF_* flags */
	uint32_t	device_flags;
	/* RNDIS medium (RNDIS_MEDIUM_*) */
	uint32_t	medium;
	uint32_t	max_pkts_per_msg;
	uint32_t	max_xfer_size;
	uint32_t	pkt_align_factor;
	uint32_t	af_list_offset;
	uint32_t	af_list_size;
} rndis_initialize_complete;

/*
 * Call manager devices only: Information about an address family
 * supported by the device is appended to the response to NdisInitialize.
 */
typedef struct rndis_co_address_family_ {
	/* RNDIS AF */
	uint32_t	address_family;
	uint32_t	major_version;
	uint32_t	minor_version;
} rndis_co_address_family;

/*
 * NdisHalt message
 */
typedef struct rndis_halt_request_ {
	/* RNDIS request ID */
	uint32_t	request_id;
} rndis_halt_request;

/*
 * NdisQueryRequest message
 */
typedef struct rndis_query_request_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS OID */
	uint32_t	oid;
	/* Input buffer: length and byte offset from start of this struct */
	uint32_t	info_buffer_length;
	uint32_t	info_buffer_offset;
	/* RNDIS handle */
	uint32_t	device_vc_handle;
} rndis_query_request;

/*
 * Response to NdisQueryRequest
 */
typedef struct rndis_query_complete_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS status */
	uint32_t	status;
	uint32_t	info_buffer_length;
	uint32_t	info_buffer_offset;
} rndis_query_complete;

/*
 * NdisSetRequest message
 */
typedef struct rndis_set_request_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS OID */
	uint32_t	oid;
	uint32_t	info_buffer_length;
	uint32_t	info_buffer_offset;
	/* RNDIS handle */
	uint32_t	device_vc_handle;
} rndis_set_request;

/*
 * Response to NdisSetRequest
 */
typedef struct rndis_set_complete_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS status */
	uint32_t	status;
} rndis_set_complete;

/*
 * NdisReset message
 */
typedef struct rndis_reset_request_ {
	uint32_t	reserved;
} rndis_reset_request;

/*
 * Response to NdisReset
 */
typedef struct rndis_reset_complete_ {
	/* RNDIS status */
	uint32_t	status;
	/* Non-zero if the multicast/packet filter state was lost */
	uint32_t	addressing_reset;
} rndis_reset_complete;

/*
 * NdisMIndicateStatus message
 */
typedef struct rndis_indicate_status_ {
	/* RNDIS status */
	uint32_t	status;
	uint32_t	status_buf_length;
	uint32_t	status_buf_offset;
} rndis_indicate_status;

/*
 * Diagnostic information passed as the status buffer in
 * rndis_indicate_status messages signifying error conditions.
 */
typedef struct rndis_diagnostic_info_ {
	/* RNDIS status */
	uint32_t	diag_status;
	uint32_t	error_offset;
} rndis_diagnostic_info;

/*
 * NdisKeepAlive message
 */
typedef struct rndis_keepalive_request_ {
	/* RNDIS request ID */
	uint32_t	request_id;
} rndis_keepalive_request;

/*
 * Response to NdisKeepAlive
 */
typedef struct rndis_keepalive_complete_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS status */
	uint32_t	status;
} rndis_keepalive_complete;

/*
 * Data message. All offset fields contain byte offsets from the beginning
 * of the rndis_packet structure. All length fields are in bytes.
 * VcHandle is set to 0 for connectionless data, otherwise it
 * contains the VC handle.
 */
typedef struct rndis_packet_ {
	uint32_t	data_offset;
	uint32_t	data_length;
	uint32_t	oob_data_offset;
	uint32_t	oob_data_length;
	uint32_t	num_oob_data_elements;
	uint32_t	per_pkt_info_offset;
	uint32_t	per_pkt_info_length;
	/* RNDIS handle */
	uint32_t	vc_handle;
	uint32_t	reserved;
} rndis_packet;

/* Extended data message; first nine fields mirror rndis_packet. */
typedef struct rndis_packet_ex_ {
	uint32_t	data_offset;
	uint32_t	data_length;
	uint32_t	oob_data_offset;
	uint32_t	oob_data_length;
	uint32_t	num_oob_data_elements;
	uint32_t	per_pkt_info_offset;
	uint32_t	per_pkt_info_length;
	/* RNDIS handle */
	uint32_t	vc_handle;
	uint32_t	reserved;
	uint64_t	data_buf_id;
	uint32_t	data_buf_offset;
	uint64_t	next_header_buf_id;
	uint32_t	next_header_byte_offset;
	uint32_t	next_header_byte_count;
} rndis_packet_ex;

/*
 * Optional Out of Band data associated with a Data message.
 */
typedef struct rndis_oobd_ {
	uint32_t	size;
	/* RNDIS class ID */
	uint32_t	type;
	uint32_t	class_info_offset;
} rndis_oobd;

/*
 * Packet extension field contents associated with a Data message.
 */
typedef struct rndis_per_packet_info_ {
	uint32_t	size;
	uint32_t	type;
	uint32_t	per_packet_info_offset;
} rndis_per_packet_info;

/* Values for rndis_per_packet_info.type */
typedef enum ndis_per_pkt_infotype_ {
	tcpip_chksum_info,
	ipsec_info,
	tcp_large_send_info,
	classification_handle_info,
	ndis_reserved,
	sgl_info,
	ieee_8021q_info,
	original_pkt_info,
	pkt_cancel_id,
	original_netbuf_list,
	cached_netbuf_list,
	short_pkt_padding_info,
	max_perpkt_info
} ndis_per_pkt_infotype;

/* 802.1Q VLAN tag information (ieee_8021q_info per-packet data). */
typedef struct ndis_8021q_info_ {
	union {
		struct {
			uint32_t	user_pri : 3;	/* User Priority */
			uint32_t	cfi : 1;	/* Canonical Format ID */
			uint32_t	vlan_id : 12;
			uint32_t	reserved : 16;
		} s1;
		uint32_t	value;
	} u1;
} ndis_8021q_info;

/*
 * Format of Information buffer passed in a SetRequest for the OID
 * OID_GEN_RNDIS_CONFIG_PARAMETER.
 */
typedef struct rndis_config_parameter_info_ {
	uint32_t	parameter_name_offset;
	uint32_t	parameter_name_length;
	uint32_t	parameter_type;
	uint32_t	parameter_value_offset;
	uint32_t	parameter_value_length;
} rndis_config_parameter_info;

/*
 * Values for ParameterType in rndis_config_parameter_info
 */
#define RNDIS_CONFIG_PARAM_TYPE_INTEGER		0
#define RNDIS_CONFIG_PARAM_TYPE_STRING		2

/*
 * CONDIS Miniport messages for connection oriented devices
 * that do not implement a call manager.
 */

/*
 * CoNdisMiniportCreateVc message
 */
typedef struct rcondis_mp_create_vc_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS handle */
	uint32_t	ndis_vc_handle;
} rcondis_mp_create_vc;

/*
 * Response to CoNdisMiniportCreateVc
 */
typedef struct rcondis_mp_create_vc_complete_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS handle */
	uint32_t	device_vc_handle;
	/* RNDIS status */
	uint32_t	status;
} rcondis_mp_create_vc_complete;

/*
 * CoNdisMiniportDeleteVc message
 */
typedef struct rcondis_mp_delete_vc_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS handle */
	uint32_t	device_vc_handle;
} rcondis_mp_delete_vc;

/*
 * Response to CoNdisMiniportDeleteVc
 */
typedef struct rcondis_mp_delete_vc_complete_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS status */
	uint32_t	status;
} rcondis_mp_delete_vc_complete;

/*
 * CoNdisMiniportQueryRequest message
 */
typedef struct rcondis_mp_query_request_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS request type */
	uint32_t	request_type;
	/* RNDIS OID */
	uint32_t	oid;
	/* RNDIS handle */
	uint32_t	device_vc_handle;
	uint32_t	info_buf_length;
	uint32_t	info_buf_offset;
} rcondis_mp_query_request;

/*
 * CoNdisMiniportSetRequest message
 */
typedef struct rcondis_mp_set_request_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS request type */
	uint32_t	request_type;
	/* RNDIS OID */
	uint32_t	oid;
	/* RNDIS handle */
	uint32_t	device_vc_handle;
	uint32_t	info_buf_length;
	uint32_t	info_buf_offset;
} rcondis_mp_set_request;

/*
 * CoNdisIndicateStatus message
 */
typedef struct rcondis_indicate_status_ {
	/* RNDIS handle */
	uint32_t	ndis_vc_handle;
	/* RNDIS status */
	uint32_t	status;
	uint32_t	status_buf_length;
	uint32_t	status_buf_offset;
} rcondis_indicate_status;

/*
 * CONDIS Call/VC parameters
 */

typedef struct rcondis_specific_parameters_ {
	uint32_t	parameter_type;
	uint32_t	parameter_length;
	uint32_t	parameter_offset;
} rcondis_specific_parameters;

typedef struct rcondis_media_parameters_ {
	uint32_t	flags;
	uint32_t	reserved1;
	uint32_t	reserved2;
	rcondis_specific_parameters	media_specific;
} rcondis_media_parameters;

typedef struct rndis_flowspec_ {
	uint32_t	token_rate;
	uint32_t	token_bucket_size;
	uint32_t	peak_bandwidth;
	uint32_t	latency;
	uint32_t	delay_variation;
	uint32_t	service_type;
	uint32_t	max_sdu_size;
	uint32_t	minimum_policed_size;
} rndis_flowspec;

typedef struct rcondis_call_manager_parameters_ {
	rndis_flowspec			transmit;
	rndis_flowspec			receive;
	rcondis_specific_parameters	call_mgr_specific;
} rcondis_call_manager_parameters;

/*
 * CoNdisMiniportActivateVc message
 */
typedef struct rcondis_mp_activate_vc_request_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	uint32_t	flags;
	/* RNDIS handle */
	uint32_t	device_vc_handle;
	uint32_t	media_params_offset;
	uint32_t	media_params_length;
	uint32_t	call_mgr_params_offset;
	uint32_t	call_mgr_params_length;
} rcondis_mp_activate_vc_request;

/*
 * Response to CoNdisMiniportActivateVc
 */
typedef struct rcondis_mp_activate_vc_complete_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS status */
	uint32_t	status;
} rcondis_mp_activate_vc_complete;

/*
 * CoNdisMiniportDeactivateVc message
 */
typedef struct rcondis_mp_deactivate_vc_request_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	uint32_t	flags;
	/* RNDIS handle */
	uint32_t	device_vc_handle;
} rcondis_mp_deactivate_vc_request;

/*
 * Response to CoNdisMiniportDeactivateVc
 */
typedef struct rcondis_mp_deactivate_vc_complete_ {
	/* RNDIS request ID */
	uint32_t	request_id;
	/* RNDIS status */
	uint32_t	status;
} rcondis_mp_deactivate_vc_complete;

/*
 * union with all of the RNDIS messages
 */
typedef union rndis_msg_container_ {
	rndis_packet			packet;
	rndis_initialize_request	init_request;
	rndis_halt_request		halt_request;
	rndis_query_request		query_request;
	rndis_set_request		set_request;
	rndis_reset_request		reset_request;
	rndis_keepalive_request		keepalive_request;
	rndis_indicate_status		indicate_status;
	rndis_initialize_complete	init_complete;
	rndis_query_complete		query_complete;
	rndis_set_complete		set_complete;
	rndis_reset_complete		reset_complete;
	rndis_keepalive_complete	keepalive_complete;
	rcondis_mp_create_vc		co_miniport_create_vc;
	rcondis_mp_delete_vc		co_miniport_delete_vc;
	rcondis_indicate_status		co_miniport_status;
	rcondis_mp_activate_vc_request	co_miniport_activate_vc;
	rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc;
	rcondis_mp_create_vc_complete	co_miniport_create_vc_complete;
	rcondis_mp_delete_vc_complete	co_miniport_delete_vc_complete;
	rcondis_mp_activate_vc_complete	co_miniport_activate_vc_complete;
	rcondis_mp_deactivate_vc_complete co_miniport_deactivate_vc_complete;
	rndis_packet_ex			packet_ex;
} rndis_msg_container;

/*
 * Remote NDIS message format
 */
typedef struct rndis_msg_ {
	/* REMOTE_NDIS_*_MSG / REMOTE_NDIS_*_CMPLT type code */
	uint32_t	ndis_msg_type;

	/*
	 * Total length of this message, from the beginning
	 * of the rndis_msg struct, in bytes.
	 */
	uint32_t	msg_len;

	/* Actual message */
	rndis_msg_container	msg;
} rndis_msg;
/*
 * Handy macros
 */

/*
 * get the size of an RNDIS message. Pass in the message type,
 * rndis_set_request, rndis_packet for example
 */
#define RNDIS_MESSAGE_SIZE(message)					\
	(sizeof(message) + (sizeof(rndis_msg) - sizeof(rndis_msg_container)))

/*
 * Buffer-locator macros.  Each takes a pointer to a message struct and
 * returns a byte pointer at the given offset field within it.
 *
 * Fix: the originals referenced Windows-style CamelCase field names
 * (InformationBufferOffset, StatusBufferOffset, OOBDataOffset,
 * PerPacketInfoOffset, ->Message) and the undefined type PUCHAR, none
 * of which exist in this header's structures — they could not compile
 * if expanded.  They now use the actual lowercase field names declared
 * above and uint8_t, with the argument parenthesized.
 */

/*
 * get pointer to info buffer with message pointer
 * (rndis_query_request/complete, rndis_set_request)
 */
#define MESSAGE_TO_INFO_BUFFER(message)					\
	(((uint8_t *)(message)) + (message)->info_buffer_offset)

/*
 * get pointer to status buffer with message pointer
 * (rndis_indicate_status)
 */
#define MESSAGE_TO_STATUS_BUFFER(message)				\
	(((uint8_t *)(message)) + (message)->status_buf_offset)

/*
 * get pointer to OOBD buffer with message pointer (rndis_packet)
 */
#define MESSAGE_TO_OOBD_BUFFER(message)					\
	(((uint8_t *)(message)) + (message)->oob_data_offset)

/*
 * get pointer to per-packet-info buffer with message pointer
 * (rndis_packet)
 */
#define MESSAGE_TO_DATA_BUFFER(message)					\
	(((uint8_t *)(message)) + (message)->per_pkt_info_offset)

/*
 * get pointer to contained message from NDIS_MESSAGE pointer
 */
#define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_message)			\
	((void *) &(rndis_message)->msg)

/*
 * get pointer to contained message from NDIS_MESSAGE pointer
 */
#define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_message)		\
	((void *) (rndis_message))
/*
 * Structures used in OID_RNDISMP_GET_RECEIVE_BUFFERS
 */

#define RNDISMP_RECEIVE_BUFFER_ELEM_FLAG_VMQ_RECEIVE_BUFFER 0x00000001

typedef struct rndismp_rx_buf_elem_ {
	uint32_t	flags;
	uint32_t	length;
	uint64_t	rx_buf_id;
	uint32_t	gpadl_handle;
	void		*rx_buf;
} rndismp_rx_buf_elem;

typedef struct rndismp_rx_bufs_info_ {
	uint32_t	num_rx_bufs;
	/*
	 * Variable-length trailer, declared with the pre-C99 [1] idiom;
	 * the actual element count is num_rx_bufs.
	 */
	rndismp_rx_buf_elem	rx_buf_elems[1];
} rndismp_rx_bufs_info;

#define RNDIS_HEADER_SIZE (sizeof(rndis_msg) - sizeof(rndis_msg_container))

/* NDIS packet filter bits (OID_GEN_CURRENT_PACKET_FILTER). */
#define NDIS_PACKET_TYPE_DIRECTED	0x00000001
#define NDIS_PACKET_TYPE_MULTICAST	0x00000002
#define NDIS_PACKET_TYPE_ALL_MULTICAST	0x00000004
#define NDIS_PACKET_TYPE_BROADCAST	0x00000008
#define NDIS_PACKET_TYPE_SOURCE_ROUTING	0x00000010
#define NDIS_PACKET_TYPE_PROMISCUOUS	0x00000020
#define NDIS_PACKET_TYPE_SMT		0x00000040
#define NDIS_PACKET_TYPE_ALL_LOCAL	0x00000080
#define NDIS_PACKET_TYPE_GROUP		0x00000100
#define NDIS_PACKET_TYPE_ALL_FUNCTIONAL	0x00000200
#define NDIS_PACKET_TYPE_FUNCTIONAL	0x00000400
#define NDIS_PACKET_TYPE_MAC_FRAME	0x00000800

#endif  /* __HV_RNDIS_H__ */

View File

@ -0,0 +1,929 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <sys/types.h>
#include <machine/atomic.h>
#include <sys/sema.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <dev/hyperv/include/hyperv.h>
#include "hv_net_vsc.h"
#include "hv_rndis.h"
#include "hv_rndis_filter.h"
/*
 * Forward declarations
 */

/* Request transmission and inbound message dispatch */
static int  hv_rf_send_request(rndis_device *device, rndis_request *request,
		uint32_t message_type);
static void hv_rf_receive_response(rndis_device *device, rndis_msg *response);
static void hv_rf_receive_indicate_status(rndis_device *device,
		rndis_msg *response);
static void hv_rf_receive_data(rndis_device *device, rndis_msg *message,
		netvsc_packet *pkt);

/* Device queries and filter programming */
static int  hv_rf_query_device(rndis_device *device, uint32_t oid,
		void *result, uint32_t *result_size);
static inline int hv_rf_query_device_mac(rndis_device *device);
static inline int hv_rf_query_device_link_status(rndis_device *device);
static int  hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter);

/* Device lifecycle */
static int  hv_rf_init_device(rndis_device *device);
static int  hv_rf_open_device(rndis_device *device);
static int  hv_rf_close_device(rndis_device *device);

/* Send-completion callbacks */
static void hv_rf_on_send_completion(void *context);
static void hv_rf_on_send_request_completion(void *context);
static void hv_rf_on_send_request_halt_completion(void *context);
/*
 * Allocate and initialize an rndis_device object.
 * (The previous comment about "module_param" was a leftover from the
 * Linux port; there is no module-parameter handling here.)
 */
static inline rndis_device *
hv_get_rndis_device(void)
{
rndis_device *device;
device = malloc(sizeof(rndis_device), M_DEVBUF, M_NOWAIT | M_ZERO);
if (device == NULL) {
return (NULL);
}
mtx_init(&device->req_lock, "HV-FRL", NULL, MTX_SPIN | MTX_RECURSE);
/* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */
STAILQ_INIT(&device->myrequest_list);
device->state = RNDIS_DEV_UNINITIALIZED;
return (device);
}
/*
 * Release an rndis_device allocated by hv_get_rndis_device():
 * destroy its request lock and free the structure itself.
 */
static inline void
hv_put_rndis_device(rndis_device *device)
{
	mtx_destroy(&device->req_lock);
	free(device, M_DEVBUF);
}
/*
 * Allocate an rndis_request for an outgoing RNDIS control message of
 * the given type/length, assign it the next request id, and link it
 * onto the device's outstanding-request list.
 *
 * Returns NULL when the allocation fails.  The caller releases the
 * request with hv_put_rndis_request().
 */
static inline rndis_request *
hv_rndis_request(rndis_device *device, uint32_t message_type,
    uint32_t message_length)
{
	rndis_request *request;
	rndis_msg *rndis_mesg;
	rndis_set_request *set;

	request = malloc(sizeof(rndis_request), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (request == NULL) {
		return (NULL);
	}

	/* Posted by hv_rf_receive_response() when the reply arrives */
	sema_init(&request->wait_sema, 0, "rndis sema");

	rndis_mesg = &request->request_msg;
	rndis_mesg->ndis_msg_type = message_type;
	rndis_mesg->msg_len = message_length;

	/*
	 * Set the request id. This field is always after the rndis header
	 * for request/response packet types so we just use the set_request
	 * as a template.
	 */
	set = &rndis_mesg->msg.set_request;
	set->request_id = atomic_fetchadd_int(&device->new_request_id, 1);
	/* Increment to get the new value (call above returns old value) */
	set->request_id += 1;

	/* Add to the request list */
	mtx_lock_spin(&device->req_lock);
	STAILQ_INSERT_TAIL(&device->myrequest_list, request, mylist_entry);
	mtx_unlock_spin(&device->req_lock);

	return (request);
}
/*
 * Unlink a request from the device's outstanding-request list and
 * free it together with its wait semaphore.
 */
static inline void
hv_put_rndis_request(rndis_device *device, rndis_request *request)
{
	mtx_lock_spin(&device->req_lock);
	/* Fixme:  Has O(n) performance */
	/*
	 * XXXKYS: Use Doubly linked lists.
	 */
	STAILQ_REMOVE(&device->myrequest_list, request, rndis_request_,
	    mylist_entry);
	mtx_unlock_spin(&device->req_lock);

	sema_destroy(&request->wait_sema);
	free(request, M_DEVBUF);
}
/*
 * Describe the request's embedded rndis_msg with a single page buffer
 * and hand it to the netvsc layer for transmission.
 *
 * Halt messages get a separate completion callback because the halt
 * path waits on the halt_complete_flag rather than on wait_sema.
 * Returns the hv_nv_on_send() status.
 */
static int
hv_rf_send_request(rndis_device *device, rndis_request *request,
    uint32_t message_type)
{
	int ret;
	netvsc_packet *packet;

	/* Set up the packet to send it */
	packet = &request->pkt;

	packet->is_data_pkt = FALSE;
	packet->tot_data_buf_len = request->request_msg.msg_len;
	packet->page_buf_count = 1;

	/* The message must therefore not straddle a page boundary */
	packet->page_buffers[0].pfn =
	    hv_get_phys_addr(&request->request_msg) >> PAGE_SHIFT;
	packet->page_buffers[0].length = request->request_msg.msg_len;
	packet->page_buffers[0].offset =
	    (unsigned long)&request->request_msg & (PAGE_SIZE - 1);

	packet->compl.send.send_completion_context = request; /* packet */

	if (message_type != REMOTE_NDIS_HALT_MSG) {
		packet->compl.send.on_send_completion =
		    hv_rf_on_send_request_completion;
	} else {
		packet->compl.send.on_send_completion =
		    hv_rf_on_send_request_halt_completion;
	}

	packet->compl.send.send_completion_tid = (unsigned long)device;

	ret = hv_nv_on_send(device->net_dev->dev, packet);

	return (ret);
}
/*
 * RNDIS filter receive response
 *
 * Match an inbound completion message against the outstanding-request
 * list by request id, copy the response into the request (or record a
 * buffer-overflow status when it is too large), and wake the thread
 * sleeping on the request's semaphore.
 */
static void
hv_rf_receive_response(rndis_device *device, rndis_msg *response)
{
	rndis_request *request = NULL;
	rndis_request *next_request;
	boolean_t found = FALSE;

	mtx_lock_spin(&device->req_lock);
	request = STAILQ_FIRST(&device->myrequest_list);
	while (request != NULL) {
		/*
		 * All request/response message contains request_id as the
		 * first field
		 */
		if (request->request_msg.msg.init_request.request_id ==
		    response->msg.init_complete.request_id) {
			found = TRUE;
			break;
		}
		next_request = STAILQ_NEXT(request, mylist_entry);
		request = next_request;
	}
	mtx_unlock_spin(&device->req_lock);

	if (found) {
		if (response->msg_len <= sizeof(rndis_msg)) {
			memcpy(&request->response_msg, response,
			    response->msg_len);
		} else {
			/* Response too big for the preallocated buffer */
			if (response->ndis_msg_type == REMOTE_NDIS_RESET_CMPLT) {
				/* Does not have a request id field */
				request->response_msg.msg.reset_complete.status =
				    STATUS_BUFFER_OVERFLOW;
			} else {
				request->response_msg.msg.init_complete.status =
				    STATUS_BUFFER_OVERFLOW;
			}
		}
		/* Wake the waiter in hv_rf_query_device()/set_packet_filter() */
		sema_post(&request->wait_sema);
	}
}
/*
 * RNDIS filter receive indicate status
 *
 * Translate an RNDIS media-status indication into a link-state
 * callback to the netvsc driver.  Other status codes are ignored.
 */
static void
hv_rf_receive_indicate_status(rndis_device *device, rndis_msg *response)
{
	rndis_indicate_status *indicate = &response->msg.indicate_status;

	switch (indicate->status) {
	case RNDIS_STATUS_MEDIA_CONNECT:
		netvsc_linkstatus_callback(device->net_dev->dev, 1);
		break;
	case RNDIS_STATUS_MEDIA_DISCONNECT:
		netvsc_linkstatus_callback(device->net_dev->dev, 0);
		break;
	default:
		/* TODO: handle other status indications */
		break;
	}
}
/*
 * RNDIS filter receive data
 *
 * Strip the RNDIS header from an inbound data packet, extract the
 * VLAN id if a per-packet-info structure is attached, and hand the
 * adjusted packet up to netvsc_recv().
 */
static void
hv_rf_receive_data(rndis_device *device, rndis_msg *message, netvsc_packet *pkt)
{
	rndis_packet *rndis_pkt;
	rndis_per_packet_info *rppi;
	ndis_8021q_info *rppi_vlan_info;
	uint32_t data_offset;

	rndis_pkt = &message->msg.packet;

	/*
	 * Fixme:  Handle multiple rndis pkt msgs that may be enclosed in this
	 * netvsc packet (ie tot_data_buf_len != message_length)
	 */

	/* Remove rndis header, then pass data packet up the stack */
	data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;

	/* L2 frame length, with L2 header, not including CRC */
	pkt->tot_data_buf_len        = rndis_pkt->data_length;
	pkt->page_buffers[0].offset += data_offset;
	/* Buffer length now L2 frame length plus trailing junk */
	pkt->page_buffers[0].length -= data_offset;

	pkt->is_data_pkt = TRUE;

	pkt->vlan_tci = 0;

	/*
	 * Read the VLAN ID if supplied by the Hyper-V infrastructure.
	 * Let higher-level driver code decide if it wants to use it.
	 * Ignore CFI, priority for now as FreeBSD does not support these.
	 */
	if (rndis_pkt->per_pkt_info_offset != 0) {
		/* rppi struct exists; compute its address */
		rppi = (rndis_per_packet_info *)((uint8_t *)rndis_pkt +
		    rndis_pkt->per_pkt_info_offset);
		/* if VLAN ppi struct, get the VLAN ID */
		if (rppi->type == ieee_8021q_info) {
			rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi
			    + rppi->per_packet_info_offset);
			pkt->vlan_tci = rppi_vlan_info->u1.s1.vlan_id;
		}
	}

	netvsc_recv(device->net_dev->dev, pkt);
}
/*
 * RNDIS filter on receive
 *
 * Entry point for all inbound RNDIS traffic.  Copies the message
 * header out of the receive buffer and dispatches on the message
 * type: data packets to hv_rf_receive_data(), control completions to
 * hv_rf_receive_response(), status indications to
 * hv_rf_receive_indicate_status().
 *
 * Returns 0 on success, ENODEV/EINVAL when the RNDIS device is not
 * (yet) initialized.
 */
int
hv_rf_on_receive(struct hv_device *device, netvsc_packet *pkt)
{
	hn_softc_t *sc = device_get_softc(device->device);
	netvsc_dev *net_dev = sc->net_dev;
	rndis_device *rndis_dev;
	rndis_msg rndis_mesg;
	rndis_msg *rndis_hdr;

	/* Make sure the rndis device state is initialized */
	if (net_dev->extension == NULL)
		return (ENODEV);

	rndis_dev = (rndis_device *)net_dev->extension;
	if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED)
		return (EINVAL);

	/*
	 * Shift virtual page number to form virtual page address.
	 * NOTE(review): the pfn is used directly as a kernel virtual
	 * page number here; presumably the netvsc receive path stores a
	 * virtual page number in this field -- confirm against caller.
	 */
	rndis_hdr = (rndis_msg *)(pkt->page_buffers[0].pfn << PAGE_SHIFT);

	rndis_hdr = (void *)((unsigned long)rndis_hdr
	    + pkt->page_buffers[0].offset);

	/*
	 * Make sure we got a valid rndis message
	 * Fixme:  There seems to be a bug in set completion msg where
	 * its msg_len is 16 bytes but the byte_count field in the
	 * xfer page range shows 52 bytes
	 */
#if 0
	if (pkt->tot_data_buf_len != rndis_hdr->msg_len) {
		DPRINT_ERR(NETVSC, "invalid rndis message? (expected %u "
		    "bytes got %u)... dropping this message!",
		    rndis_hdr->msg_len, pkt->tot_data_buf_len);
		DPRINT_EXIT(NETVSC);

		return (-1);
	}
#endif

	/* Copy at most sizeof(rndis_msg) into a local, aligned header copy */
	memcpy(&rndis_mesg, rndis_hdr,
	    (rndis_hdr->msg_len > sizeof(rndis_msg)) ?
	    sizeof(rndis_msg) : rndis_hdr->msg_len);

	switch (rndis_mesg.ndis_msg_type) {

	/* data message */
	case REMOTE_NDIS_PACKET_MSG:
		hv_rf_receive_data(rndis_dev, &rndis_mesg, pkt);
		break;
	/* completion messages */
	case REMOTE_NDIS_INITIALIZE_CMPLT:
	case REMOTE_NDIS_QUERY_CMPLT:
	case REMOTE_NDIS_SET_CMPLT:
	case REMOTE_NDIS_RESET_CMPLT:
	case REMOTE_NDIS_KEEPALIVE_CMPLT:
		hv_rf_receive_response(rndis_dev, &rndis_mesg);
		break;
	/* notification message */
	case REMOTE_NDIS_INDICATE_STATUS_MSG:
		hv_rf_receive_indicate_status(rndis_dev, &rndis_mesg);
		break;
	default:
		printf("hv_rf_on_receive():  Unknown msg_type 0x%x\n",
		    rndis_mesg.ndis_msg_type);
		break;
	}

	return (0);
}
/*
 * RNDIS filter query device
 *
 * Send a REMOTE_NDIS_QUERY_MSG for the given OID, block (without
 * timeout) until the response arrives, and copy the result into
 * *result.  On entry *result_size is the capacity of the result
 * buffer; on successful return it holds the actual response length.
 *
 * Returns 0 on success, -1 on allocation/send failure, EINVAL when
 * the response does not fit into the caller's buffer.
 */
static int
hv_rf_query_device(rndis_device *device, uint32_t oid, void *result,
    uint32_t *result_size)
{
	rndis_request *request;
	uint32_t in_result_size = *result_size;
	rndis_query_request *query;
	rndis_query_complete *query_complete;
	int ret = 0;

	*result_size = 0;
	request = hv_rndis_request(device, REMOTE_NDIS_QUERY_MSG,
	    RNDIS_MESSAGE_SIZE(rndis_query_request));
	if (request == NULL) {
		ret = -1;
		goto cleanup;
	}

	/* Set up the rndis query */
	query = &request->request_msg.msg.query_request;
	query->oid = oid;
	query->info_buffer_offset = sizeof(rndis_query_request);
	query->info_buffer_length = 0;
	query->device_vc_handle = 0;

	ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG);
	if (ret != 0) {
		/* Fixme:  printf added */
		printf("RNDISFILTER request failed to Send!\n");
		goto cleanup;
	}

	/* Posted by hv_rf_receive_response() */
	sema_wait(&request->wait_sema);

	/* Copy the response back */
	query_complete = &request->response_msg.msg.query_complete;

	if (query_complete->info_buffer_length > in_result_size) {
		ret = EINVAL;
		goto cleanup;
	}

	memcpy(result, (void *)((unsigned long)query_complete +
	    query_complete->info_buffer_offset),
	    query_complete->info_buffer_length);

	*result_size = query_complete->info_buffer_length;

cleanup:
	if (request != NULL)
		hv_put_rndis_request(device, request);

	return (ret);
}
/*
 * RNDIS filter query device MAC address
 *
 * Fetch the permanent MAC address into device->hw_mac_addr.
 */
static inline int
hv_rf_query_device_mac(rndis_device *device)
{
	uint32_t mac_len = HW_MACADDR_LEN;

	return (hv_rf_query_device(device, RNDIS_OID_802_3_PERMANENT_ADDRESS,
	    device->hw_mac_addr, &mac_len));
}
/*
 * RNDIS filter query device link status
 *
 * Fetch the media connect status into device->link_status.
 */
static inline int
hv_rf_query_device_link_status(rndis_device *device)
{
	uint32_t status_len = sizeof(uint32_t);

	return (hv_rf_query_device(device, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
	    &device->link_status, &status_len));
}
/*
 * RNDIS filter set packet filter
 * Sends an rndis request with the new filter, then waits for a response
 * from the host.
 * Returns zero on success, non-zero on failure.
 */
static int
hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter)
{
	rndis_request *request;
	rndis_set_request *set;
	rndis_set_complete *set_complete;
	uint32_t status;
	int ret;

	request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG,
	    RNDIS_MESSAGE_SIZE(rndis_set_request) + sizeof(uint32_t));
	if (request == NULL) {
		ret = -1;
		goto cleanup;
	}

	/* Set up the rndis set */
	set = &request->request_msg.msg.set_request;
	set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
	set->info_buffer_length = sizeof(uint32_t);
	set->info_buffer_offset = sizeof(rndis_set_request);

	/* The new filter value immediately follows the set request */
	memcpy((void *)((unsigned long)set + sizeof(rndis_set_request)),
	    &new_filter, sizeof(uint32_t));

	ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG);
	if (ret != 0) {
		goto cleanup;
	}

	/*
	 * Wait for the response from the host.  Another thread will signal
	 * us when the response has arrived.  In the failure case,
	 * sema_timedwait() returns non-zero after waiting 500 ticks.
	 * NOTE(review): 500 is in ticks, so this is 5 seconds only when
	 * hz == 100 -- confirm the intended timeout unit.
	 */
	ret = sema_timedwait(&request->wait_sema, 500);
	if (ret == 0) {
		/* Response received, check status */
		set_complete = &request->response_msg.msg.set_complete;
		status = set_complete->status;
		if (status != RNDIS_STATUS_SUCCESS) {
			/* Bad response status, return error */
			ret = -2;
		}
	} else {
		/*
		 * We cannot deallocate the request since we may still
		 * receive a send completion for it.
		 */
		goto exit;
	}

cleanup:
	if (request != NULL) {
		hv_put_rndis_request(device, request);
	}
exit:
	return (ret);
}
/*
 * RNDIS filter init device
 *
 * Send the REMOTE_NDIS_INITIALIZE_MSG handshake and block (without
 * timeout) for the completion.  Moves the device state to
 * RNDIS_DEV_INITIALIZED on success and back to
 * RNDIS_DEV_UNINITIALIZED on any failure.  Returns 0 or -1.
 */
static int
hv_rf_init_device(rndis_device *device)
{
	rndis_request *request;
	rndis_initialize_request *init;
	rndis_initialize_complete *init_complete;
	uint32_t status;
	int ret;

	request = hv_rndis_request(device, REMOTE_NDIS_INITIALIZE_MSG,
	    RNDIS_MESSAGE_SIZE(rndis_initialize_request));
	if (!request) {
		ret = -1;
		goto cleanup;
	}

	/* Set up the rndis set */
	init = &request->request_msg.msg.init_request;
	init->major_version = RNDIS_MAJOR_VERSION;
	init->minor_version = RNDIS_MINOR_VERSION;
	/*
	 * Per the RNDIS document, this should be set to the max MTU
	 * plus the header size.  However, 2048 works fine, so leaving
	 * it as is.
	 */
	init->max_xfer_size = 2048;

	device->state = RNDIS_DEV_INITIALIZING;

	ret = hv_rf_send_request(device, request, REMOTE_NDIS_INITIALIZE_MSG);
	if (ret != 0) {
		device->state = RNDIS_DEV_UNINITIALIZED;
		goto cleanup;
	}

	/* Posted by hv_rf_receive_response() */
	sema_wait(&request->wait_sema);

	init_complete = &request->response_msg.msg.init_complete;
	status = init_complete->status;
	if (status == RNDIS_STATUS_SUCCESS) {
		device->state = RNDIS_DEV_INITIALIZED;
		ret = 0;
	} else {
		device->state = RNDIS_DEV_UNINITIALIZED;
		ret = -1;
	}

cleanup:
	if (request) {
		hv_put_rndis_request(device, request);
	}

	return (ret);
}
#define HALT_COMPLETION_WAIT_COUNT      25

/*
 * RNDIS filter halt device
 *
 * Send REMOTE_NDIS_HALT_MSG and poll the "poor man's semaphore"
 * (halt_complete_flag, set by the send-completion callback) for up to
 * 25 * 400us = ~10ms.  Returns 0 on success, -1 on failure.
 *
 * NOTE(review): on the send-failure and timeout paths the request is
 * intentionally not freed -- a send completion may still arrive and
 * touch it -- so those paths leak the request.  Confirm this is the
 * intended trade-off.
 */
static int
hv_rf_halt_device(rndis_device *device)
{
	rndis_request *request;
	rndis_halt_request *halt;
	int i, ret;

	/* Attempt to do a rndis device halt */
	request = hv_rndis_request(device, REMOTE_NDIS_HALT_MSG,
	    RNDIS_MESSAGE_SIZE(rndis_halt_request));
	if (request == NULL) {
		return (-1);
	}

	/* initialize "poor man's semaphore" */
	request->halt_complete_flag = 0;

	/* Set up the rndis set */
	halt = &request->request_msg.msg.halt_request;
	halt->request_id = atomic_fetchadd_int(&device->new_request_id, 1);
	/* Increment to get the new value (call above returns old value) */
	halt->request_id += 1;

	ret = hv_rf_send_request(device, request, REMOTE_NDIS_HALT_MSG);
	if (ret != 0) {
		return (-1);
	}

	/*
	 * Wait for halt response from halt callback.  We must wait for
	 * the transaction response before freeing the request and other
	 * resources.
	 */
	for (i=HALT_COMPLETION_WAIT_COUNT; i > 0; i--) {
		if (request->halt_complete_flag != 0) {
			break;
		}
		DELAY(400);
	}
	if (i == 0) {
		return (-1);
	}

	device->state = RNDIS_DEV_UNINITIALIZED;

	if (request != NULL) {
		hv_put_rndis_request(device, request);
	}

	return (0);
}
/*
 * RNDIS filter open device
 *
 * Program the receive packet filter (promiscuous, or the normal
 * directed/broadcast/all-multicast set) and advance the device to the
 * data-initialized state on success.  A device that is not in the
 * RNDIS_DEV_INITIALIZED state is left untouched.
 */
static int
hv_rf_open_device(rndis_device *device)
{
	uint32_t filter;
	int ret;

	if (device->state != RNDIS_DEV_INITIALIZED)
		return (0);

	if (hv_promisc_mode == 1)
		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
	else
		filter = NDIS_PACKET_TYPE_BROADCAST |
		    NDIS_PACKET_TYPE_ALL_MULTICAST |
		    NDIS_PACKET_TYPE_DIRECTED;

	ret = hv_rf_set_packet_filter(device, filter);
	if (ret == 0)
		device->state = RNDIS_DEV_DATAINITIALIZED;

	return (ret);
}
/*
 * RNDIS filter close device
 *
 * Clear the receive packet filter and drop the device back to the
 * initialized state.  A device that is not data-initialized is left
 * untouched.
 */
static int
hv_rf_close_device(rndis_device *device)
{
	int error;

	if (device->state != RNDIS_DEV_DATAINITIALIZED)
		return (0);

	error = hv_rf_set_packet_filter(device, 0);
	if (error == 0)
		device->state = RNDIS_DEV_INITIALIZED;

	return (error);
}
/*
 * RNDIS filter on device add
 *
 * Create the rndis_device, attach the netvsc channel beneath it, run
 * the RNDIS init handshake, and fill in the caller's
 * netvsc_device_info (MAC address and link state).
 *
 * Returns 0 on success or an errno/RNDIS error; note that the init
 * and MAC-query failure paths currently fall through (see TODOs).
 */
int
hv_rf_on_device_add(struct hv_device *device, void *additl_info)
{
	int ret;
	netvsc_dev *net_dev;
	rndis_device *rndis_dev;
	netvsc_device_info *dev_info = (netvsc_device_info *)additl_info;

	rndis_dev = hv_get_rndis_device();
	if (rndis_dev == NULL) {
		return (ENOMEM);
	}

	/*
	 * Let the inner driver handle this first to create the netvsc channel
	 * NOTE!  Once the channel is created, we may get a receive callback
	 * (hv_rf_on_receive()) before this call is completed.
	 * Note:  Earlier code used a function pointer here.
	 */
	net_dev = hv_nv_on_device_add(device, additl_info);
	if (!net_dev) {
		hv_put_rndis_device(rndis_dev);

		return (ENOMEM);
	}

	/*
	 * Initialize the rndis device
	 */
	net_dev->extension = rndis_dev;
	rndis_dev->net_dev = net_dev;

	/* Send the rndis initialization message */
	ret = hv_rf_init_device(rndis_dev);
	if (ret != 0) {
		/*
		 * TODO: If rndis init failed, we will need to shut down
		 * the channel
		 */
	}

	/* Get the mac address */
	ret = hv_rf_query_device_mac(rndis_dev);
	if (ret != 0) {
		/* TODO: shut down rndis device and the channel */
	}

	memcpy(dev_info->mac_addr, rndis_dev->hw_mac_addr, HW_MACADDR_LEN);

	hv_rf_query_device_link_status(rndis_dev);

	dev_info->link_state = rndis_dev->link_status;

	return (ret);
}
/*
 * RNDIS filter on device remove
 *
 * Halt and free the rndis device, then let the netvsc layer tear
 * down (or retain) the channel.  The two return statuses are OR-ed
 * together so either failure yields a non-zero result.
 */
int
hv_rf_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
{
	hn_softc_t *sc = device_get_softc(device->device);
	netvsc_dev *net_dev = sc->net_dev;
	rndis_device *rndis_dev = (rndis_device *)net_dev->extension;
	int ret;

	/* Halt and release the rndis device */
	ret = hv_rf_halt_device(rndis_dev);

	hv_put_rndis_device(rndis_dev);
	net_dev->extension = NULL;

	/* Pass control to inner driver to remove the device */
	ret |= hv_nv_on_device_remove(device, destroy_channel);

	return (ret);
}
/*
 * RNDIS filter on open
 *
 * Thin wrapper: resolve this hv_device's softc and open the attached
 * RNDIS device.
 */
int
hv_rf_on_open(struct hv_device *device)
{
	hn_softc_t *softc;

	softc = device_get_softc(device->device);

	return (hv_rf_open_device(
	    (rndis_device *)softc->net_dev->extension));
}
/*
 * RNDIS filter on close
 *
 * Thin wrapper: resolve this hv_device's softc and close the attached
 * RNDIS device.
 */
int
hv_rf_on_close(struct hv_device *device)
{
	hn_softc_t *softc;

	softc = device_get_softc(device->device);

	return (hv_rf_close_device(
	    (rndis_device *)softc->net_dev->extension));
}
/*
 * RNDIS filter on send
 *
 * Prepend a REMOTE_NDIS_PACKET_MSG header (built in the packet's
 * rndis_filter_packet extension area) to an outbound data packet,
 * append per-packet VLAN info when pkt->vlan_tci is set, redirect the
 * send-completion callback through this filter, and pass the packet
 * to hv_nv_on_send().
 */
int
hv_rf_on_send(struct hv_device *device, netvsc_packet *pkt)
{
	rndis_filter_packet *filter_pkt;
	rndis_msg *rndis_mesg;
	rndis_packet *rndis_pkt;
	rndis_per_packet_info *rppi;
	ndis_8021q_info *rppi_vlan_info;
	uint32_t rndis_msg_size;
	int ret = 0;

	/* Add the rndis header */
	filter_pkt = (rndis_filter_packet *)pkt->extension;

	memset(filter_pkt, 0, sizeof(rndis_filter_packet));

	rndis_mesg = &filter_pkt->message;
	rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet);

	/* A VLAN tag costs one rppi header plus the 802.1q info struct */
	if (pkt->vlan_tci != 0) {
		rndis_msg_size += sizeof(rndis_per_packet_info) +
		    sizeof(ndis_8021q_info);
	}

	rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;
	rndis_mesg->msg_len = pkt->tot_data_buf_len + rndis_msg_size;

	rndis_pkt = &rndis_mesg->msg.packet;
	rndis_pkt->data_offset = sizeof(rndis_packet);
	rndis_pkt->data_length = pkt->tot_data_buf_len;

	pkt->is_data_pkt = TRUE;
	/* Page buffer 0 describes the rndis header itself */
	pkt->page_buffers[0].pfn = hv_get_phys_addr(rndis_mesg) >> PAGE_SHIFT;
	pkt->page_buffers[0].offset =
	    (unsigned long)rndis_mesg & (PAGE_SIZE - 1);
	pkt->page_buffers[0].length = rndis_msg_size;

	/* Save the packet context */
	filter_pkt->completion_context =
	    pkt->compl.send.send_completion_context;

	/* Use ours */
	pkt->compl.send.on_send_completion = hv_rf_on_send_completion;
	pkt->compl.send.send_completion_context = filter_pkt;

	/*
	 * If there is a VLAN tag, we need to set up some additional
	 * fields so the Hyper-V infrastructure will stuff the VLAN tag
	 * into the frame.
	 */
	if (pkt->vlan_tci != 0) {
		/* Move data offset past end of rppi + VLAN structs */
		rndis_pkt->data_offset += sizeof(rndis_per_packet_info) +
		    sizeof(ndis_8021q_info);

		/* must be set when we have rppi, VLAN info */
		rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet);
		rndis_pkt->per_pkt_info_length = sizeof(rndis_per_packet_info) +
		    sizeof(ndis_8021q_info);

		/* rppi immediately follows rndis_pkt */
		rppi = (rndis_per_packet_info *)(rndis_pkt + 1);
		rppi->size = sizeof(rndis_per_packet_info) +
		    sizeof(ndis_8021q_info);
		rppi->type = ieee_8021q_info;
		rppi->per_packet_info_offset = sizeof(rndis_per_packet_info);

		/* VLAN info immediately follows rppi struct */
		rppi_vlan_info = (ndis_8021q_info *)(rppi + 1);

		/* FreeBSD does not support CFI or priority */
		rppi_vlan_info->u1.s1.vlan_id = pkt->vlan_tci & 0xfff;
	}

	/*
	 * Invoke netvsc send.  If return status is bad, the caller now
	 * resets the context pointers before retrying.
	 */
	ret = hv_nv_on_send(device, pkt);

	return (ret);
}
/*
 * RNDIS filter on send completion callback
 *
 * Undo the callback redirection done in hv_rf_on_send(): forward the
 * completion to the original handler with the saved context.
 */
static void
hv_rf_on_send_completion(void *context)
{
	rndis_filter_packet *filter_pkt = (rndis_filter_packet *)context;

	/* Pass it back to the original handler */
	netvsc_xmit_completion(filter_pkt->completion_context);
}
/*
 * RNDIS filter on send request completion callback
 *
 * Intentionally empty: control-request completion is signalled by the
 * response path (hv_rf_receive_response()), not by send completion.
 */
static void
hv_rf_on_send_request_completion(void *context)
{
}
/*
 * RNDIS filter on send request (halt only) completion callback
 *
 * A halted device sends no response message, so the halt path waits
 * on this flag instead of the request's semaphore.
 */
static void
hv_rf_on_send_request_halt_completion(void *context)
{
	rndis_request *request = context;

	/*
	 * Notify hv_rf_halt_device() about halt completion.
	 * The halt code must wait for completion before freeing
	 * the transaction resources.
	 */
	request->halt_complete_flag = 1;
}

View File

@ -0,0 +1,116 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __HV_RNDIS_FILTER_H__
#define __HV_RNDIS_FILTER_H__
/*
* Defines
*/
/* Destroy or preserve channel on filter/netvsc teardown */
#define HV_RF_NV_DESTROY_CHANNEL TRUE
#define HV_RF_NV_RETAIN_CHANNEL FALSE
/*
* Number of page buffers to reserve for the RNDIS filter packet in the
* transmitted message.
*/
#define HV_RF_NUM_TX_RESERVED_PAGE_BUFS 1
/*
* Data types
*/
/*
 * Lifecycle states of the RNDIS device, in the order they are normally
 * traversed by hv_rf_on_device_add() / hv_rf_open_device().
 */
typedef enum {
	RNDIS_DEV_UNINITIALIZED = 0,
	RNDIS_DEV_INITIALIZING,
	RNDIS_DEV_INITIALIZED,		/* init handshake completed */
	RNDIS_DEV_DATAINITIALIZED,	/* packet filter programmed */
} rndis_device_state;

/*
 * One outstanding RNDIS control transaction (request plus response).
 */
typedef struct rndis_request_ {
	STAILQ_ENTRY(rndis_request_)	mylist_entry;
	/* Posted by the response path when response_msg is filled in */
	struct sema			wait_sema;

	/*
	 * Fixme:  We assumed a fixed size response here.  If we do ever
	 * need to handle a bigger response, we can either define a max
	 * response message or add a response buffer variable above this field
	 */
	rndis_msg			response_msg;

	/* Simplify allocation by having a netvsc packet inline */
	netvsc_packet			pkt;
	hv_vmbus_page_buffer		buffer;
	/* Fixme:  We assumed a fixed size request here. */
	rndis_msg			request_msg;
	/* Fixme:  Poor man's semaphore.  Set by the halt completion. */
	uint32_t			halt_complete_flag;
} rndis_request;

/*
 * Per-interface RNDIS filter state.
 */
typedef struct rndis_device_ {
	netvsc_dev			*net_dev;

	rndis_device_state		state;
	uint32_t			link_status;
	/* Monotonic id source for control requests (atomic increment) */
	uint32_t			new_request_id;

	/* Protects myrequest_list */
	struct mtx			req_lock;

	STAILQ_HEAD(RQ, rndis_request_)	myrequest_list;

	uint8_t				hw_mac_addr[HW_MACADDR_LEN];
} rndis_device;

/*
 * Per-packet filter state prepended to outbound data packets; lives
 * in the netvsc packet's extension area.
 */
typedef struct rndis_filter_packet_ {
	/* Original send-completion context, restored on completion */
	void				*completion_context;
	/* No longer used */
	pfn_on_send_rx_completion	on_completion;

	rndis_msg			message;
} rndis_filter_packet;
/*
* Externs
*/
extern int hv_rf_on_receive(struct hv_device *device, netvsc_packet *pkt);
extern int hv_rf_on_device_add(struct hv_device *device, void *additl_info);
extern int hv_rf_on_device_remove(struct hv_device *device,
boolean_t destroy_channel);
extern int hv_rf_on_open(struct hv_device *device);
extern int hv_rf_on_close(struct hv_device *device);
extern int hv_rf_on_send(struct hv_device *device, netvsc_packet *pkt);
#endif /* __HV_RNDIS_FILTER_H__ */

View File

@ -0,0 +1,194 @@
/*-
* Copyright (c) 1998 - 2008 Søren Schmidt <sos@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer,
* without modification, immediately at the beginning of the file.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* Copyright (c) 2009-2013 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/ata.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/sema.h>
#include <sys/taskqueue.h>
#include <vm/uma.h>
#include <machine/stdarg.h>
#include <machine/resource.h>
#include <machine/bus.h>
#include <sys/rman.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/ata/ata-all.h>
#include <dev/ata/ata-pci.h>
#include <ata_if.h>
#define HV_X64_MSR_GUEST_OS_ID 0x40000000
#define HV_X64_CPUID_MIN 0x40000005
#define HV_X64_CPUID_MAX 0x4000ffff
/* prototypes */
static int hv_ata_pci_probe(device_t dev);
static int hv_ata_pci_attach(device_t dev);
static int hv_ata_pci_detach(device_t dev);
static int hv_check_for_hyper_v(void);
/*
 * generic PCI ATA device probe
 *
 * Claims the emulated IDE controller on Hyper-V (returning
 * BUS_PROBE_VENDOR) so that the generic ATA driver does not attach
 * and disks are handled by the enlightened storvsc driver instead.
 */
static int
hv_ata_pci_probe(device_t dev)
{
	int ata_disk_enable = 0;

	if (bootverbose)
		device_printf(dev,
		    "hv_ata_pci_probe dev_class/subslcass = %d, %d\n",
		    pci_get_class(dev), pci_get_subclass(dev));

	/* is this a storage class device ? */
	if (pci_get_class(dev) != PCIC_STORAGE)
		return (ENXIO);

	/* is this an IDE/ATA type device ? */
	if (pci_get_subclass(dev) != PCIS_STORAGE_IDE)
		return (ENXIO);

	if (bootverbose)
		device_printf(dev,
		    "Hyper-V probe for disabling ATA-PCI, emulated driver\n");

	/*
	 * On Hyper-V the default is to use the enlightened driver for
	 * IDE disks.  However, if the user wishes to use the native
	 * ATA driver, the environment variable
	 * hw_ata.disk_enable must be explicitly set to 1.
	 *
	 * NOTE(review): getenv_int() here only tests whether the tunable
	 * exists -- ata_disk_enable's value is never compared against 1,
	 * so even hw.ata.disk_enable=0 disables this driver.  Confirm
	 * whether existence or value was intended.
	 */
	if (hv_check_for_hyper_v()) {
		if (getenv_int("hw.ata.disk_enable", &ata_disk_enable)) {
			if (bootverbose)
				device_printf(dev,
				    "hw.ata.disk_enable flag is disabling Hyper-V"
				    " ATA driver support\n");

			return (ENXIO);
		}
	}
	if (bootverbose)
		device_printf(dev, "Hyper-V ATA storage driver enabled.\n");

	return (BUS_PROBE_VENDOR);
}
/*
 * No-op attach.  Winning the probe is sufficient to keep the generic
 * ATA PCI driver off the emulated controller; nothing to set up.
 */
static int
hv_ata_pci_attach(device_t dev)
{
	return 0;
}
/*
 * No-op detach; attach allocated no resources.
 */
static int
hv_ata_pci_detach(device_t dev)
{
	return 0;
}
/**
 * Detect whether we are running as a Hyper-V guest, so the
 * enlightened storage driver can take over IDE disks.
 *
 * Returns non-zero when the "Microsoft Hv" hypervisor signature is
 * found via CPUID, zero otherwise.
 */
static int
hv_check_for_hyper_v(void)
{
	u_int regs[4];

	/* CPUID leaf 1: is any hypervisor present at all? */
	do_cpuid(1, regs);
	if ((regs[2] & 0x80000000) == 0)
		return (0);

	/*
	 * A hypervisor is present; query leaf 0x40000000 and verify
	 * the "Microsoft Hv" vendor signature and max-leaf range.
	 */
	do_cpuid(HV_X64_MSR_GUEST_OS_ID, regs);

	return (regs[0] >= HV_X64_CPUID_MIN &&
	    regs[0] <= HV_X64_CPUID_MAX &&
	    memcmp("Microsoft Hv", &regs[1], 12) == 0);
}
/*
 * newbus glue: register a "pciata-disable" driver on the pci bus that
 * out-probes the generic ATA driver on Hyper-V's emulated controller.
 */
static device_method_t hv_ata_pci_methods[] = {
	/* device interface */
	DEVMETHOD(device_probe,		hv_ata_pci_probe),
	DEVMETHOD(device_attach,	hv_ata_pci_attach),
	DEVMETHOD(device_detach,	hv_ata_pci_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),

	DEVMETHOD_END
};

devclass_t hv_ata_pci_devclass;

static driver_t hv_ata_pci_disengage_driver = {
	"pciata-disable",
	hv_ata_pci_methods,
	/* softc sized like the real driver's so the slot is compatible */
	sizeof(struct ata_pci_controller),
};

DRIVER_MODULE(atapci_dis, pci, hv_ata_pci_disengage_driver,
    hv_ata_pci_devclass, NULL, NULL);
MODULE_VERSION(atapci_dis, 1);
MODULE_DEPEND(atapci_dis, ata, 1, 1, 1);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,231 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __HV_VSTORAGE_H__
#define __HV_VSTORAGE_H__
/*
 * Major/minor macros.  Minor version is in LSB, meaning that earlier flat
 * version numbers will be interpreted as "0.x" (i.e., 1 becomes 0.1).
 */
#define VMSTOR_PROTOCOL_MAJOR(VERSION_)		(((VERSION_) >> 8) & 0xff)
#define VMSTOR_PROTOCOL_MINOR(VERSION_)		(((VERSION_)     ) & 0xff)
#define VMSTOR_PROTOCOL_VERSION(MAJOR_, MINOR_)	((((MAJOR_) & 0xff) << 8) | \
						 (((MINOR_) & 0xff)     ))

/*
 * Invalid version.
 */
#define VMSTOR_INVALID_PROTOCOL_VERSION	-1

/*
 * Version history:
 * V1 Beta		0.1
 * V1 RC < 2008/1/31	1.0
 * V1 RC > 2008/1/31	2.0
 */
#define VMSTOR_PROTOCOL_VERSION_CURRENT	VMSTOR_PROTOCOL_VERSION(2, 0)

/**
 * Packet structure ops describing virtual storage requests.
 * Values travel in vstor_packet.operation.
 */
enum vstor_packet_ops {
	VSTOR_OPERATION_COMPLETEIO            = 1,
	VSTOR_OPERATION_REMOVEDEVICE          = 2,
	VSTOR_OPERATION_EXECUTESRB            = 3,
	VSTOR_OPERATION_RESETLUN              = 4,
	VSTOR_OPERATION_RESETADAPTER          = 5,
	VSTOR_OPERATION_RESETBUS              = 6,
	VSTOR_OPERATION_BEGININITIALIZATION   = 7,
	VSTOR_OPERATION_ENDINITIALIZATION     = 8,
	VSTOR_OPERATION_QUERYPROTOCOLVERSION  = 9,
	VSTOR_OPERATION_QUERYPROPERTIES       = 10,
	VSTOR_OPERATION_MAXIMUM               = 10
};
/*
 * Platform neutral description of a scsi request -
 * this remains the same across the write regardless of 32/64 bit
 * note:  it's patterned off the Windows DDK SCSI_PASS_THROUGH structure
 */

#define CDB16GENERIC_LENGTH			0x10
#define SENSE_BUFFER_SIZE			0x12
#define MAX_DATA_BUFFER_LENGTH_WITH_PADDING	0x14

struct vmscsi_req {
	uint16_t length;
	uint8_t srb_status;
	uint8_t scsi_status;

	/* HBA number, set to the order number detected by initiator. */
	uint8_t port;
	/* SCSI bus number or bus_id, different from CAM's path_id. */
	uint8_t path_id;

	uint8_t target_id;
	uint8_t lun;

	uint8_t cdb_len;
	uint8_t sense_info_len;
	uint8_t data_in;
	uint8_t reserved;

	uint32_t transfer_len;

	/* CDB on submission; sense data on completion */
	union {
		uint8_t cdb[CDB16GENERIC_LENGTH];

		uint8_t sense_data[SENSE_BUFFER_SIZE];

		uint8_t reserved_array[MAX_DATA_BUFFER_LENGTH_WITH_PADDING];
	};

} __packed;
/**
* This structure is sent during the initialization phase to get the different
* properties of the channel.
*/
struct vmstor_chan_props {
uint16_t proto_ver;
uint8_t path_id;
uint8_t target_id;
/**
* Note: port number is only really known on the client side
*/
uint32_t port;
uint32_t flags;
uint32_t max_transfer_bytes;
/**
* This id is unique for each channel and will correspond with
* vendor specific data in the inquiry_ata
*/
uint64_t unique_id;
} __packed;
/**
* This structure is sent during the storage protocol negotiations.
*/
struct vmstor_proto_ver
{
/**
* Major (MSW) and minor (LSW) version numbers.
*/
uint16_t major_minor;
uint16_t revision; /* always zero */
} __packed;
/**
 * Channel Property Flags (vmstor_chan_props.flags)
 */
#define STORAGE_CHANNEL_REMOVABLE_FLAG		0x1
#define STORAGE_CHANNEL_EMULATED_IDE_FLAG	0x2
/**
 * Top-level packet exchanged with the host for every storage request.
 */
struct vstor_packet {
	/**
	 * Requested operation type
	 */
	enum vstor_packet_ops operation;
	/*
	 * Flags - see below for values
	 */
	uint32_t flags;
	/**
	 * Status of the request returned from the server side.
	 */
	uint32_t status;
	/* Payload selected by 'operation'. */
	union
	{
		/**
		 * Structure used to forward SCSI commands from the client to
		 * the server.
		 */
		struct vmscsi_req vm_srb;
		/**
		 * Structure used to query channel properties.
		 */
		struct vmstor_chan_props chan_props;
		/**
		 * Used during version negotiations.
		 */
		struct vmstor_proto_ver version;
	};
} __packed;
/**
 * SRB (SCSI Request Block) Status Codes
 */
#define SRB_STATUS_PENDING		0x00
#define SRB_STATUS_SUCCESS		0x01
#define SRB_STATUS_ABORTED		0x02
#define SRB_STATUS_ABORT_FAILED		0x03
#define SRB_STATUS_ERROR		0x04
#define SRB_STATUS_BUSY			0x05
/**
 * SRB Status Masks (can be combined with above status codes)
 */
#define SRB_STATUS_QUEUE_FROZEN		0x40
#define SRB_STATUS_AUTOSENSE_VALID	0x80
/**
 * Packet flags
 */
/**
 * This flag indicates that the server should send back a completion for this
 * packet.
 */
#define REQUEST_COMPLETION_FLAG	0x1
/**
 * This is the set of flags that the vsc can set in any packets it sends
 */
#define VSC_LEGAL_FLAGS		(REQUEST_COMPLETION_FLAG)
#endif /* __HV_VSTORAGE_H__ */

View File

@ -0,0 +1,285 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _KVP_H
#define _KVP_H
/*
 * An implementation of HyperV key value pair (KVP) functionality for FreeBSD
 *
 */
/*
 * Maximum value size - used for both key names and value data, and includes
 * any applicable NULL terminators.
 *
 * Note: This limit is somewhat arbitrary, but falls easily within what is
 * supported for all native guests (back to Win 2000) and what is reasonable
 * for the IC KVP exchange functionality. Note that Windows Me/98/95 are
 * limited to 255 character key names.
 *
 * MSDN recommends not storing data values larger than 2048 bytes in the
 * registry.
 *
 * Note: This value is used in defining the KVP exchange message - this value
 * cannot be modified without affecting the message size and compatibility.
 */
/*
 * bytes, including any null terminators
 */
#define HV_KVP_EXCHANGE_MAX_VALUE_SIZE	(2048)
/*
 * Maximum key size - the registry limit for the length of an entry name
 * is 256 characters, including the null terminator
 */
#define HV_KVP_EXCHANGE_MAX_KEY_SIZE	(512)
/*
 * In FreeBSD, we implement the KVP functionality in two components:
 * 1) The kernel component which is packaged as part of the hv_utils driver
 * is responsible for communicating with the host and responsible for
 * implementing the host/guest protocol. 2) A user level daemon that is
 * responsible for data gathering.
 *
 * Host/Guest Protocol: The host iterates over an index and expects the guest
 * to assign a key name to the index and also return the value corresponding to
 * the key. The host will have at most one KVP transaction outstanding at any
 * given point in time. The host side iteration stops when the guest returns
 * an error. Microsoft has specified the following mapping of key names to
 * host specified index:
 *
 * Index Key Name
 * 0 FullyQualifiedDomainName
 * 1 IntegrationServicesVersion
 * 2 NetworkAddressIPv4
 * 3 NetworkAddressIPv6
 * 4 OSBuildNumber
 * 5 OSName
 * 6 OSMajorVersion
 * 7 OSMinorVersion
 * 8 OSVersion
 * 9 ProcessorArchitecture
 *
 * The Windows host expects the Key Name and Key Value to be encoded in utf16.
 *
 * Guest Kernel/KVP Daemon Protocol: As noted earlier, we implement all of the
 * data gathering functionality in a user mode daemon. The user level daemon
 * is also responsible for binding the key name to the index as well. The
 * kernel and user-level daemon communicate using a connector channel.
 *
 * The user mode component first registers with the
 * the kernel component. Subsequently, the kernel component requests, data
 * for the specified keys. In response to this message the user mode component
 * fills in the value corresponding to the specified key. We overload the
 * sequence field in the cn_msg header to define our KVP message types.
 *
 *
 * The kernel component simply acts as a conduit for communication between the
 * Windows host and the user-level daemon. The kernel component passes up the
 * index received from the Host to the user-level daemon. If the index is
 * valid (supported), the corresponding key as well as its
 * value (both are strings) is returned. If the index is invalid
 * (not supported), a NULL key string is returned.
 */
/*
 * Registry value types.
 */
#define HV_REG_SZ	1
#define HV_REG_U32	4
#define HV_REG_U64	8
/*
 * Daemon code not supporting IP injection (legacy daemon).
 */
#define HV_KVP_OP_REGISTER	4
/*
 * Daemon code supporting IP injection.
 * The KVP opcode field is used to communicate the
 * registration information; so define a namespace that
 * will be distinct from the host defined KVP opcode.
 */
#define KVP_OP_REGISTER1 100
/*
 * Host-defined KVP operation codes.
 */
enum hv_kvp_exchg_op {
	HV_KVP_OP_GET = 0,
	HV_KVP_OP_SET,
	HV_KVP_OP_DELETE,
	HV_KVP_OP_ENUMERATE,
	HV_KVP_OP_GET_IP_INFO,
	HV_KVP_OP_SET_IP_INFO,
	HV_KVP_OP_COUNT /* Number of operations, must be last. */
};
/*
 * KVP pools a key/value pair may live in.
 */
enum hv_kvp_exchg_pool {
	HV_KVP_POOL_EXTERNAL = 0,
	HV_KVP_POOL_GUEST,
	HV_KVP_POOL_AUTO,
	HV_KVP_POOL_AUTO_EXTERNAL,
	HV_KVP_POOL_AUTO_INTERNAL,
	HV_KVP_POOL_COUNT /* Number of pools, must be last. */
};
/*
 * Some Hyper-V status codes.
 */
#define HV_KVP_S_OK			0x00000000
#define HV_KVP_E_FAIL			0x80004005
#define HV_KVP_S_CONT			0x80070103
#define HV_ERROR_NOT_SUPPORTED		0x80070032
#define HV_ERROR_MACHINE_LOCKED		0x800704F7
#define HV_ERROR_DEVICE_NOT_CONNECTED	0x8007048F
#define HV_INVALIDARG			0x80070057
#define HV_KVP_GUID_NOTFOUND		0x80041002
/* Address family selectors for hv_kvp_ipaddr_value.addr_family. */
#define ADDR_FAMILY_NONE	0x00
#define ADDR_FAMILY_IPV4	0x01
#define ADDR_FAMILY_IPV6	0x02
/* Buffer sizes (in uint16_t elements) for the IP address message below. */
#define MAX_ADAPTER_ID_SIZE	128
#define MAX_IP_ADDR_SIZE	1024
#define MAX_GATEWAY_SIZE	512
/*
 * IP address injection payload.  The uint16_t arrays presumably carry
 * utf16 strings (the protocol comment above says the host uses utf16) --
 * NOTE(review): confirm against the daemon implementation.
 */
struct hv_kvp_ipaddr_value {
	uint16_t adapter_id[MAX_ADAPTER_ID_SIZE];
	uint8_t  addr_family;	/* ADDR_FAMILY_* */
	uint8_t  dhcp_enabled;
	uint16_t ip_addr[MAX_IP_ADDR_SIZE];
	uint16_t sub_net[MAX_IP_ADDR_SIZE];
	uint16_t gate_way[MAX_GATEWAY_SIZE];
	uint16_t dns_addr[MAX_IP_ADDR_SIZE];
} __attribute__((packed));
/*
 * Common header for KVP messages.
 */
struct hv_kvp_hdr {
	uint8_t  operation;	/* enum hv_kvp_exchg_op */
	uint8_t  pool;		/* enum hv_kvp_exchg_pool */
	uint16_t pad;
} __attribute__((packed));
/*
 * A single key/value pair with its type and sizes.
 */
struct hv_kvp_exchg_msg_value {
	uint32_t value_type;	/* HV_REG_* */
	uint32_t key_size;
	uint32_t value_size;
	uint8_t  key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
	union {
		uint8_t  value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
		uint32_t value_u32;
		uint64_t value_u64;
	} msg_value;
} __attribute__((packed));
struct hv_kvp_msg_enumerate {
	uint32_t index;		/* host-driven iteration index */
	struct hv_kvp_exchg_msg_value data;
} __attribute__((packed));
struct hv_kvp_msg_get {
	struct hv_kvp_exchg_msg_value data;
} __attribute__((packed));
struct hv_kvp_msg_set {
	struct hv_kvp_exchg_msg_value data;
} __attribute__((packed));
struct hv_kvp_msg_delete {
	uint32_t key_size;
	uint8_t  key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
} __attribute__((packed));
struct hv_kvp_register {
	uint8_t version[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
} __attribute__((packed));
/*
 * Top-level KVP message exchanged between kernel and daemon.
 */
struct hv_kvp_msg {
	union {
		struct hv_kvp_hdr kvp_hdr;
		int error;
	} hdr;
	union {
		struct hv_kvp_msg_get       kvp_get;
		struct hv_kvp_msg_set       kvp_set;
		struct hv_kvp_msg_delete    kvp_delete;
		struct hv_kvp_msg_enumerate kvp_enum_data;
		struct hv_kvp_ipaddr_value  kvp_ip_val;
		struct hv_kvp_register      kvp_register;
	} body;
} __attribute__((packed));
struct hv_kvp_ip_msg {
	uint8_t operation;	/* enum hv_kvp_exchg_op */
	uint8_t pool;		/* enum hv_kvp_exchg_pool */
	struct hv_kvp_ipaddr_value kvp_ip_val;
} __attribute__((packed));
/* Rendezvous path for the user-level daemon -- NOTE(review): verify usage. */
#define BSD_SOC_PATH "/etc/hyperv/socket"
/*
 * Indices into service_table[] and receive_buffer[] (see hv_util.c).
 */
#define HV_SHUT_DOWN		0
#define HV_TIME_SYNCH		1
#define HV_HEART_BEAT		2
#define HV_KVP			3
#define HV_MAX_UTIL_SERVICES	4
/*
 * Offset between the Windows FILETIME epoch (1601-01-01) and the Unix
 * epoch (1970-01-01).
 */
#define HV_WLTIMEDELTA		116444736000000000L /* in 100ns unit */
/* Time sync message flags (hv_ictimesync_data.flags). */
#define HV_ICTIMESYNCFLAG_PROBE		0
#define HV_ICTIMESYNCFLAG_SYNC		1
#define HV_ICTIMESYNCFLAG_SAMPLE	2
#define HV_NANO_SEC_PER_SEC		1000000000
/*
 * Descriptor for one utility service (shutdown, time sync, heartbeat, ...).
 */
typedef struct hv_vmbus_service {
	hv_guid        guid;		/* Hyper-V GUID */
	char*          name;		/* name of service */
	boolean_t      enabled;		/* service enabled */
	hv_work_queue* work_queue;	/* background work queue */
	/*
	 * Function to initialize service
	 */
	int (*init)(struct hv_vmbus_service *);
	/*
	 * Function to process Hyper-V messages
	 */
	void (*callback)(void *);
} hv_vmbus_service;
/* Defined in hv_util.c; one receive buffer per utility service. */
extern uint8_t* receive_buffer[];
extern hv_vmbus_service service_table[];
#endif /* _KVP_H */

View File

@ -0,0 +1,474 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* A common driver for all hyper-V util services.
*/
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/reboot.h>
#include <sys/timetc.h>
#include <sys/syscallsubr.h>
#include <dev/hyperv/include/hyperv.h>
#include "hv_kvp.h"
/* Time Sync data */
typedef struct {
	/* Host time sample in 100ns FILETIME units; see hv_set_host_time(). */
	uint64_t data;
} time_sync_data;
/* Per-service channel callbacks and initializers. */
static void hv_shutdown_cb(void *context);
static void hv_heartbeat_cb(void *context);
static void hv_timesync_cb(void *context);
static int  hv_timesync_init(hv_vmbus_service *serv);
/**
 * Note: GUID codes below are predefined by the host hypervisor
 * (Hyper-V and Azure)interface and required for correct operation.
 *
 * The table order must match the HV_SHUT_DOWN / HV_TIME_SYNCH /
 * HV_HEART_BEAT indices in hv_kvp.h: the callbacks index
 * receive_buffer[] with those constants, and hv_util_attach() derives
 * the index from the softc's offset into this table.
 */
hv_vmbus_service service_table[] = {
	/* Shutdown Service */
	{ .guid.data = {0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49,
			0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB},
	  .name = "Hyper-V Shutdown Service\n",
	  .enabled = TRUE,
	  .callback = hv_shutdown_cb,
	},
	/* Time Synch Service */
	{ .guid.data = {0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49,
			0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf},
	  .name = "Hyper-V Time Synch Service\n",
	  .enabled = TRUE,
	  .init = hv_timesync_init,
	  .callback = hv_timesync_cb,
	},
	/* Heartbeat Service */
	{ .guid.data = {0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e,
			0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d},
	  .name = "Hyper-V Heartbeat Service\n",
	  .enabled = TRUE,
	  .callback = hv_heartbeat_cb,
	},
};
/*
 * Receive buffer pointers. There is one buffer per utility service. The
 * buffer is allocated during attach().
 */
uint8_t *receive_buffer[HV_MAX_UTIL_SERVICES];
/* Wire format of a time sync message payload from the host. */
struct hv_ictimesync_data {
	uint64_t parenttime;	/* host time sample */
	uint64_t childtime;
	uint64_t roundtriptime;
	uint8_t  flags;		/* HV_ICTIMESYNCFLAG_* */
} __packed;
/*
 * Create the background work queue the time sync service uses to apply
 * host time samples.  Returns 0 on success or ENOMEM when the queue
 * cannot be created.
 */
static int
hv_timesync_init(hv_vmbus_service *serv)
{
	hv_work_queue *wq;

	wq = hv_work_queue_create("Time Sync");
	if (wq == NULL)
		return (ENOMEM);
	serv->work_queue = wq;
	return (0);
}
/*
 * Respond to an ICMSGTYPE_NEGOTIATE request from the host by filling in
 * the negotiate payload inside 'buf' with the framework/message versions
 * this guest accepts (3.0 when the host offered it, else 1.0).
 *
 * NOTE(review): the 'negop' parameter is overwritten immediately below
 * and its incoming value is never used -- all callers in this file pass
 * NULL.  It is effectively an out/local variable.
 */
static void
hv_negotiate_version(
	struct hv_vmbus_icmsg_hdr*	icmsghdrp,
	struct hv_vmbus_icmsg_negotiate* negop,
	uint8_t*			buf)
{
	icmsghdrp->icmsgsize = 0x10;
	/* The negotiate payload follows the pipe and IC message headers. */
	negop = (struct hv_vmbus_icmsg_negotiate *)&buf[
		sizeof(struct hv_vmbus_pipe_hdr) +
		sizeof(struct hv_vmbus_icmsg_hdr)];
	if (negop->icframe_vercnt >= 2 &&
	    negop->icversion_data[1].major == 3) {
		negop->icversion_data[0].major = 3;
		negop->icversion_data[0].minor = 0;
		negop->icversion_data[1].major = 3;
		negop->icversion_data[1].minor = 0;
	} else {
		negop->icversion_data[0].major = 1;
		negop->icversion_data[0].minor = 0;
		negop->icversion_data[1].major = 1;
		negop->icversion_data[1].minor = 0;
	}
	/* Report exactly one accepted framework and message version. */
	negop->icframe_vercnt = 1;
	negop->icmsg_vercnt = 1;
}
/**
* Set host time based on time sync message from host
*/
static void
hv_set_host_time(void *context)
{
time_sync_data *time_msg = context;
uint64_t hosttime = time_msg->data;
struct timespec guest_ts, host_ts;
uint64_t host_tns;
int64_t diff;
int error;
host_tns = (hosttime - HV_WLTIMEDELTA) * 100;
host_ts.tv_sec = (time_t)(host_tns/HV_NANO_SEC_PER_SEC);
host_ts.tv_nsec = (long)(host_tns%HV_NANO_SEC_PER_SEC);
nanotime(&guest_ts);
diff = (int64_t)host_ts.tv_sec - (int64_t)guest_ts.tv_sec;
/*
* If host differs by 5 seconds then make the guest catch up
*/
if (diff > 5 || diff < -5) {
error = kern_clock_settime(curthread, CLOCK_REALTIME,
&host_ts);
}
/*
* Free the hosttime that was allocated in hv_adj_guesttime()
*/
free(time_msg, M_DEVBUF);
}
/**
 * @brief Synchronize time with host after reboot, restore, etc.
 *
 * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
 * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
 * message after the timesync channel is opened. Since the hv_utils module is
 * loaded after hv_vmbus, the first message is usually missed. The other
 * thing is, systime is automatically set to emulated hardware clock which may
 * not be UTC time or in the same time zone. So, to override these effects, we
 * use the first 50 time samples for initial system time setting.
 *
 * SYNC and SAMPLE messages are handled identically: both queue the host
 * sample onto the time sync work queue; anything else is dropped.
 */
static inline
void hv_adj_guesttime(uint64_t hosttime, uint8_t flags)
{
	time_sync_data *msg;

	msg = malloc(sizeof(*msg), M_DEVBUF, M_NOWAIT);
	if (msg == NULL)
		return;
	msg->data = hosttime;

	if ((flags & (HV_ICTIMESYNCFLAG_SYNC | HV_ICTIMESYNCFLAG_SAMPLE)) != 0) {
		hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue,
		    hv_set_host_time, msg);
	} else {
		/* hv_set_host_time never runs; release the sample here. */
		free(msg, M_DEVBUF);
	}
}
/**
 * Time Sync Channel message handler
 *
 * Pulls one packet from the channel; negotiate requests are answered in
 * place, time samples are forwarded to hv_adj_guesttime(), and the buffer
 * is echoed back to the host as the transaction response.
 */
static void
hv_timesync_cb(void *context)
{
	hv_vmbus_channel*	channel = context;
	hv_vmbus_icmsg_hdr*	icmsghdrp;
	uint32_t		recvlen;
	uint64_t		requestId;
	int			ret;
	uint8_t*		time_buf;
	struct hv_ictimesync_data* timedatap;

	time_buf = receive_buffer[HV_TIME_SYNCH];

	ret = hv_vmbus_channel_recv_packet(channel, time_buf,
	    PAGE_SIZE, &recvlen, &requestId);
	if ((ret == 0) && recvlen > 0) {
		/* The IC message header follows the pipe header. */
		icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &time_buf[
			sizeof(struct hv_vmbus_pipe_hdr)];

		if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
			hv_negotiate_version(icmsghdrp, NULL, time_buf);
		} else {
			/* Time sample payload follows the IC header. */
			timedatap = (struct hv_ictimesync_data *) &time_buf[
				sizeof(struct hv_vmbus_pipe_hdr) +
				sizeof(struct hv_vmbus_icmsg_hdr)];
			hv_adj_guesttime(timedatap->parenttime, timedatap->flags);
		}

		/* Send the (possibly modified) buffer back as the response. */
		icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION
			| HV_ICMSGHDRFLAG_RESPONSE;
		hv_vmbus_channel_send_packet(channel, time_buf,
		    recvlen, requestId,
		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
	}
}
/**
 * Shutdown service channel callback.
 *
 * Receives a shutdown request from the host, validates it, acknowledges it
 * back to the host, and then (for a valid request) initiates a graceful
 * power-off via shutdown_nice().
 */
static void
hv_shutdown_cb(void *context)
{
	uint8_t*		buf;
	hv_vmbus_channel*	channel = context;
	uint8_t			execute_shutdown = 0;
	hv_vmbus_icmsg_hdr*	icmsghdrp;
	uint32_t		recv_len;
	uint64_t		request_id;
	int			ret;
	hv_vmbus_shutdown_msg_data* shutdown_msg;

	buf = receive_buffer[HV_SHUT_DOWN];

	ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE,
	    &recv_len, &request_id);

	if ((ret == 0) && recv_len > 0) {
		/* The IC message header follows the pipe header. */
		icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
			&buf[sizeof(struct hv_vmbus_pipe_hdr)];

		if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
			hv_negotiate_version(icmsghdrp, NULL, buf);
		} else {
			shutdown_msg =
				(struct hv_vmbus_shutdown_msg_data *)
				&buf[sizeof(struct hv_vmbus_pipe_hdr) +
				sizeof(struct hv_vmbus_icmsg_hdr)];

			/* Only flag values 0 and 1 are accepted. */
			switch (shutdown_msg->flags) {
			case 0:
			case 1:
				icmsghdrp->status = HV_S_OK;
				execute_shutdown = 1;
				if(bootverbose)
					printf("Shutdown request received -"
					    " graceful shutdown initiated\n");
				break;
			default:
				icmsghdrp->status = HV_E_FAIL;
				execute_shutdown = 0;
				printf("Shutdown request received -"
				    " Invalid request\n");
				break;
			}
		}

		/* Ack the transaction before acting on it. */
		icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION |
			HV_ICMSGHDRFLAG_RESPONSE;
		hv_vmbus_channel_send_packet(channel, buf,
		    recv_len, request_id,
		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
	}

	if (execute_shutdown)
		shutdown_nice(RB_POWEROFF);
}
/**
 * Process heartbeat message
 *
 * Answers host liveness probes: bumps the sequence number in the received
 * heartbeat payload and echoes the buffer back as the response.
 */
static void
hv_heartbeat_cb(void *context)
{
	uint8_t*		buf;
	hv_vmbus_channel*	channel = context;
	uint32_t		recvlen;
	uint64_t		requestid;
	int			ret;
	struct hv_vmbus_heartbeat_msg_data* heartbeat_msg;
	struct hv_vmbus_icmsg_hdr*	icmsghdrp;

	buf = receive_buffer[HV_HEART_BEAT];

	ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recvlen,
	    &requestid);

	if ((ret == 0) && recvlen > 0) {
		/* The IC message header follows the pipe header. */
		icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
			&buf[sizeof(struct hv_vmbus_pipe_hdr)];

		if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
			hv_negotiate_version(icmsghdrp, NULL, buf);
		} else {
			heartbeat_msg =
				(struct hv_vmbus_heartbeat_msg_data *)
				&buf[sizeof(struct hv_vmbus_pipe_hdr) +
				sizeof(struct hv_vmbus_icmsg_hdr)];
			/* Incrementing seq_num tells the host we are alive. */
			heartbeat_msg->seq_num += 1;
		}

		icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION |
			HV_ICMSGHDRFLAG_RESPONSE;
		hv_vmbus_channel_send_packet(channel, buf, recvlen, requestid,
		    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
	}
}
/*
 * Probe: claim the device when its vmbus type GUID matches one of the
 * enabled utility services, and store that service as the softc.
 *
 * Improvements over the original: vmbus_get_type() is loop-invariant and
 * is now called once instead of on every iteration, the loop exits as
 * soon as a match is found (GUIDs are unique so at most one entry can
 * match), and returns use the style(9) parenthesized form.
 *
 * @returns 0 when a service matches, ENXIO otherwise.
 */
static int
hv_util_probe(device_t dev)
{
	const char *type_guid;
	int i;

	type_guid = vmbus_get_type(dev);
	for (i = 0; i < HV_MAX_UTIL_SERVICES; i++) {
		if (service_table[i].enabled &&
		    memcmp(type_guid, &service_table[i].guid,
		    sizeof(hv_guid)) == 0) {
			device_set_softc(dev, (void *)&service_table[i]);
			return (0);
		}
	}
	return (ENXIO);
}
/*
 * Attach: allocate this service's receive buffer, run the optional
 * service-specific init hook, and open the vmbus channel with the
 * service's callback.
 *
 * @returns 0 on success; ENODEV if the init hook fails; the
 *          hv_vmbus_channel_open() error otherwise.
 */
static int
hv_util_attach(device_t dev)
{
	struct hv_device*		hv_dev;
	struct hv_vmbus_service*	service;
	int				ret;
	size_t				receive_buffer_offset;

	hv_dev = vmbus_get_devctx(dev);
	service = device_get_softc(dev);
	/*
	 * The softc set in probe points into service_table[], so the
	 * pointer difference is this service's index, which also indexes
	 * receive_buffer[].
	 */
	receive_buffer_offset = service - &service_table[0];
	device_printf(dev, "Hyper-V Service attaching: %s\n", service->name);
	/* M_WAITOK allocations cannot fail, so no NULL check is needed. */
	receive_buffer[receive_buffer_offset] =
		malloc(4 * PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);

	if (service->init != NULL) {
		ret = service->init(service);
		if (ret) {
			/* NOTE(review): init's error code is masked to ENODEV. */
			ret = ENODEV;
			goto error0;
		}
	}

	ret = hv_vmbus_channel_open(hv_dev->channel, 4 * PAGE_SIZE,
		    4 * PAGE_SIZE, NULL, 0,
		    service->callback, hv_dev->channel);

	if (ret)
		goto error0;

	return (0);

	error0:
	/* Undo the buffer allocation; the channel was never opened. */
	free(receive_buffer[receive_buffer_offset], M_DEVBUF);
	receive_buffer[receive_buffer_offset] = NULL;
	return (ret);
}
/*
 * Detach: close the vmbus channel, tear down the service's work queue
 * (if one was created), and release the receive buffer allocated in
 * attach.  Always succeeds.
 */
static int
hv_util_detach(device_t dev)
{
	struct hv_device*		dev_ctx;
	struct hv_vmbus_service*	svc;
	size_t				idx;

	dev_ctx = vmbus_get_devctx(dev);
	hv_vmbus_channel_close(dev_ctx->channel);

	svc = device_get_softc(dev);
	/* Softc points into service_table[]; recover the service index. */
	idx = svc - &service_table[0];

	if (svc->work_queue != NULL)
		hv_work_queue_close(svc->work_queue);

	free(receive_buffer[idx], M_DEVBUF);
	receive_buffer[idx] = NULL;

	return (0);
}
/*
 * SYSINIT hook (registered at the bottom of this file); currently a
 * no-op placeholder.
 */
static void hv_util_init(void)
{
}
/*
 * Module event handler.  No work is needed on load or unload; every
 * event is accepted and reported as success.
 */
static int hv_util_modevent(module_t mod, int event, void *arg)
{
	int error = 0;

	switch (event) {
	case MOD_LOAD:		/* nothing to do on load */
	case MOD_UNLOAD:	/* nothing to do on unload */
	default:
		break;
	}
	return (error);
}
/* Newbus glue: the hyperv-utils driver attaches to the vmbus bus. */
static device_method_t util_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, hv_util_probe),
	DEVMETHOD(device_attach, hv_util_attach),
	DEVMETHOD(device_detach, hv_util_detach),
	DEVMETHOD(device_shutdown, bus_generic_shutdown),
	{ 0, 0 } }	/* table terminator */
;

/* Softc size is 0: probe installs a service_table[] pointer instead. */
static driver_t util_driver = { "hyperv-utils", util_methods, 0 };

static devclass_t util_devclass;

DRIVER_MODULE(hv_utils, vmbus, util_driver, util_devclass, hv_util_modevent, 0);
MODULE_VERSION(hv_utils, 1);
MODULE_DEPEND(hv_utils, vmbus, 1, 1, 1);

/* Run the (currently empty) init hook once kernel threads are up. */
SYSINIT(hv_util_initx, SI_SUB_KTHREAD_IDLE, SI_ORDER_MIDDLE + 1,
	hv_util_init, NULL);

View File

@ -0,0 +1,842 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <machine/bus.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include "hv_vmbus_priv.h"
/* Forward declarations for this file's internal helpers. */
static int vmbus_channel_create_gpadl_header(
	/* must be phys and virt contiguous */
	void*				contig_buffer,
	/* page-size multiple */
	uint32_t			size,
	hv_vmbus_channel_msg_info**	msg_info,
	uint32_t*			message_count);

static void vmbus_channel_set_event(hv_vmbus_channel* channel);
/**
 * @brief Trigger an event notification on the specified channel
 *
 * For monitored channels, sets the channel's bit in the shared interrupt
 * page and in the child-to-parent monitor page; otherwise signals the
 * host directly via hv_vmbus_set_event().
 */
static void
vmbus_channel_set_event(hv_vmbus_channel *channel)
{
	hv_vmbus_monitor_page *monitor_page;

	if (channel->offer_msg.monitor_allocated) {
		/* Each uint32_t represents 32 channels */
		synch_set_bit((channel->offer_msg.child_rel_id & 31),
			((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
			+ ((channel->offer_msg.child_rel_id >> 5))));

		monitor_page = (hv_vmbus_monitor_page *)
			hv_vmbus_g_connection.monitor_pages;

		monitor_page++; /* Get the child to parent monitor page */

		synch_set_bit(channel->monitor_bit,
			(uint32_t *)&monitor_page->
				trigger_group[channel->monitor_group].pending);
	} else {
		hv_vmbus_set_event(channel->offer_msg.child_rel_id);
	}
}
/**
 * @brief Open the specified channel
 *
 * Allocates one contiguous region holding both the outbound and inbound
 * ring buffers, registers it with the host as a GPADL, then sends an
 * OPEN_CHANNEL message and waits (up to 5 seconds) for the host's
 * response.
 *
 * @param new_channel            channel to open
 * @param send_ring_buffer_size  outbound ring size, page multiple
 * @param recv_ring_buffer_size  inbound ring size, page multiple
 * @param user_data/user_data_len optional payload copied into the open msg
 * @param pfn_on_channel_callback callback invoked for channel activity
 * @param context                argument passed to the callback
 * @returns 0 on success, errno on failure
 */
int
hv_vmbus_channel_open(
	hv_vmbus_channel*		new_channel,
	uint32_t			send_ring_buffer_size,
	uint32_t			recv_ring_buffer_size,
	void*				user_data,
	uint32_t			user_data_len,
	hv_vmbus_pfn_channel_callback	pfn_on_channel_callback,
	void*				context)
{
	int ret = 0;
	void *in, *out;
	hv_vmbus_channel_open_channel*	open_msg;
	hv_vmbus_channel_msg_info*	open_info;

	new_channel->on_channel_callback = pfn_on_channel_callback;
	new_channel->channel_callback_context = context;

	/* Allocate the ring buffer */
	out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size),
	    M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
	KASSERT(out != NULL,
	    ("Error VMBUS: contigmalloc failed to allocate Ring Buffer!"));
	if (out == NULL)
		return (ENOMEM);

	/* Inbound ring follows the outbound ring in the same allocation. */
	in = ((uint8_t *) out + send_ring_buffer_size);

	new_channel->ring_buffer_pages = out;
	new_channel->ring_buffer_page_count = (send_ring_buffer_size
		+ recv_ring_buffer_size) >> PAGE_SHIFT;

	hv_vmbus_ring_buffer_init(
		&new_channel->outbound,
		out,
		send_ring_buffer_size);

	hv_vmbus_ring_buffer_init(
		&new_channel->inbound,
		in,
		recv_ring_buffer_size);

	/**
	 * Establish the gpadl for the ring buffer
	 */
	new_channel->ring_buffer_gpadl_handle = 0;

	/*
	 * NOTE(review): the return value of the GPADL setup below is
	 * overwritten without being checked, and the early ENOMEM return
	 * a few lines further down leaks the ring buffer and the GPADL.
	 * Confirm and fix in a follow-up.
	 */
	ret = hv_vmbus_channel_establish_gpadl(new_channel,
		new_channel->outbound.ring_buffer,
		send_ring_buffer_size + recv_ring_buffer_size,
		&new_channel->ring_buffer_gpadl_handle);

	/**
	 * Create and init the channel open message
	 */
	open_info = (hv_vmbus_channel_msg_info*) malloc(
		sizeof(hv_vmbus_channel_msg_info) +
		sizeof(hv_vmbus_channel_open_channel),
		M_DEVBUF,
		M_NOWAIT);
	KASSERT(open_info != NULL,
	    ("Error VMBUS: malloc failed to allocate Open Channel message!"));
	if (open_info == NULL)
		return (ENOMEM);

	sema_init(&open_info->wait_sema, 0, "Open Info Sema");

	open_msg = (hv_vmbus_channel_open_channel*) open_info->msg;
	open_msg->header.message_type = HV_CHANNEL_MESSAGE_OPEN_CHANNEL;
	open_msg->open_id = new_channel->offer_msg.child_rel_id;
	open_msg->child_rel_id = new_channel->offer_msg.child_rel_id;
	open_msg->ring_buffer_gpadl_handle =
		new_channel->ring_buffer_gpadl_handle;
	/* Tell the host where the inbound ring starts inside the GPADL. */
	open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size
		>> PAGE_SHIFT;
	open_msg->server_context_area_gpadl_handle = 0;

	if (user_data_len)
		memcpy(open_msg->user_data, user_data, user_data_len);

	/* Queue the request so the reply handler can find and wake us. */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_INSERT_TAIL(
		&hv_vmbus_g_connection.channel_msg_anchor,
		open_info,
		msg_list_entry);
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);

	ret = hv_vmbus_post_message(
		open_msg, sizeof(hv_vmbus_channel_open_channel));

	if (ret != 0)
		goto cleanup;

	ret = sema_timedwait(&open_info->wait_sema, 500); /* KYS 5 seconds */

	if (ret)
		goto cleanup;

	if (open_info->response.open_result.status == 0) {
		if(bootverbose)
			printf("VMBUS: channel <%p> open success.\n", new_channel);
	} else {
		if(bootverbose)
			printf("Error VMBUS: channel <%p> open failed - %d!\n",
				new_channel, open_info->response.open_result.status);
	}

	cleanup:
	/* Always dequeue and release the request record. */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_REMOVE(
		&hv_vmbus_g_connection.channel_msg_anchor,
		open_info,
		msg_list_entry);
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	sema_destroy(&open_info->wait_sema);
	free(open_info, M_DEVBUF);

	return (ret);
}
/**
 * @brief Create a gpadl for the specified buffer
 *
 * Builds the GPADL_HEADER message -- and, when the PFN list does not fit
 * in a single channel message, a chain of GPADL_BODY sub-messages queued
 * on the header's sub_msg_list_anchor -- describing 'contig_buffer' as a
 * list of page frame numbers.
 *
 * Fix over the original: when a body-message allocation failed, the
 * already-allocated header and any queued body messages were leaked.
 * They are now released before returning ENOMEM, and the out parameters
 * are reset so the caller never sees a dangling pointer.
 *
 * @param contig_buffer  physically and virtually contiguous buffer
 * @param size           buffer size, a page-size multiple
 * @param msg_info       out: the header message info
 * @param message_count  out: number of messages (header + bodies)
 * @returns 0 on success, ENOMEM on allocation failure (nothing leaked)
 */
static int
vmbus_channel_create_gpadl_header(
	void*				contig_buffer,
	uint32_t			size,	/* page-size multiple */
	hv_vmbus_channel_msg_info**	msg_info,
	uint32_t*			message_count)
{
	int				i;
	int				page_count;
	unsigned long long		pfn;
	uint32_t			msg_size;
	hv_vmbus_channel_gpadl_header*	gpa_header;
	hv_vmbus_channel_gpadl_body*	gpadl_body;
	hv_vmbus_channel_msg_info*	msg_header;
	hv_vmbus_channel_msg_info*	msg_body;
	int				pfnSum, pfnCount, pfnLeft, pfnCurr, pfnSize;

	page_count = size >> PAGE_SHIFT;
	pfn = hv_get_phys_addr(contig_buffer) >> PAGE_SHIFT;

	/* Do we need gpadl body messages in addition to the header? */
	pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE
	    - sizeof(hv_vmbus_channel_gpadl_header)
	    - sizeof(hv_gpa_range);
	pfnCount = pfnSize / sizeof(uint64_t);

	if (page_count > pfnCount) { /* if(we need a gpadl body) */
		/* fill in the header */
		msg_size = sizeof(hv_vmbus_channel_msg_info)
		    + sizeof(hv_vmbus_channel_gpadl_header)
		    + sizeof(hv_gpa_range)
		    + pfnCount * sizeof(uint64_t);
		msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
		KASSERT(msg_header != NULL,
		    ("Error VMBUS: malloc failed to allocate Gpadl Message!"));
		if (msg_header == NULL)
			return (ENOMEM);

		TAILQ_INIT(&msg_header->sub_msg_list_anchor);
		msg_header->message_size = msg_size;

		gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg;
		gpa_header->range_count = 1;
		gpa_header->range_buf_len = sizeof(hv_gpa_range)
		    + page_count * sizeof(uint64_t);
		gpa_header->range[0].byte_offset = 0;
		gpa_header->range[0].byte_count = size;
		/* The header holds as many leading PFNs as fit. */
		for (i = 0; i < pfnCount; i++) {
			gpa_header->range[0].pfn_array[i] = pfn + i;
		}

		*msg_info = msg_header;
		*message_count = 1;

		pfnSum = pfnCount;
		pfnLeft = page_count - pfnCount;

		/*
		 * figure out how many pfns we can fit in a body message
		 */
		pfnSize = HV_MAX_SIZE_CHANNEL_MESSAGE
		    - sizeof(hv_vmbus_channel_gpadl_body);
		pfnCount = pfnSize / sizeof(uint64_t);

		/*
		 * fill in the body messages with the remaining PFNs
		 */
		while (pfnLeft) {
			if (pfnLeft > pfnCount) {
				pfnCurr = pfnCount;
			} else {
				pfnCurr = pfnLeft;
			}

			msg_size = sizeof(hv_vmbus_channel_msg_info) +
			    sizeof(hv_vmbus_channel_gpadl_body) +
			    pfnCurr * sizeof(uint64_t);
			msg_body = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
			KASSERT(msg_body != NULL,
			    ("Error VMBUS: malloc failed to allocate Gpadl msg_body!"));
			if (msg_body == NULL) {
				/*
				 * Bug fix: release the header and every
				 * body message queued so far instead of
				 * leaking them, and clear the out params.
				 */
				while ((msg_body = TAILQ_FIRST(
				    &msg_header->sub_msg_list_anchor)) != NULL) {
					TAILQ_REMOVE(
					    &msg_header->sub_msg_list_anchor,
					    msg_body, msg_list_entry);
					free(msg_body, M_DEVBUF);
				}
				free(msg_header, M_DEVBUF);
				*msg_info = NULL;
				*message_count = 0;
				return (ENOMEM);
			}

			msg_body->message_size = msg_size;
			(*message_count)++;
			gpadl_body =
			    (hv_vmbus_channel_gpadl_body*) msg_body->msg;
			/*
			 * gpadl_body->gpadl = kbuffer;
			 */
			for (i = 0; i < pfnCurr; i++) {
				gpadl_body->pfn[i] = pfn + pfnSum + i;
			}

			TAILQ_INSERT_TAIL(
			    &msg_header->sub_msg_list_anchor,
			    msg_body,
			    msg_list_entry);
			pfnSum += pfnCurr;
			pfnLeft -= pfnCurr;
		}
	} else { /* else everything fits in a header */
		msg_size = sizeof(hv_vmbus_channel_msg_info) +
		    sizeof(hv_vmbus_channel_gpadl_header) +
		    sizeof(hv_gpa_range) +
		    page_count * sizeof(uint64_t);
		msg_header = malloc(msg_size, M_DEVBUF, M_NOWAIT | M_ZERO);
		KASSERT(msg_header != NULL,
		    ("Error VMBUS: malloc failed to allocate Gpadl Message!"));
		if (msg_header == NULL)
			return (ENOMEM);

		msg_header->message_size = msg_size;

		gpa_header = (hv_vmbus_channel_gpadl_header*) msg_header->msg;
		gpa_header->range_count = 1;
		gpa_header->range_buf_len = sizeof(hv_gpa_range) +
		    page_count * sizeof(uint64_t);
		gpa_header->range[0].byte_offset = 0;
		gpa_header->range[0].byte_count = size;
		for (i = 0; i < page_count; i++) {
			gpa_header->range[0].pfn_array[i] = pfn + i;
		}

		*msg_info = msg_header;
		*message_count = 1;
	}

	return (0);
}
/**
 * @brief Establish a GPADL for the specified buffer
 *
 * Builds a GPADL-header message (plus body messages when the pfn list
 * does not fit in one message), posts them to the host, and blocks up
 * to 5 seconds for the GPADL-created response.
 *
 * @param channel	channel the GPADL will be associated with
 * @param contig_buffer	physically contiguous buffer to describe
 * @param size		buffer size; must be a page-size multiple
 * @param gpadl_handle	out: handle identifying the new GPADL
 * @returns 0 on success, errno on failure
 */
int
hv_vmbus_channel_establish_gpadl(
	hv_vmbus_channel*	channel,
	void*			contig_buffer,
	uint32_t		size,	/* page-size multiple */
	uint32_t*		gpadl_handle)
{
	int				ret = 0;
	hv_vmbus_channel_gpadl_header*	gpadl_msg;
	hv_vmbus_channel_gpadl_body*	gpadl_body;
	hv_vmbus_channel_msg_info*	msg_info;
	hv_vmbus_channel_msg_info*	sub_msg_info;
	uint32_t			msg_count;
	hv_vmbus_channel_msg_info*	curr;
	uint32_t			next_gpadl_handle;

	/*
	 * NOTE(review): the handle is read non-atomically and then
	 * incremented with atomic_add_int; two concurrent callers could
	 * read the same value before either increment lands.  An
	 * atomic_fetchadd_int would close the window — confirm whether
	 * concurrent callers are possible here.
	 */
	next_gpadl_handle = hv_vmbus_g_connection.next_gpadl_handle;
	atomic_add_int((int*) &hv_vmbus_g_connection.next_gpadl_handle, 1);

	ret = vmbus_channel_create_gpadl_header(
		contig_buffer, size, &msg_info, &msg_count);
	if(ret != 0) { /* if(allocation failed) return immediately */
	    /* reverse atomic_add_int above */
	    atomic_subtract_int((int*)
		    &hv_vmbus_g_connection.next_gpadl_handle, 1);
	    return ret;
	}

	sema_init(&msg_info->wait_sema, 0, "Open Info Sema");
	gpadl_msg = (hv_vmbus_channel_gpadl_header*) msg_info->msg;
	gpadl_msg->header.message_type = HV_CHANNEL_MESSAGEL_GPADL_HEADER;
	gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id;
	gpadl_msg->gpadl = next_gpadl_handle;

	/*
	 * Link the request onto the global message list so the
	 * GPADL-created handler can find it and post wait_sema.
	 */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_INSERT_TAIL(
		&hv_vmbus_g_connection.channel_msg_anchor,
		msg_info,
		msg_list_entry);

	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);

	ret = hv_vmbus_post_message(
		gpadl_msg,
		msg_info->message_size -
		    (uint32_t) sizeof(hv_vmbus_channel_msg_info));

	if (ret != 0)
	    goto cleanup;

	/* Post any additional body messages carrying the remaining pfns */
	if (msg_count > 1) {
	    TAILQ_FOREACH(curr,
		    &msg_info->sub_msg_list_anchor, msg_list_entry) {
		sub_msg_info = curr;
		gpadl_body =
		    (hv_vmbus_channel_gpadl_body*) sub_msg_info->msg;

		gpadl_body->header.message_type =
		    HV_CHANNEL_MESSAGE_GPADL_BODY;
		gpadl_body->gpadl = next_gpadl_handle;

		ret = hv_vmbus_post_message(
			gpadl_body,
			sub_msg_info->message_size
			    - (uint32_t) sizeof(hv_vmbus_channel_msg_info));
		 /* if (the post message failed) give up and clean up */
		if(ret != 0)
		    goto cleanup;
	    }
	}

	ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds*/
	if (ret != 0)
	    goto cleanup;

	*gpadl_handle = gpadl_msg->gpadl;

cleanup:
	/* Unlink the request and release it in all paths */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
		msg_info, msg_list_entry);
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);

	sema_destroy(&msg_info->wait_sema);
	free(msg_info, M_DEVBUF);

	return (ret);
}
/**
 * @brief Teardown the specified GPADL handle
 *
 * Sends a GPADL-teardown request to the host and blocks up to 5
 * seconds for the matching torndown response.
 *
 * @param channel	channel that owns the GPADL
 * @param gpadl_handle	handle returned by hv_vmbus_channel_establish_gpadl()
 * @returns 0 on success, errno on failure
 */
int
hv_vmbus_channel_teardown_gpdal(
	hv_vmbus_channel*	channel,
	uint32_t		gpadl_handle)
{
	int					ret = 0;
	hv_vmbus_channel_gpadl_teardown*	msg;
	hv_vmbus_channel_msg_info*		info;

	info = (hv_vmbus_channel_msg_info *)
		malloc(	sizeof(hv_vmbus_channel_msg_info) +
			sizeof(hv_vmbus_channel_gpadl_teardown),
				M_DEVBUF, M_NOWAIT);
	KASSERT(info != NULL,
	    ("Error VMBUS: malloc failed to allocate Gpadl Teardown Msg!"));
	if (info == NULL) {
		/*
		 * Bug fix: the original jumped to "cleanup" here, which
		 * dereferenced the NULL info pointer (TAILQ_REMOVE of a
		 * never-inserted element and sema_destroy(&info->wait_sema)).
		 * Fail immediately instead.
		 */
		return (ENOMEM);
	}

	sema_init(&info->wait_sema, 0, "Open Info Sema");

	msg = (hv_vmbus_channel_gpadl_teardown*) info->msg;

	msg->header.message_type = HV_CHANNEL_MESSAGE_GPADL_TEARDOWN;
	msg->child_rel_id = channel->offer_msg.child_rel_id;
	msg->gpadl = gpadl_handle;

	/* Queue the request so the torndown handler can signal us */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor,
		info, msg_list_entry);
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);

	ret = hv_vmbus_post_message(msg,
		sizeof(hv_vmbus_channel_gpadl_teardown));
	if (ret != 0)
	    goto cleanup;

	ret = sema_timedwait(&info->wait_sema, 500); /* KYS 5 seconds */

cleanup:
	/*
	 * Received a torndown response (or timed out / failed to post):
	 * unlink the request and release it.
	 */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
		info, msg_list_entry);
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);

	sema_destroy(&info->wait_sema);
	free(info, M_DEVBUF);

	return (ret);
}
/**
 * @brief Close the specified channel
 *
 * Stops callback delivery, sends a close message to the host, tears
 * down the ring-buffer GPADL, and releases the ring-buffer pages.
 */
void
hv_vmbus_channel_close(hv_vmbus_channel *channel)
{
	int ret = 0;	/* NOTE(review): post-message status is ignored */
	hv_vmbus_channel_close_channel* msg;
	hv_vmbus_channel_msg_info* info;

	/* Prevent further inbound callbacks while tearing down */
	mtx_lock(&channel->inbound_lock);
	channel->on_channel_callback = NULL;
	mtx_unlock(&channel->inbound_lock);

	/**
	 * Send a closing message
	 */
	info = (hv_vmbus_channel_msg_info *)
		malloc(	sizeof(hv_vmbus_channel_msg_info) +
			sizeof(hv_vmbus_channel_close_channel),
				M_DEVBUF, M_NOWAIT);
	KASSERT(info != NULL, ("VMBUS: malloc failed hv_vmbus_channel_close!"));
	if(info == NULL)
	    return;

	msg = (hv_vmbus_channel_close_channel*) info->msg;
	msg->header.message_type = HV_CHANNEL_MESSAGE_CLOSE_CHANNEL;
	msg->child_rel_id = channel->offer_msg.child_rel_id;

	ret = hv_vmbus_post_message(
		msg, sizeof(hv_vmbus_channel_close_channel));

	/* Tear down the gpadl for the channel's ring buffer */
	if (channel->ring_buffer_gpadl_handle) {
		hv_vmbus_channel_teardown_gpdal(channel,
			channel->ring_buffer_gpadl_handle);
	}

	/* TODO: Send a msg to release the childRelId */

	/* cleanup the ring buffers for this channel */
	hv_ring_buffer_cleanup(&channel->outbound);
	hv_ring_buffer_cleanup(&channel->inbound);

	/*
	 * NOTE(review): contigfree(9) takes a byte count as its second
	 * argument; ring_buffer_page_count reads like a page count —
	 * verify against where the ring buffer is allocated.
	 */
	contigfree(
		channel->ring_buffer_pages,
		channel->ring_buffer_page_count,
		M_DEVBUF);

	free(info, M_DEVBUF);

	/*
	 * If we are closing the channel during an error path in
	 * opening the channel, don't free the channel
	 * since the caller will free the channel
	 */
	if (channel->state == HV_CHANNEL_OPEN_STATE) {
		mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
		TAILQ_REMOVE(
			&hv_vmbus_g_connection.channel_anchor,
			channel,
			list_entry);
		mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);

		hv_vmbus_free_vmbus_channel(channel);
	}
}
/**
 * @brief Send the specified buffer on the given channel
 *
 * Wraps the caller's payload in a vm-packet descriptor, pads it out to
 * 8-byte alignment, writes it to the outbound ring buffer, and signals
 * the host unless it has masked ring-buffer interrupts.
 */
int
hv_vmbus_channel_send_packet(
	hv_vmbus_channel*	channel,
	void*			buffer,
	uint32_t		buffer_len,
	uint64_t		request_id,
	hv_vmbus_packet_type	type,
	uint32_t		flags)
{
	hv_vm_packet_descriptor	hdr;
	hv_vmbus_sg_buffer_list	sgl[3];
	uint64_t		pad = 0;
	uint32_t		total;
	uint32_t		total_aligned;
	int			ret;

	total = sizeof(hv_vm_packet_descriptor) + buffer_len;
	total_aligned = HV_ALIGN_UP(total, sizeof(uint64_t));

	/* Fill in the packet descriptor */
	hdr.type = type;		/* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND; */
	hdr.flags = flags;	/* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
	hdr.data_offset8 = sizeof(hv_vm_packet_descriptor) >> 3; /* 8-byte units */
	hdr.length8 = (uint16_t) (total_aligned >> 3);
	hdr.transaction_id = request_id;

	/* Descriptor, payload, then padding up to 8-byte alignment */
	sgl[0].data = &hdr;
	sgl[0].length = sizeof(hv_vm_packet_descriptor);
	sgl[1].data = buffer;
	sgl[1].length = buffer_len;
	sgl[2].data = &pad;
	sgl[2].length = total_aligned - total;

	ret = hv_ring_buffer_write(&channel->outbound, sgl, 3);

	/* TODO: We should determine if this is optional */
	if (ret == 0 &&
	    !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound))
		vmbus_channel_set_event(channel);

	return (ret);
}
/**
 * @brief Send a range of single-page buffer packets using
 * a GPADL Direct packet type
 *
 * The descriptor carries up to HV_MAX_PAGE_BUFFER_COUNT page ranges;
 * it is trimmed to the number of ranges actually supplied before the
 * packet is written to the outbound ring buffer.
 */
int
hv_vmbus_channel_send_packet_pagebuffer(
	hv_vmbus_channel*	channel,
	hv_vmbus_page_buffer	page_buffers[],
	uint32_t		page_count,
	void*			buffer,
	uint32_t		buffer_len,
	uint64_t		request_id)
{
	hv_vmbus_channel_packet_page_buffer	desc;
	hv_vmbus_sg_buffer_list			sgl[3];
	uint64_t				pad = 0;
	uint32_t				desc_size;
	uint32_t				total;
	uint32_t				total_aligned;
	uint32_t				i;
	int					ret;

	if (page_count > HV_MAX_PAGE_BUFFER_COUNT)
		return (EINVAL);

	/*
	 * Shrink the descriptor: hv_vmbus_channel_packet_page_buffer is
	 * declared at the maximum range count, so drop the unused tail.
	 */
	desc_size = sizeof(hv_vmbus_channel_packet_page_buffer) -
	    ((HV_MAX_PAGE_BUFFER_COUNT - page_count) *
	    sizeof(hv_vmbus_page_buffer));
	total = desc_size + buffer_len;
	total_aligned = HV_ALIGN_UP(total, sizeof(uint64_t));

	/* Fill in the descriptor */
	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
	desc.data_offset8 = desc_size >> 3;	/* in 8-bytes granularity */
	desc.length8 = (uint16_t) (total_aligned >> 3);
	desc.transaction_id = request_id;
	desc.range_count = page_count;

	for (i = 0; i < page_count; i++) {
		desc.range[i].length = page_buffers[i].length;
		desc.range[i].offset = page_buffers[i].offset;
		desc.range[i].pfn = page_buffers[i].pfn;
	}

	/* Descriptor, payload, then padding up to 8-byte alignment */
	sgl[0].data = &desc;
	sgl[0].length = desc_size;
	sgl[1].data = buffer;
	sgl[1].length = buffer_len;
	sgl[2].data = &pad;
	sgl[2].length = total_aligned - total;

	ret = hv_ring_buffer_write(&channel->outbound, sgl, 3);

	/* TODO: We should determine if this is optional */
	if (ret == 0 &&
	    !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound))
		vmbus_channel_set_event(channel);

	return (ret);
}
/**
 * @brief Send a multi-page buffer packet using a GPADL Direct packet type
 *
 * Computes the number of pfns spanned by the caller's multipage
 * buffer, trims the descriptor to that count, and writes the packet
 * to the outbound ring buffer.
 */
int
hv_vmbus_channel_send_packet_multipagebuffer(
	hv_vmbus_channel*		channel,
	hv_vmbus_multipage_buffer*	multi_page_buffer,
	void*				buffer,
	uint32_t			buffer_len,
	uint64_t			request_id)
{
	hv_vmbus_channel_packet_multipage_buffer	desc;
	hv_vmbus_sg_buffer_list				sgl[3];
	uint64_t					pad = 0;
	uint32_t					desc_size;
	uint32_t					total;
	uint32_t					total_aligned;
	uint32_t					pfn_count;
	int						ret;

	pfn_count = HV_NUM_PAGES_SPANNED(multi_page_buffer->offset,
	    multi_page_buffer->length);

	if ((pfn_count == 0) || (pfn_count > HV_MAX_MULTIPAGE_BUFFER_COUNT))
		return (EINVAL);

	/*
	 * Shrink the descriptor: hv_vmbus_channel_packet_multipage_buffer
	 * is declared at the maximum pfn count, so drop the unused tail.
	 */
	desc_size = sizeof(hv_vmbus_channel_packet_multipage_buffer) -
	    ((HV_MAX_MULTIPAGE_BUFFER_COUNT - pfn_count) *
	    sizeof(uint64_t));
	total = desc_size + buffer_len;
	total_aligned = HV_ALIGN_UP(total, sizeof(uint64_t));

	/*
	 * Setup the descriptor
	 */
	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
	desc.data_offset8 = desc_size >> 3;	/* in 8-bytes granularity */
	desc.length8 = (uint16_t) (total_aligned >> 3);
	desc.transaction_id = request_id;
	desc.range_count = 1;

	desc.range.length = multi_page_buffer->length;
	desc.range.offset = multi_page_buffer->offset;
	memcpy(desc.range.pfn_array, multi_page_buffer->pfn_array,
	    pfn_count * sizeof(uint64_t));

	/* Descriptor, payload, then padding up to 8-byte alignment */
	sgl[0].data = &desc;
	sgl[0].length = desc_size;
	sgl[1].data = buffer;
	sgl[1].length = buffer_len;
	sgl[2].data = &pad;
	sgl[2].length = total_aligned - total;

	ret = hv_ring_buffer_write(&channel->outbound, sgl, 3);

	/* TODO: We should determine if this is optional */
	if (ret == 0 &&
	    !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound))
		vmbus_channel_set_event(channel);

	return (ret);
}
/**
 * @brief Retrieve the user packet on the specified channel
 *
 * Peeks at the next inbound packet descriptor and, if the caller's
 * buffer is large enough, copies the packet payload into it.
 *
 * @returns 0 when the ring is empty or the copy succeeds; EINVAL when
 *	    the caller's buffer is too small (with *buffer_actual_len
 *	    set to the required size); otherwise the ring-read status.
 */
int
hv_vmbus_channel_recv_packet(
	hv_vmbus_channel*	channel,
	void*			Buffer,
	uint32_t		buffer_len,
	uint32_t*		buffer_actual_len,
	uint64_t*		request_id)
{
	int			ret;
	uint32_t		user_len;
	uint32_t		packet_len;
	hv_vm_packet_descriptor	desc;

	*buffer_actual_len = 0;
	*request_id = 0;

	ret = hv_ring_buffer_peek(&channel->inbound, &desc,
		sizeof(hv_vm_packet_descriptor));
	if (ret != 0)
		return (0);	/* empty ring buffer: nothing to deliver */

	packet_len = desc.length8 << 3;
	user_len = packet_len - (desc.data_offset8 << 3);

	*buffer_actual_len = user_len;

	if (user_len > buffer_len)
		return (EINVAL);

	*request_id = desc.transaction_id;

	/* Copy over the packet to the user buffer */
	ret = hv_ring_buffer_read(&channel->inbound, Buffer, user_len,
		(desc.data_offset8 << 3));

	/*
	 * Bug fix: the read status was previously discarded and 0
	 * returned unconditionally; propagate it so callers do not
	 * consume a buffer that was never filled.
	 */
	return (ret);
}
/**
 * @brief Retrieve the raw packet on the specified channel
 *
 * Unlike hv_vmbus_channel_recv_packet(), the vm-packet descriptor is
 * copied to the caller's buffer along with the payload.
 *
 * @returns 0 when the ring is empty or the copy succeeds; ENOBUFS when
 *	    the caller's buffer is too small (with *buffer_actual_len
 *	    set to the required size); otherwise the ring-read status.
 */
int
hv_vmbus_channel_recv_packet_raw(
	hv_vmbus_channel*	channel,
	void*			buffer,
	uint32_t		buffer_len,
	uint32_t*		buffer_actual_len,
	uint64_t*		request_id)
{
	int			ret;
	uint32_t		packetLen;
	hv_vm_packet_descriptor	desc;

	*buffer_actual_len = 0;
	*request_id = 0;

	ret = hv_ring_buffer_peek(
		&channel->inbound, &desc,
		sizeof(hv_vm_packet_descriptor));
	if (ret != 0)
		return (0);	/* empty ring buffer: nothing to deliver */

	/*
	 * Cleanup: the original also computed a "userLen" here that was
	 * never used; removed.
	 */
	packetLen = desc.length8 << 3;

	*buffer_actual_len = packetLen;

	if (packetLen > buffer_len)
		return (ENOBUFS);

	*request_id = desc.transaction_id;

	/* Copy over the entire packet to the user buffer */
	ret = hv_ring_buffer_read(&channel->inbound, buffer, packetLen, 0);

	/*
	 * Bug fix: the read status was previously discarded and 0
	 * returned unconditionally; propagate it instead.
	 */
	return (ret);
}

View File

@ -0,0 +1,680 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/mbuf.h>
#include "hv_vmbus_priv.h"
/* Handler signature for one channel-protocol message type */
typedef void (*hv_pfn_channel_msg_handler)(hv_vmbus_channel_msg_header* msg);

/* One dispatch-table slot: a message type and its handler (NULL = ignore) */
typedef struct hv_vmbus_channel_msg_table_entry {
	hv_vmbus_channel_msg_type messageType;
	hv_pfn_channel_msg_handler messageHandler;
} hv_vmbus_channel_msg_table_entry;

/*
 * Internal functions
 */

static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_process_offer(void *context);

/**
 * Channel message dispatch table
 *
 * Indexed directly by hv_vmbus_channel_msg_type in
 * hv_vmbus_on_channel_message(), so entries must stay in enum order.
 * A NULL handler means the message is silently ignored.
 */
hv_vmbus_channel_msg_table_entry
    g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = {
	{ HV_CHANNEL_MESSAGE_INVALID, NULL },
	{ HV_CHANNEL_MESSAGE_OFFER_CHANNEL, vmbus_channel_on_offer },
	{ HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER,
		vmbus_channel_on_offer_rescind },
	{ HV_CHANNEL_MESSAGE_REQUEST_OFFERS, NULL },
	{ HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED,
		vmbus_channel_on_offers_delivered },
	{ HV_CHANNEL_MESSAGE_OPEN_CHANNEL, NULL },
	{ HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT,
		vmbus_channel_on_open_result },
	{ HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, NULL },
	{ HV_CHANNEL_MESSAGEL_GPADL_HEADER, NULL },
	{ HV_CHANNEL_MESSAGE_GPADL_BODY, NULL },
	{ HV_CHANNEL_MESSAGE_GPADL_CREATED,
		vmbus_channel_on_gpadl_created },
	{ HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, NULL },
	{ HV_CHANNEL_MESSAGE_GPADL_TORNDOWN,
		vmbus_channel_on_gpadl_torndown },
	{ HV_CHANNEL_MESSAGE_REL_ID_RELEASED, NULL },
	{ HV_CHANNEL_MESSAGE_INITIATED_CONTACT, NULL },
	{ HV_CHANNEL_MESSAGE_VERSION_RESPONSE,
		vmbus_channel_on_version_response },
	{ HV_CHANNEL_MESSAGE_UNLOAD, NULL }
};
/**
* Implementation of the work abstraction.
*/
static void
work_item_callback(void *work, int pending)
{
struct hv_work_item *w = (struct hv_work_item *)work;
/*
* Serialize work execution.
*/
if (w->wq->work_sema != NULL) {
sema_wait(w->wq->work_sema);
}
w->callback(w->context);
if (w->wq->work_sema != NULL) {
sema_post(w->wq->work_sema);
}
free(w, M_DEVBUF);
}
/**
 * @brief Create a named work queue backed by a single-threaded taskqueue
 *
 * @param name "vmbusQ" for the main queue; anything else is treated as
 *	       a control queue and serialized via the global control
 *	       semaphore.
 * @returns the new queue, or NULL on allocation/start failure
 */
struct hv_work_queue*
hv_work_queue_create(char* name)
{
	static unsigned int	qid = 0;
	char			qname[64];
	int			pri;
	struct hv_work_queue*	wq;

	wq = malloc(sizeof(struct hv_work_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
	KASSERT(wq != NULL, ("Error VMBUS: Failed to allocate work_queue\n"));
	if (wq == NULL)
	    return (NULL);

	/*
	 * We use work abstraction to handle messages
	 * coming from the host and these are typically offers.
	 * Some FreeBSD drivers appear to have a concurrency issue
	 * where probe/attach needs to be serialized. We ensure that
	 * by having only one thread process work elements in a
	 * specific queue by serializing work execution.
	 */
	if (strcmp(name, "vmbusQ") == 0) {
	    pri = PI_DISK;
	} else { /* control */
	    pri = PI_NET;
	    /*
	     * Initialize semaphore for this queue by pointing
	     * to the global semaphore used for synchronizing all
	     * control messages.
	     */
	    wq->work_sema = &hv_vmbus_g_connection.control_sema;
	}

	/*
	 * Bug fix: use snprintf() so a long queue name cannot overflow
	 * qname[]; the original used an unbounded sprintf().
	 */
	snprintf(qname, sizeof(qname), "hv_%s_%u", name, qid);

	/*
	 * Fixme:  FreeBSD 8.2 has a different prototype for
	 * taskqueue_create(), and for certain other taskqueue functions.
	 * We need to research the implications of these changes.
	 * Fixme:  Not sure when the changes were introduced.
	 */
	wq->queue = taskqueue_create(qname, M_NOWAIT, taskqueue_thread_enqueue,
	    &wq->queue
	    #if __FreeBSD_version < 800000
	    , &wq->proc
	    #endif
	    );

	if (wq->queue == NULL) {
	    free(wq, M_DEVBUF);
	    return (NULL);
	}

	if (taskqueue_start_threads(&wq->queue, 1, pri, "%s taskq", qname)) {
	    taskqueue_free(wq->queue);
	    free(wq, M_DEVBUF);
	    return (NULL);
	}

	qid++;

	return (wq);
}
/**
 * @brief Destroy a work queue created by hv_work_queue_create()
 *
 * Frees the underlying taskqueue and the queue object.  The work_sema
 * pointer (the global control semaphore) is not destroyed here.
 */
void
hv_work_queue_close(struct hv_work_queue *wq)
{
	/*
	 * KYS: Need to drain the taskqueue
	 * before we close the hv_work_queue.
	 */
	/*KYS: taskqueue_drain(wq->tq, ); */
	taskqueue_free(wq->queue);
	free(wq, M_DEVBUF);
}
/**
* @brief Create work item
*/
int
hv_queue_work_item(
struct hv_work_queue *wq,
void (*callback)(void *), void *context)
{
struct hv_work_item *w = malloc(sizeof(struct hv_work_item),
M_DEVBUF, M_NOWAIT | M_ZERO);
KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n"));
if (w == NULL)
return (ENOMEM);
w->callback = callback;
w->context = context;
w->wq = wq;
TASK_INIT(&w->work, 0, work_item_callback, w);
return (taskqueue_enqueue(wq->queue, &w->work));
}
/**
 * @brief Rescind the offer by initiating a device removal
 *
 * Runs on the channel's control work queue (queued by
 * vmbus_channel_on_offer_rescind); detaches the child device that was
 * created for the rescinded channel.
 */
static void
vmbus_channel_process_rescind_offer(void *context)
{
	hv_vmbus_channel* channel = (hv_vmbus_channel*) context;

	hv_vmbus_child_device_unregister(channel->device);
}
/**
* @brief Allocate and initialize a vmbus channel object
*/
hv_vmbus_channel*
hv_vmbus_allocate_channel(void)
{
hv_vmbus_channel* channel;
channel = (hv_vmbus_channel*) malloc(
sizeof(hv_vmbus_channel),
M_DEVBUF,
M_NOWAIT | M_ZERO);
KASSERT(channel != NULL, ("Error VMBUS: Failed to allocate channel!"));
if (channel == NULL)
return (NULL);
mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF);
channel->control_work_queue = hv_work_queue_create("control");
if (channel->control_work_queue == NULL) {
mtx_destroy(&channel->inbound_lock);
free(channel, M_DEVBUF);
return (NULL);
}
return (channel);
}
/**
 * @brief Release the vmbus channel object itself
 *
 * Runs as a work item on the global vmbus work queue (queued by
 * hv_vmbus_free_vmbus_channel), so the channel's own control work
 * queue can be closed from outside its thread context.
 */
static inline void
ReleaseVmbusChannel(void *context)
{
	hv_vmbus_channel* channel = (hv_vmbus_channel*) context;

	/* Close the channel's work queue before freeing the channel */
	hv_work_queue_close(channel->control_work_queue);
	free(channel, M_DEVBUF);
}
/**
 * @brief Release the resources used by the vmbus channel object
 *
 * Destroys the inbound mutex immediately, then defers the rest of the
 * teardown (work-queue close and the free itself) to the global vmbus
 * work queue via ReleaseVmbusChannel().
 */
void
hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
{
	mtx_destroy(&channel->inbound_lock);
	/*
	 * We have to release the channel's workqueue/thread in
	 * the vmbus's workqueue/thread context
	 * ie we can't destroy ourselves
	 */
	/*
	 * NOTE(review): the enqueue status is ignored; if the enqueue
	 * ever failed the channel object would leak — confirm this
	 * cannot happen during teardown.
	 */
	hv_queue_work_item(hv_vmbus_g_connection.work_queue,
	    ReleaseVmbusChannel, (void *) channel);
}
/**
 * @brief Process the offer by creating a channel/device
 * associated with this offer
 *
 * Runs on the new channel's control work queue.  Deduplicates against
 * channels already on the global list (matching interface type and
 * instance GUIDs), then creates and registers the child device.
 */
static void
vmbus_channel_process_offer(void *context)
{
	int			ret;
	hv_vmbus_channel*	new_channel;
	boolean_t		f_new;
	hv_vmbus_channel*	channel;

	new_channel = (hv_vmbus_channel*) context;
	f_new = TRUE;
	channel = NULL;

	/*
	 * Make sure this is a new offer
	 */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);

	TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
	    list_entry)
	{
		/* Duplicate iff both the type and instance GUIDs match */
		if (!memcmp(
			&channel->offer_msg.offer.interface_type,
			&new_channel->offer_msg.offer.interface_type,
			sizeof(hv_guid))
			&& !memcmp(
				&channel->offer_msg.offer.interface_instance,
				&new_channel->offer_msg.offer.interface_instance,
				sizeof(hv_guid))) {
			f_new = FALSE;
			break;
		}
	}

	if (f_new) {
		/* Insert at tail */
		TAILQ_INSERT_TAIL(
			&hv_vmbus_g_connection.channel_anchor,
			new_channel,
			list_entry);
	}
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);

	/* Duplicate offer: drop the newly allocated channel */
	if (!f_new) {
		hv_vmbus_free_vmbus_channel(new_channel);
		return;
	}

	/*
	 * Start the process of binding this offer to the driver
	 * (We need to set the device field before calling
	 * hv_vmbus_child_device_add())
	 */
	new_channel->device = hv_vmbus_child_device_create(
	    new_channel->offer_msg.offer.interface_type,
	    new_channel->offer_msg.offer.interface_instance, new_channel);

	/*
	 * TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below
	 * but in the "open" channel request. The ret != 0 logic below
	 * doesn't take into account that a channel
	 * may have been opened successfully
	 */

	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	ret = hv_vmbus_child_device_register(new_channel->device);
	if (ret != 0) {
		/* Registration failed: unlink and destroy the channel */
		mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
		TAILQ_REMOVE(
			&hv_vmbus_g_connection.channel_anchor,
			new_channel,
			list_entry);
		mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
		hv_vmbus_free_vmbus_channel(new_channel);
	} else {
		/*
		 * This state is used to indicate a successful open
		 * so that when we do close the channel normally,
		 * we can clean up properly
		 */
		new_channel->state = HV_CHANNEL_OPEN_STATE;
	}
}
/**
 * @brief Handler for channel offers from Hyper-V/Azure
 *
 * Handler for channel offers from vmbus in parent partition. We ignore
 * all offers except network and storage offers. For each network and storage
 * offers, we create a channel object and queue a work item to the channel
 * object to process the offer synchronously
 */
static void
vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
{
	hv_vmbus_channel_offer_channel* offer;
	hv_vmbus_channel* new_channel;

	/*
	 * Cleanup: the original declared guidType/guidInstance locals,
	 * assigned them from the offer, and never used them; removed.
	 */
	offer = (hv_vmbus_channel_offer_channel*) hdr;

	/* Allocate the channel object and save this offer */
	new_channel = hv_vmbus_allocate_channel();
	if (new_channel == NULL)
	    return;

	memcpy(&new_channel->offer_msg, offer,
	    sizeof(hv_vmbus_channel_offer_channel));

	/* monitor_id selects a bit within a 32-bit monitor group */
	new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
	new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;

	/* TODO: Make sure the offer comes from our parent partition */
	hv_queue_work_item(
	    new_channel->control_work_queue,
	    vmbus_channel_process_offer,
	    new_channel);
}
/**
 * @brief Rescind offer handler.
 *
 * Looks up the channel by its relative id and, when found, queues the
 * actual device removal to the channel's control work queue so it is
 * processed synchronously.
 */
static void
vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr)
{
	hv_vmbus_channel*		channel;
	hv_vmbus_channel_rescind_offer*	rescind;

	rescind = (hv_vmbus_channel_rescind_offer*) hdr;
	channel = hv_vmbus_get_channel_from_rel_id(rescind->child_rel_id);
	if (channel != NULL) {
		hv_queue_work_item(channel->control_work_queue,
		    vmbus_channel_process_rescind_offer, channel);
	}
}
/**
 *
 * @brief Invoked when all offers have been delivered.
 *
 * No action is required here: each offer is handled individually by
 * vmbus_channel_on_offer() as it arrives.
 */
static void
vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr)
{
}
/**
 * @brief Open result handler.
 *
 * This is invoked when we received a response
 * to our channel open request. Find the matching request, copy the
 * response and signal the requesting thread.
 */
static void
vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr)
{
	hv_vmbus_channel_open_result*	result;
	hv_vmbus_channel_msg_info*	msg_info;
	hv_vmbus_channel_msg_header*	request_header;
	hv_vmbus_channel_open_channel*	open_msg;

	result = (hv_vmbus_channel_open_result*) hdr;

	/*
	 * Walk the pending-request list under the message lock; wake the
	 * request whose (child_rel_id, open_id) pair matches the result.
	 */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
	    msg_list_entry) {
		request_header = (hv_vmbus_channel_msg_header*) msg_info->msg;

		if (request_header->message_type !=
		    HV_CHANNEL_MESSAGE_OPEN_CHANNEL)
			continue;

		open_msg = (hv_vmbus_channel_open_channel*) msg_info->msg;
		if (open_msg->child_rel_id == result->child_rel_id &&
		    open_msg->open_id == result->open_id) {
			memcpy(&msg_info->response.open_result, result,
			    sizeof(hv_vmbus_channel_open_result));
			sema_post(&msg_info->wait_sema);
			break;
		}
	}
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
}
/**
 * @brief GPADL created handler.
 *
 * This is invoked when we received a response
 * to our gpadl create request. Find the matching request, copy the
 * response and signal the requesting thread.
 */
static void
vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr)
{
	hv_vmbus_channel_gpadl_created*		gpadl_created;
	hv_vmbus_channel_msg_info*		msg_info;
	hv_vmbus_channel_msg_header*		request_header;
	hv_vmbus_channel_gpadl_header*		gpadl_header;

	gpadl_created = (hv_vmbus_channel_gpadl_created*) hdr;

	/* Find the establish msg, copy the result and signal/unblock
	 * the wait event
	 */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
	    msg_list_entry) {
	    request_header = (hv_vmbus_channel_msg_header*) msg_info->msg;
	    if (request_header->message_type ==
		    HV_CHANNEL_MESSAGEL_GPADL_HEADER) {
		gpadl_header =
		    (hv_vmbus_channel_gpadl_header*) request_header;

		/* Match on both the relative channel id and the gpadl */
		if ((gpadl_created->child_rel_id == gpadl_header->child_rel_id)
		    && (gpadl_created->gpadl == gpadl_header->gpadl)) {
		    memcpy(&msg_info->response.gpadl_created,
			gpadl_created,
			sizeof(hv_vmbus_channel_gpadl_created));
		    sema_post(&msg_info->wait_sema);
		    break;
		}
	    }
	}
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
}
/**
 * @brief GPADL torndown handler.
 *
 * This is invoked when we received a response
 * to our gpadl teardown request. Find the matching request, copy the
 * response and signal the requesting thread
 */
static void
vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr)
{
	hv_vmbus_channel_gpadl_torndown*	torndown;
	hv_vmbus_channel_msg_info*		msg_info;
	hv_vmbus_channel_msg_header*		request_header;
	hv_vmbus_channel_gpadl_teardown*	teardown_request;

	torndown = (hv_vmbus_channel_gpadl_torndown*) hdr;

	/*
	 * Locate the pending teardown request with a matching gpadl
	 * and wake the thread waiting on it.
	 */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
	    msg_list_entry) {
		request_header = (hv_vmbus_channel_msg_header*) msg_info->msg;

		if (request_header->message_type !=
		    HV_CHANNEL_MESSAGE_GPADL_TEARDOWN)
			continue;

		teardown_request =
		    (hv_vmbus_channel_gpadl_teardown*) request_header;
		if (torndown->gpadl == teardown_request->gpadl) {
			memcpy(&msg_info->response.gpadl_torndown,
			    torndown,
			    sizeof(hv_vmbus_channel_gpadl_torndown));
			sema_post(&msg_info->wait_sema);
			break;
		}
	}
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
}
/**
 * @brief Version response handler.
 *
 * This is invoked when we received a response
 * to our initiate contact request. Find the matching request, copy the
 * response and signal the requesting thread.
 */
static void
vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr)
{
	hv_vmbus_channel_msg_info*		msg_info;
	hv_vmbus_channel_msg_header*		requestHeader;
	hv_vmbus_channel_initiate_contact*	initiate;
	hv_vmbus_channel_version_response*	versionResponse;

	versionResponse = (hv_vmbus_channel_version_response*)hdr;

	/*
	 * NOTE(review): unlike the other handlers, there is no break
	 * after sema_post, so every pending INITIATED_CONTACT request
	 * is signalled; presumably only one can be outstanding at a
	 * time — confirm.  The "initiate" cast is also unused beyond
	 * identifying the request type.
	 */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
	    msg_list_entry) {
	    requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
	    if (requestHeader->message_type
		    == HV_CHANNEL_MESSAGE_INITIATED_CONTACT) {
		initiate =
		    (hv_vmbus_channel_initiate_contact*) requestHeader;
		memcpy(&msg_info->response.version_response,
		    versionResponse,
		    sizeof(hv_vmbus_channel_version_response));
		sema_post(&msg_info->wait_sema);
	    }
	}
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
}
/**
 * @brief Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 *
 * Dispatches the message to the handler registered for its type in
 * g_channel_message_table, then frees the message in all paths.
 */
void
hv_vmbus_on_channel_message(void *context)
{
	hv_vmbus_message*		msg;
	hv_vmbus_channel_msg_header*	hdr;

	/*
	 * Cleanup: the original also read msg->header.payload_size into
	 * a local "size" that was never used; removed.
	 */
	msg = (hv_vmbus_message*) context;
	hdr = (hv_vmbus_channel_msg_header*) msg->u.payload;

	/* Unknown message types have no table slot: drop them */
	if (hdr->message_type >= HV_CHANNEL_MESSAGE_COUNT) {
	    free(msg, M_DEVBUF);
	    return;
	}

	if (g_channel_message_table[hdr->message_type].messageHandler) {
	    g_channel_message_table[hdr->message_type].messageHandler(hdr);
	}

	/* Free the msg that was allocated in VmbusOnMsgDPC() */
	free(msg, M_DEVBUF);
}
/**
 * @brief Send a request to get all our pending offers.
 *
 * @returns 0 on success; ENOMEM on allocation failure; otherwise the
 *	    hv_vmbus_post_message() status.
 */
int
hv_vmbus_request_channel_offers(void)
{
	int				ret;
	hv_vmbus_channel_msg_header*	msg;
	hv_vmbus_channel_msg_info*	msg_info;

	msg_info = (hv_vmbus_channel_msg_info *)
	    malloc(sizeof(hv_vmbus_channel_msg_info)
		    + sizeof(hv_vmbus_channel_msg_header), M_DEVBUF, M_NOWAIT);

	if (msg_info == NULL) {
	    if(bootverbose)
		printf("Error VMBUS: malloc failed for Request Offers\n");
	    return (ENOMEM);
	}

	msg = (hv_vmbus_channel_msg_header*) msg_info->msg;
	msg->message_type = HV_CHANNEL_MESSAGE_REQUEST_OFFERS;

	ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header));

	/*
	 * Cleanup: msg_info is provably non-NULL here, so the original
	 * "if (msg_info)" guard before free() was redundant.
	 */
	free(msg_info, M_DEVBUF);

	return (ret);
}
/**
 * @brief Release channels that are unattached/unconnected (i.e., no drivers associated)
 *
 * Drains the global channel list, unregistering each channel's child
 * device and freeing the channel object.
 */
void
hv_vmbus_release_unattached_channels(void)
{
	hv_vmbus_channel *channel;

	/*
	 * NOTE(review): hv_vmbus_child_device_unregister() is called
	 * while the channel_lock spin mutex is held; confirm the
	 * unregister path cannot sleep, or the list should be drained
	 * outside the lock.
	 */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);

	while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
	    channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
	    TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
			    channel, list_entry);
	    hv_vmbus_child_device_unregister(channel->device);
	    hv_vmbus_free_vmbus_channel(channel);
	}
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
}

View File

@ -0,0 +1,431 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <machine/bus.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include "hv_vmbus_priv.h"
/*
* Globals
*/
hv_vmbus_connection hv_vmbus_g_connection =
{ .connect_state = HV_DISCONNECTED,
.next_gpadl_handle = 0xE1E10, };
/**
 * Send a connect request on the partition service connection.
 *
 * Allocates the shared interrupt page and the two monitor pages, queues
 * an INITIATED_CONTACT message to the host, and waits for the host's
 * version response before declaring the connection up.
 *
 * Returns 0 on success; -1 if already connecting/connected; ENOMEM,
 * ECONNREFUSED (version rejected or wait timed out) or a post-message
 * error otherwise.  On failure all partially built state is torn down.
 */
int
hv_vmbus_connect(void) {
	int					ret = 0;
	hv_vmbus_channel_msg_info*		msg_info = NULL;
	hv_vmbus_channel_initiate_contact*	msg;

	/**
	 * Make sure we are not connecting or connected
	 */
	if (hv_vmbus_g_connection.connect_state != HV_DISCONNECTED) {
		return (-1);
	}

	/**
	 * Initialize the vmbus connection
	 */
	hv_vmbus_g_connection.connect_state = HV_CONNECTING;
	hv_vmbus_g_connection.work_queue = hv_work_queue_create("vmbusQ");
	sema_init(&hv_vmbus_g_connection.control_sema, 1, "control_sema");

	TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
	mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
	    NULL, MTX_SPIN);

	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
	    NULL, MTX_SPIN);

	/**
	 * Setup the vmbus event connection for channel interrupt abstraction
	 * stuff
	 */
	hv_vmbus_g_connection.interrupt_page = contigmalloc(
	    PAGE_SIZE, M_DEVBUF,
	    M_NOWAIT | M_ZERO, 0UL,
	    BUS_SPACE_MAXADDR,
	    PAGE_SIZE, 0);
	KASSERT(hv_vmbus_g_connection.interrupt_page != NULL,
	    ("Error VMBUS: malloc failed to allocate Channel"
	    " Request Event message!"));
	if (hv_vmbus_g_connection.interrupt_page == NULL) {
		ret = ENOMEM;
		goto cleanup;
	}

	/* First half of the page is the receive window, second half send */
	hv_vmbus_g_connection.recv_interrupt_page =
	    hv_vmbus_g_connection.interrupt_page;

	hv_vmbus_g_connection.send_interrupt_page =
	    ((uint8_t *) hv_vmbus_g_connection.interrupt_page +
	    (PAGE_SIZE >> 1));

	/**
	 * Set up the monitor notification facility. The 1st page for
	 * parent->child and the 2nd page for child->parent
	 */
	hv_vmbus_g_connection.monitor_pages = contigmalloc(
	    2 * PAGE_SIZE,
	    M_DEVBUF,
	    M_NOWAIT | M_ZERO,
	    0UL,
	    BUS_SPACE_MAXADDR,
	    PAGE_SIZE,
	    0);
	KASSERT(hv_vmbus_g_connection.monitor_pages != NULL,
	    ("Error VMBUS: malloc failed to allocate Monitor Pages!"));
	if (hv_vmbus_g_connection.monitor_pages == NULL) {
		ret = ENOMEM;
		goto cleanup;
	}

	/* M_ZERO matters below: a timed-out wait leaves the response zeroed */
	msg_info = (hv_vmbus_channel_msg_info*)
	    malloc(sizeof(hv_vmbus_channel_msg_info) +
		sizeof(hv_vmbus_channel_initiate_contact),
		M_DEVBUF, M_NOWAIT | M_ZERO);
	KASSERT(msg_info != NULL,
	    ("Error VMBUS: malloc failed for Initiate Contact message!"));
	if (msg_info == NULL) {
		ret = ENOMEM;
		goto cleanup;
	}

	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;

	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
	msg->vmbus_version_requested = HV_VMBUS_REVISION_NUMBER;

	/* The host needs guest-physical addresses of the shared pages */
	msg->interrupt_page = hv_get_phys_addr(
	    hv_vmbus_g_connection.interrupt_page);

	msg->monitor_page_1 = hv_get_phys_addr(
	    hv_vmbus_g_connection.monitor_pages);

	msg->monitor_page_2 =
	    hv_get_phys_addr(
		((uint8_t *) hv_vmbus_g_connection.monitor_pages
		+ PAGE_SIZE));

	/**
	 * Add to list before we send the request since we may receive the
	 * response before returning from this routine
	 */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);

	TAILQ_INSERT_TAIL(
	    &hv_vmbus_g_connection.channel_msg_anchor,
	    msg_info,
	    msg_list_entry);

	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);

	ret = hv_vmbus_post_message(
	    msg,
	    sizeof(hv_vmbus_channel_initiate_contact));

	if (ret != 0) {
		/* Send failed: take our request back off the pending list */
		mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
		TAILQ_REMOVE(
		    &hv_vmbus_g_connection.channel_msg_anchor,
		    msg_info,
		    msg_list_entry);
		mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
		goto cleanup;
	}

	/**
	 * Wait for the connection response
	 */
	ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */

	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
	TAILQ_REMOVE(
	    &hv_vmbus_g_connection.channel_msg_anchor,
	    msg_info,
	    msg_list_entry);
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);

	/**
	 * Check if successful.  On a timeout the response area is still
	 * all-zero (M_ZERO above), so version_supported reads false and
	 * we fall through to ECONNREFUSED.
	 */
	if (msg_info->response.version_response.version_supported) {
		hv_vmbus_g_connection.connect_state = HV_CONNECTED;
	} else {
		ret = ECONNREFUSED;
		goto cleanup;
	}

	sema_destroy(&msg_info->wait_sema);
	free(msg_info, M_DEVBUF);

	return (0);

	/*
	 * Cleanup after failure!
	 */
	cleanup:

	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;

	hv_work_queue_close(hv_vmbus_g_connection.work_queue);
	sema_destroy(&hv_vmbus_g_connection.control_sema);
	mtx_destroy(&hv_vmbus_g_connection.channel_lock);
	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);

	if (hv_vmbus_g_connection.interrupt_page != NULL) {
		contigfree(
		    hv_vmbus_g_connection.interrupt_page,
		    PAGE_SIZE,
		    M_DEVBUF);
		hv_vmbus_g_connection.interrupt_page = NULL;
	}

	if (hv_vmbus_g_connection.monitor_pages != NULL) {
		contigfree(
		    hv_vmbus_g_connection.monitor_pages,
		    2 * PAGE_SIZE,
		    M_DEVBUF);
		hv_vmbus_g_connection.monitor_pages = NULL;
	}

	if (msg_info) {
		sema_destroy(&msg_info->wait_sema);
		free(msg_info, M_DEVBUF);
	}

	return (ret);
}
/**
 * Send a disconnect (CHANNEL_MESSAGE_UNLOAD) request on the partition
 * service connection and release the resources hv_vmbus_connect() set up.
 *
 * Returns 0 on success, ENOMEM if the unload message cannot be
 * allocated, or the error from posting the message.
 */
int
hv_vmbus_disconnect(void) {
	int			ret = 0;
	hv_vmbus_channel_unload*  msg;

	msg = malloc(sizeof(hv_vmbus_channel_unload),
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	KASSERT(msg != NULL,
	    ("Error VMBUS: malloc failed to allocate Channel Unload Msg!"));
	if (msg == NULL)
		return (ENOMEM);

	msg->message_type = HV_CHANNEL_MESSAGE_UNLOAD;

	ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_unload));

	contigfree(hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF);

	/*
	 * Bug fix: also release the monitor pages allocated by
	 * hv_vmbus_connect(); previously they were freed only on the
	 * connect failure path and leaked on normal disconnect.
	 */
	if (hv_vmbus_g_connection.monitor_pages != NULL) {
		contigfree(hv_vmbus_g_connection.monitor_pages,
		    2 * PAGE_SIZE, M_DEVBUF);
		hv_vmbus_g_connection.monitor_pages = NULL;
	}

	mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);

	hv_work_queue_close(hv_vmbus_g_connection.work_queue);
	sema_destroy(&hv_vmbus_g_connection.control_sema);

	hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;

	free(msg, M_DEVBUF);

	return (ret);
}
/**
 * Look up a channel object by its child relative id (channel id).
 * Returns NULL when no channel with that relid is on the global list.
 */
hv_vmbus_channel*
hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) {
	hv_vmbus_channel*	match = NULL;
	hv_vmbus_channel*	cur;

	/*
	 * TODO: consider keeping relids in a fixed-size array so lookup
	 * can avoid taking this lock and walking the whole list.
	 */
	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
	TAILQ_FOREACH(cur, &hv_vmbus_g_connection.channel_anchor,
	    list_entry) {
		if (cur->offer_msg.child_rel_id == rel_id) {
			match = cur;
			break;
		}
	}
	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);

	return (match);
}
/**
 * Dispatch one channel event: resolve the relid to its channel object
 * and invoke the channel's callback, if one is installed.
 */
static void
VmbusProcessChannelEvent(uint32_t relid)
{
	hv_vmbus_channel*	channel;

	channel = hv_vmbus_get_channel_from_rel_id(relid);
	if (channel == NULL)
		return;

	/*
	 * Dispatch while holding the inbound lock: a driver being
	 * unloaded acquires the same lock to NULL out its callback, so
	 * this closes the race with packets arriving during unload.
	 */
	mtx_lock(&channel->inbound_lock);
	if (channel->on_channel_callback != NULL)
		channel->on_channel_callback(channel->channel_callback_context);
	mtx_unlock(&channel->inbound_lock);
}
/**
 * Handler for channel events: scan the receive interrupt page and
 * dispatch an event for every set bit, clearing each bit as it is
 * consumed.  The arg parameter is unused.
 */
void
hv_vmbus_on_events(void *arg)
{
	int		dword;
	int		bit;
	int		rel_id;
	/*
	 * The receive window is half a page; each uint32_t carries the
	 * flags of 32 channels (HV_MAX_NUM_CHANNELS_SUPPORTED / 32 words).
	 */
	int		maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;
	uint32_t*	recv_interrupt_page =
	    hv_vmbus_g_connection.recv_interrupt_page;

	if (recv_interrupt_page == NULL)
		return;

	for (dword = 0; dword < maxdword; dword++) {
		if (recv_interrupt_page[dword] == 0)
			continue;
		for (bit = 0; bit < 32; bit++) {
			if (!synch_test_and_clear_bit(bit,
			    (uint32_t *) &recv_interrupt_page[dword]))
				continue;
			rel_id = (dword << 5) + bit;
			/*
			 * relid 0 is reserved for vmbus channel
			 * protocol messages; nothing to dispatch.
			 */
			if (rel_id != 0)
				VmbusProcessChannelEvent(rel_id);
		}
	}
}
/**
 * Post a message on the vmbus message connection, retrying with an
 * escalating delay while the hypervisor reports insufficient buffers.
 */
int hv_vmbus_post_message(void *buffer, size_t bufferLen) {
	hv_vmbus_connection_id	conn_id;
	int			ret = 0;
	unsigned		attempt;
	/* Back-off schedule consolidated from the NetScaler code */
	static int		delay_tab[] =
	    {100, 100, 100, 500, 500, 5000, 5000, 5000};

	for (attempt = 0;
	    attempt < sizeof(delay_tab) / sizeof(delay_tab[0]);
	    attempt++) {
		conn_id.as_uint32_t = 0;
		conn_id.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
		ret = hv_vmbus_post_msg_via_msg_ipc(conn_id, 1, buffer,
		    bufferLen);
		if (ret != HV_STATUS_INSUFFICIENT_BUFFERS)
			break;
		/* TODO: KYS We should use a blocking wait call */
		DELAY(delay_tab[attempt]);
	}

	KASSERT(ret == 0, ("Error VMBUS: Message Post Failed\n"));

	return (ret);
}
/**
 * Notify the parent partition that channel child_rel_id has work
 * pending: set the channel's flag in the send interrupt page, then
 * issue the signal-event hypercall.
 */
int
hv_vmbus_set_event(uint32_t child_rel_id) {
	uint32_t *send_page;

	/* Each uint32_t of the page holds the flags of 32 channels */
	send_page = (uint32_t *)hv_vmbus_g_connection.send_interrupt_page;
	synch_set_bit(child_rel_id & 31, send_page + (child_rel_id >> 5));

	return (hv_vmbus_signal_event());
}

View File

@ -0,0 +1,494 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Implements low-level interactions with Hypver-V/Azure
*/
#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/timetc.h>
#include <machine/bus.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include "hv_vmbus_priv.h"
#define HV_X64_MSR_GUEST_OS_ID 0x40000000
#define HV_X64_CPUID_MIN 0x40000005
#define HV_X64_CPUID_MAX 0x4000ffff
#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
#define HV_NANOSECONDS_PER_SEC 1000000000L
static u_int hv_get_timecount(struct timecounter *tc);
/*
 * Wrapper around the CPUID instruction returning all four result
 * registers.  On input, op selects the leaf and *ecx the sub-leaf;
 * on output the four pointees receive EAX/EBX/ECX/EDX.
 */
static inline void do_cpuid_inline(unsigned int op, unsigned int *eax,
	unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
	/*
	 * Bug fix: the original passed the POINTER value "ecx" as the
	 * ECX input operand ("c" (ecx)); the sub-leaf number must come
	 * from *ecx, tied to the "=c" output ("2" (*ecx)).
	 */
	__asm__ __volatile__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx),
	    "=d" (*edx) : "0" (op), "2" (*ecx));
}
/**
 * Globals
 */

/* Hypercall/SynIC state; nothing is set up until hv_vmbus_init() runs */
hv_vmbus_context hv_vmbus_g_context = {
	.syn_ic_initialized = FALSE,
	.hypercall_page = NULL,
	.signal_event_param = NULL,
	.signal_event_buffer = NULL,
};

/*
 * Timecounter backed by the partition reference-time MSR; its frequency
 * is HV_NANOSECONDS_PER_SEC/100, i.e. the counter ticks in 100 ns units.
 */
static struct timecounter hv_timecounter = {
	hv_get_timecount, 0, ~0u, HV_NANOSECONDS_PER_SEC/100, "Hyper-V",
	HV_NANOSECONDS_PER_SEC/100
};
static u_int
hv_get_timecount(struct timecounter *tc)
{
u_int now = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
return (now);
}
/**
 * @brief Query the cpuid for presence of windows hypervisor.
 *
 * Returns non-zero when running under Hyper-V, 0 otherwise.
 */
int
hv_vmbus_query_hypervisor_presence(void)
{
	u_int regs[4];
	int hyper_v_detected = 0;

	/* CPUID leaf 1, ECX bit 31: "hypervisor present" */
	do_cpuid(1, regs);
	if (regs[2] & 0x80000000) { /* if(a hypervisor is detected) */
		/* make sure this really is Hyper-V */
		/* we look at the CPUID info */
		/*
		 * NOTE(review): HV_X64_MSR_GUEST_OS_ID (0x40000000) is an
		 * MSR number being reused here as the hypervisor CPUID
		 * vendor leaf; the numeric value happens to match, but a
		 * dedicated CPUID-leaf constant would be clearer.
		 */
		do_cpuid(HV_X64_MSR_GUEST_OS_ID, regs);
		/*
		 * Leaf 0x40000000 returns the max hypervisor leaf in EAX
		 * and the 12-byte vendor signature in EBX/ECX/EDX.
		 */
		hyper_v_detected =
				regs[0] >= HV_X64_CPUID_MIN &&
				regs[0] <= HV_X64_CPUID_MAX &&
				!memcmp("Microsoft Hv", &regs[1], 12);
	}
	return (hyper_v_detected);
}
/**
 * @brief Get version info of the windows hypervisor.
 *
 * Issues the Viridian CPUID leaves (presence is assumed to have been
 * confirmed already) and returns the maximum supported hypervisor leaf.
 */
static int
hv_vmbus_get_hypervisor_version(void)
{
	unsigned int	r_ax, r_bx, r_cx, r_dx;
	unsigned int	max_leaf;

	/* Query id and revision */
	r_ax = r_bx = r_cx = r_dx = 0;
	do_cpuid_inline(HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION,
	    &r_ax, &r_bx, &r_cx, &r_dx);
	max_leaf = r_ax;

	r_ax = r_bx = r_cx = r_dx = 0;
	do_cpuid_inline(HV_CPU_ID_FUNCTION_HV_INTERFACE,
	    &r_ax, &r_bx, &r_cx, &r_dx);

	if (max_leaf >= HV_CPU_ID_FUNCTION_MS_HV_VERSION) {
		r_ax = r_bx = r_cx = r_dx = 0;
		do_cpuid_inline(HV_CPU_ID_FUNCTION_MS_HV_VERSION,
		    &r_ax, &r_bx, &r_cx, &r_dx);
	}

	return (max_leaf);
}
/**
 * @brief Invoke the specified hypercall via the hypercall page.
 *
 * control carries the hypercall input value (call code etc.); input and
 * output are virtual addresses that are translated to guest physical
 * addresses here, or 0 when unused.  Returns the 64-bit hypercall status.
 */
static uint64_t
hv_vmbus_do_hypercall(uint64_t control, void* input, void* output)
{
#ifdef __x86_64__
	uint64_t hv_status = 0;
	uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
	uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
	volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page;

	/*
	 * 64-bit convention: control in RCX, input GPA in RDX, output
	 * GPA in R8; the status comes back in RAX.
	 */
	__asm__ __volatile__ ("mov %0, %%r8" : : "r" (output_address): "r8");
	__asm__ __volatile__ ("call *%3" : "=a"(hv_status):
				"c" (control), "d" (input_address),
				"m" (hypercall_page));
	return (hv_status);
#else
	/*
	 * 32-bit convention: each 64-bit value is split across a register
	 * pair -- control in EDX:EAX, input GPA in EBX:ECX, output GPA in
	 * EDI:ESI -- and the 64-bit status is returned in EDX:EAX.
	 */
	uint32_t control_high = control >> 32;
	uint32_t control_low = control & 0xFFFFFFFF;
	uint32_t hv_status_high = 1;
	uint32_t hv_status_low = 1;
	uint64_t input_address = (input) ? hv_get_phys_addr(input) : 0;
	uint32_t input_address_high = input_address >> 32;
	uint32_t input_address_low = input_address & 0xFFFFFFFF;
	uint64_t output_address = (output) ? hv_get_phys_addr(output) : 0;
	uint32_t output_address_high = output_address >> 32;
	uint32_t output_address_low = output_address & 0xFFFFFFFF;
	volatile void* hypercall_page = hv_vmbus_g_context.hypercall_page;

	__asm__ __volatile__ ("call *%8" : "=d"(hv_status_high),
				"=a"(hv_status_low) : "d" (control_high),
				"a" (control_low), "b" (input_address_high),
				"c" (input_address_low),
				"D"(output_address_high),
				"S"(output_address_low), "m" (hypercall_page));
	return (hv_status_low | ((uint64_t)hv_status_high << 32));
#endif /* __x86_64__ */
}
/**
 * @brief Main initialization routine.
 *
 * This routine must be called before any other routines in here are
 * called.  It detects Hyper-V, registers the guest OS id, installs the
 * hypercall page, prepares the aligned signal-event parameter, and
 * registers the Hyper-V reference timecounter.
 *
 * Returns 0 on success; ENOTSUP when Hyper-V is absent or any setup
 * step fails (partially installed state is rolled back).
 */
int
hv_vmbus_init(void)
{
	int					max_leaf;
	hv_vmbus_x64_msr_hypercall_contents	hypercall_msr;
	void* 					virt_addr = 0;

	memset(
	    hv_vmbus_g_context.syn_ic_event_page,
	    0,
	    sizeof(hv_vmbus_handle) * MAXCPU);

	memset(
	    hv_vmbus_g_context.syn_ic_msg_page,
	    0,
	    sizeof(hv_vmbus_handle) * MAXCPU);

	if (!hv_vmbus_query_hypervisor_presence())
		goto cleanup;

	max_leaf = hv_vmbus_get_hypervisor_version();

	/*
	 * Write our OS info
	 */
	uint64_t os_guest_info = HV_FREEBSD_GUEST_ID;
	wrmsr(HV_X64_MSR_GUEST_OS_ID, os_guest_info);
	hv_vmbus_g_context.guest_id = os_guest_info;

	/*
	 * See if the hypercall page is already set
	 */
	hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL);
	virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
	KASSERT(virt_addr != NULL,
	    ("Error VMBUS: malloc failed to allocate page during init!"));
	if (virt_addr == NULL)
		goto cleanup;

	/* Point the MSR at our page (as a guest frame number) and enable */
	hypercall_msr.enable = 1;
	hypercall_msr.guest_physical_address =
	    (hv_get_phys_addr(virt_addr) >> PAGE_SHIFT);
	wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t);

	/*
	 * Confirm that hypercall page did get set up
	 */
	hypercall_msr.as_uint64_t = 0;
	hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL);

	if (!hypercall_msr.enable)
		goto cleanup;

	hv_vmbus_g_context.hypercall_page = virt_addr;

	/*
	 * Setup the global signal event param for the signal event hypercall
	 */
	hv_vmbus_g_context.signal_event_buffer =
	    malloc(sizeof(hv_vmbus_input_signal_event_buffer), M_DEVBUF,
		M_ZERO | M_NOWAIT);
	KASSERT(hv_vmbus_g_context.signal_event_buffer != NULL,
	    ("Error VMBUS: Failed to allocate signal_event_buffer\n"));
	if (hv_vmbus_g_context.signal_event_buffer == NULL)
		goto cleanup;

	/* The hypercall parameter must be aligned within the buffer */
	hv_vmbus_g_context.signal_event_param =
	    (hv_vmbus_input_signal_event*)
	    (HV_ALIGN_UP((unsigned long)
		hv_vmbus_g_context.signal_event_buffer,
		HV_HYPERCALL_PARAM_ALIGN));
	hv_vmbus_g_context.signal_event_param->connection_id.as_uint32_t = 0;
	hv_vmbus_g_context.signal_event_param->connection_id.u.id =
	    HV_VMBUS_EVENT_CONNECTION_ID;
	hv_vmbus_g_context.signal_event_param->flag_number = 0;
	hv_vmbus_g_context.signal_event_param->rsvd_z = 0;

	tc_init(&hv_timecounter); /* register virtual timecount */

	return (0);

	cleanup:
	if (virt_addr != NULL) {
		if (hypercall_msr.enable) {
			/* Undo the MSR enable before freeing the page */
			hypercall_msr.as_uint64_t = 0;
			wrmsr(HV_X64_MSR_HYPERCALL,
			    hypercall_msr.as_uint64_t);
		}

		free(virt_addr, M_DEVBUF);
	}
	return (ENOTSUP);
}
/**
 * @brief Cleanup routine, called normally during driver unloading or
 * exiting.  Releases the signal-event buffer and, when we own the
 * hypercall page (guest id is ours), disables it via the MSR and frees
 * its backing memory.
 */
void
hv_vmbus_cleanup(void)
{
	hv_vmbus_x64_msr_hypercall_contents hypercall_msr;

	if (hv_vmbus_g_context.signal_event_buffer != NULL) {
		free(hv_vmbus_g_context.signal_event_buffer, M_DEVBUF);
		hv_vmbus_g_context.signal_event_buffer = NULL;
		hv_vmbus_g_context.signal_event_param = NULL;
	}

	if (hv_vmbus_g_context.guest_id != HV_FREEBSD_GUEST_ID)
		return;
	if (hv_vmbus_g_context.hypercall_page == NULL)
		return;

	/* Disable the hypercall page before releasing its memory */
	hypercall_msr.as_uint64_t = 0;
	wrmsr(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64_t);

	free(hv_vmbus_g_context.hypercall_page, M_DEVBUF);
	hv_vmbus_g_context.hypercall_page = NULL;
}
/**
 * @brief Post a message using the hypervisor message IPC
 * (HV_CALL_POST_MESSAGE hypercall).
 *
 * The hypercall input must be suitably aligned, so the message is
 * staged in a heap buffer padded for manual alignment.  Returns the
 * low 16 bits of the hypercall status, or EMSGSIZE/ENOMEM locally.
 */
hv_vmbus_status
hv_vmbus_post_msg_via_msg_ipc(
	hv_vmbus_connection_id	connection_id,
	hv_vmbus_msg_type	message_type,
	void*			payload,
	size_t			payload_size)
{
	struct alignedinput {
		uint64_t			alignment8;
		hv_vmbus_input_post_message	msg;
	};
	hv_vmbus_input_post_message*	aligned_msg;
	hv_vmbus_status			status;
	size_t				buf_addr;

	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return (EMSGSIZE);

	buf_addr = (size_t) malloc(sizeof(struct alignedinput), M_DEVBUF,
	    M_ZERO | M_NOWAIT);
	KASSERT(buf_addr != 0,
	    ("Error VMBUS: malloc failed to allocate message buffer!"));
	if (buf_addr == 0)
		return (ENOMEM);

	/* Round up to the required hypercall parameter alignment */
	aligned_msg = (hv_vmbus_input_post_message*)
	    (HV_ALIGN_UP(buf_addr, HV_HYPERCALL_PARAM_ALIGN));

	aligned_msg->connection_id = connection_id;
	aligned_msg->message_type = message_type;
	aligned_msg->payload_size = payload_size;
	memcpy((void*) aligned_msg->payload, payload, payload_size);

	/* Only the low 16 bits of the hypercall result carry the status */
	status = hv_vmbus_do_hypercall(
	    HV_CALL_POST_MESSAGE, aligned_msg, 0) & 0xFFFF;

	free((void *) buf_addr, M_DEVBUF);
	return (status);
}
/**
 * @brief Signal an event on the pre-configured event connection using
 * the hypervisor event IPC (HV_CALL_SIGNAL_EVENT hypercall).
 *
 * Returns the low 16 bits of the hypercall status.
 */
hv_vmbus_status
hv_vmbus_signal_event(void)	/* fix: "(void)" -- "( )" declared no prototype */
{
	hv_vmbus_status status;

	status = hv_vmbus_do_hypercall(
	    HV_CALL_SIGNAL_EVENT,
	    hv_vmbus_g_context.signal_event_param,
	    0) & 0xFFFF;

	return (status);
}
/**
* @brief hv_vmbus_synic_init
*/
void
hv_vmbus_synic_init(void *arg)
{
int cpu;
hv_vmbus_synic_simp simp;
hv_vmbus_synic_siefp siefp;
hv_vmbus_synic_scontrol sctrl;
hv_vmbus_synic_sint shared_sint;
uint64_t version;
hv_setup_args* setup_args = (hv_setup_args *)arg;
cpu = PCPU_GET(cpuid);
if (hv_vmbus_g_context.hypercall_page == NULL)
return;
/*
* KYS: Looks like we can only initialize on cpu0; don't we support
* SMP guests?
*
* TODO: Need to add SMP support for FreeBSD V9
*/
if (cpu != 0)
return;
/*
* TODO: Check the version
*/
version = rdmsr(HV_X64_MSR_SVERSION);
hv_vmbus_g_context.syn_ic_msg_page[cpu] = setup_args->page_buffers[0];
hv_vmbus_g_context.syn_ic_event_page[cpu] = setup_args->page_buffers[1];
/*
* Setup the Synic's message page
*/
simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP);
simp.simp_enabled = 1;
simp.base_simp_gpa = ((hv_get_phys_addr(
hv_vmbus_g_context.syn_ic_msg_page[cpu])) >> PAGE_SHIFT);
wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t);
/*
* Setup the Synic's event page
*/
siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP);
siefp.siefp_enabled = 1;
siefp.base_siefp_gpa = ((hv_get_phys_addr(
hv_vmbus_g_context.syn_ic_event_page[cpu])) >> PAGE_SHIFT);
wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
/*HV_SHARED_SINT_IDT_VECTOR + 0x20; */
shared_sint.vector = setup_args->vector;
shared_sint.masked = FALSE;
shared_sint.auto_eoi = FALSE;
wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
shared_sint.as_uint64_t);
/* Enable the global synic bit */
sctrl.as_uint64_t = rdmsr(HV_X64_MSR_SCONTROL);
sctrl.enable = 1;
wrmsr(HV_X64_MSR_SCONTROL, sctrl.as_uint64_t);
hv_vmbus_g_context.syn_ic_initialized = TRUE;
return;
}
/**
* @brief Cleanup routine for hv_vmbus_synic_init()
*/
void hv_vmbus_synic_cleanup(void *arg)
{
hv_vmbus_synic_sint shared_sint;
hv_vmbus_synic_simp simp;
hv_vmbus_synic_siefp siefp;
int cpu = PCPU_GET(cpuid);
if (!hv_vmbus_g_context.syn_ic_initialized)
return;
if (cpu != 0)
return; /* TODO: XXXKYS: SMP? */
shared_sint.as_uint64_t = rdmsr(
HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT);
shared_sint.masked = 1;
/*
* Disable the interrupt
*/
wrmsr(
HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
shared_sint.as_uint64_t);
simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP);
simp.simp_enabled = 0;
simp.base_simp_gpa = 0;
wrmsr(HV_X64_MSR_SIMP, simp.as_uint64_t);
siefp.as_uint64_t = rdmsr(HV_X64_MSR_SIEFP);
siefp.siefp_enabled = 0;
siefp.base_siefp_gpa = 0;
wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
}

View File

@ -0,0 +1,440 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include "hv_vmbus_priv.h"
/*
 * Amount of space available to write, given the read index (r), write
 * index (w) and data-area size (z) of a ring buffer.  Fully
 * parenthesized so the macro composes safely inside larger
 * expressions (the original lacked the outer parentheses, making
 * e.g. "z - HV_BYTES_AVAIL_TO_WRITE(...)" parse incorrectly).
 */
#define	HV_BYTES_AVAIL_TO_WRITE(r, w, z) \
	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
/**
 * @brief Compute the byte counts available for reading and for writing
 * in the given ring buffer, based on a snapshot of both indices.
 */
static inline void
get_ring_buffer_avail_bytes(
	    hv_vmbus_ring_buffer_info*	rbi,
	    uint32_t*			read,
	    uint32_t*			write)
{
	uint32_t r, w, avail_write;

	/* Snapshot both indices before either can move under us */
	r = rbi->ring_buffer->read_index;
	w = rbi->ring_buffer->write_index;

	avail_write = HV_BYTES_AVAIL_TO_WRITE(r, w, rbi->ring_data_size);
	*write = avail_write;
	*read = rbi->ring_data_size - avail_write;
}
/**
 * @brief Return the current write index of the specified ring buffer.
 */
static inline uint32_t
get_next_write_location(hv_vmbus_ring_buffer_info* ring_info)
{
	return (ring_info->ring_buffer->write_index);
}
/**
 * @brief Store a new write index into the specified ring buffer.
 */
static inline void
set_next_write_location(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			next_write_location)
{
	ring_info->ring_buffer->write_index = next_write_location;
}
/**
 * @brief Return the current read index of the specified ring buffer.
 */
static inline uint32_t
get_next_read_location(hv_vmbus_ring_buffer_info* ring_info)
{
	return (ring_info->ring_buffer->read_index);
}
/**
 * @brief Return the read index advanced by "offset" bytes, wrapped to
 * the ring's data size.  Lets the caller skip over already-known data.
 */
static inline uint32_t
get_next_read_location_with_offset(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			offset)
{
	return ((ring_info->ring_buffer->read_index + offset) %
	    ring_info->ring_data_size);
}
/**
 * @brief Store a new read index into the specified ring buffer.
 */
static inline void
set_next_read_location(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			next_read_location)
{
	ring_info->ring_buffer->read_index = next_read_location;
}
/**
 * @brief Return a pointer to the start of the ring's data area.
 */
static inline void *
get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info)
{
	return ((void *)ring_info->ring_buffer->buffer);
}
/**
 * @brief Return the size of the ring's data area in bytes.
 */
static inline uint32_t
get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info)
{
	return (ring_info->ring_data_size);
}
/**
 * Get the read and write indices packed as a uint64_t for the specified
 * ring buffer.
 *
 * NOTE(review): only the write index is packed (high 32 bits); the low
 * 32 bits are always zero.  This matches how the value is consumed as
 * "prev_indices" by hv_ring_buffer_write(), but the name and comment
 * promise both indices — confirm the intent before changing.
 */
static inline uint64_t
get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info)
{
	return (uint64_t) ring_info->ring_buffer->write_index << 32;
}
static uint32_t copy_to_ring_buffer(
hv_vmbus_ring_buffer_info* ring_info,
uint32_t start_write_offset,
char* src,
uint32_t src_len);
static uint32_t copy_from_ring_buffer(
hv_vmbus_ring_buffer_info* ring_info,
char* dest,
uint32_t dest_len,
uint32_t start_read_offset);
/**
 * @brief Return the current interrupt-mask word of the given ring buffer.
 */
uint32_t
hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi)
{
	return (rbi->ring_buffer->interrupt_mask);
}
/**
 * @brief Initialize ring-buffer bookkeeping over caller-supplied memory.
 * The usable data area is the buffer minus the shared ring header.
 * Always returns 0.
 */
int
hv_vmbus_ring_buffer_init(
	hv_vmbus_ring_buffer_info*	ring_info,
	void*				buffer,
	uint32_t			buffer_len)
{
	memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info));

	ring_info->ring_buffer = (hv_vmbus_ring_buffer*) buffer;
	ring_info->ring_buffer->read_index = 0;
	ring_info->ring_buffer->write_index = 0;

	ring_info->ring_size = buffer_len;
	ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer);

	mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN);

	return (0);
}
/**
 * @brief Tear down the ring buffer's spin lock.
 */
void
hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info)
{
	mtx_destroy(&ring_info->ring_lock);
}
/**
 * @brief Write a scatter/gather list of buffers to the ring buffer as
 * one packet, followed by a trailing uint64_t of "previous indices".
 *
 * Returns EAGAIN (writing nothing) unless the ring has strictly more
 * free space than the packet needs; 0 on success.
 */
int
hv_ring_buffer_write(
	hv_vmbus_ring_buffer_info*	out_ring_info,
	hv_vmbus_sg_buffer_list		sg_buffers[],
	uint32_t			sg_buffer_count)
{
	int i = 0;
	uint32_t byte_avail_to_write;
	uint32_t byte_avail_to_read;
	uint32_t total_bytes_to_write = 0;

	volatile uint32_t next_write_location;
	uint64_t prev_indices = 0;

	for (i = 0; i < sg_buffer_count; i++) {
		total_bytes_to_write += sg_buffers[i].length;
	}

	/* Account for the trailing prev_indices marker */
	total_bytes_to_write += sizeof(uint64_t);

	mtx_lock_spin(&out_ring_info->ring_lock);

	get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read,
	    &byte_avail_to_write);

	/*
	 * If there is only room for the packet, assume it is full.
	 * Otherwise, the next time around, we think the ring buffer
	 * is empty since the read index == write index
	 */
	if (byte_avail_to_write <= total_bytes_to_write) {
		mtx_unlock_spin(&out_ring_info->ring_lock);
		return (EAGAIN);
	}

	/*
	 * Write to the ring buffer
	 */
	next_write_location = get_next_write_location(out_ring_info);

	for (i = 0; i < sg_buffer_count; i++) {
		next_write_location = copy_to_ring_buffer(out_ring_info,
		    next_write_location, (char *) sg_buffers[i].data,
		    sg_buffers[i].length);
	}

	/*
	 * Set previous packet start
	 */
	prev_indices = get_ring_buffer_indices(out_ring_info);

	next_write_location = copy_to_ring_buffer(
	    out_ring_info, next_write_location,
	    (char *) &prev_indices, sizeof(uint64_t));

	/*
	 * Make sure we flush all writes before updating the writeIndex
	 * so the reader never observes the index ahead of the data.
	 */
	wmb();

	/*
	 * Now, update the write location
	 */
	set_next_write_location(out_ring_info, next_write_location);

	mtx_unlock_spin(&out_ring_info->ring_lock);

	return (0);
}
/**
 * @brief Copy buffer_len bytes out of the ring without advancing the
 * read index.  Returns EAGAIN when fewer than buffer_len bytes are
 * available, otherwise 0.
 */
int
hv_ring_buffer_peek(
	hv_vmbus_ring_buffer_info*	in_ring_info,
	void*				buffer,
	uint32_t			buffer_len)
{
	uint32_t avail_write;
	uint32_t avail_read;
	uint32_t read_loc;

	mtx_lock_spin(&in_ring_info->ring_lock);

	get_ring_buffer_avail_bytes(in_ring_info, &avail_read, &avail_write);

	/* Nothing to do unless the full request is present */
	if (avail_read < buffer_len) {
		mtx_unlock_spin(&in_ring_info->ring_lock);
		return (EAGAIN);
	}

	/* Copy from the current read position; the index is not moved */
	read_loc = get_next_read_location(in_ring_info);
	copy_from_ring_buffer(in_ring_info, (char *)buffer, buffer_len,
	    read_loc);

	mtx_unlock_spin(&in_ring_info->ring_lock);

	return (0);
}
/**
 * @brief Read buffer_len bytes from the ring, starting "offset" bytes
 * past the read index, then consume the trailing prev_indices marker
 * and advance the read index past the whole packet.
 *
 * Returns EINVAL for a zero-length request, EAGAIN when not enough
 * data is available, 0 on success.
 */
int
hv_ring_buffer_read(
	hv_vmbus_ring_buffer_info*	in_ring_info,
	void*				buffer,
	uint32_t			buffer_len,
	uint32_t			offset)
{
	uint32_t bytes_avail_to_write;
	uint32_t bytes_avail_to_read;
	uint32_t next_read_location = 0;
	uint64_t prev_indices = 0;

	if (buffer_len <= 0)
		return (EINVAL);

	mtx_lock_spin(&in_ring_info->ring_lock);

	get_ring_buffer_avail_bytes(
	    in_ring_info, &bytes_avail_to_read,
	    &bytes_avail_to_write);

	/*
	 * Make sure there is something to read
	 */
	if (bytes_avail_to_read < buffer_len) {
		mtx_unlock_spin(&in_ring_info->ring_lock);
		return (EAGAIN);
	}

	/* Start past the first "offset" bytes of the packet */
	next_read_location = get_next_read_location_with_offset(
	    in_ring_info,
	    offset);

	next_read_location = copy_from_ring_buffer(
	    in_ring_info,
	    (char *) buffer,
	    buffer_len,
	    next_read_location);

	/* Consume the trailing prev_indices marker the writer appended */
	next_read_location = copy_from_ring_buffer(
	    in_ring_info,
	    (char *) &prev_indices,
	    sizeof(uint64_t),
	    next_read_location);

	/*
	 * Make sure all reads are done before we update the read index since
	 * the writer may start writing to the read area once the read index
	 * is updated.
	 *
	 * NOTE(review): wmb() orders stores only; the stated intent is to
	 * order the reads above against the index update, which would call
	 * for rmb()/mb() — confirm before relying on this barrier.
	 */
	wmb();

	/*
	 * Update the read index
	 */
	set_next_read_location(in_ring_info, next_read_location);

	mtx_unlock_spin(&in_ring_info->ring_lock);

	return (0);
}
/**
 * @brief Copy src_len bytes from src into the ring data area starting
 * at start_write_offset, wrapping at the end of the ring if needed.
 *
 * Assumes the caller already verified there is room.  Returns the
 * offset just past the copied data, modulo the ring size.
 */
uint32_t
copy_to_ring_buffer(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			start_write_offset,
	char*				src,
	uint32_t			src_len)
{
	char		*ring = get_ring_buffer(ring_info);
	uint32_t	ring_size = get_ring_buffer_size(ring_info);
	uint32_t	first_part;

	if (src_len > ring_size - start_write_offset) {
		/* the copy wraps past the end of the ring */
		first_part = ring_size - start_write_offset;
		memcpy(ring + start_write_offset, src, first_part);
		memcpy(ring, src + first_part, src_len - first_part);
	} else {
		memcpy(ring + start_write_offset, src, src_len);
	}

	return ((start_write_offset + src_len) % ring_size);
}
/**
 * @brief Copy dest_len bytes out of the ring data area starting at
 * start_read_offset into dest, wrapping at the end of the ring if
 * needed.
 *
 * Assumes the caller already verified enough data is present.  Returns
 * the offset just past the copied data, modulo the ring size.
 */
uint32_t
copy_from_ring_buffer(
	hv_vmbus_ring_buffer_info*	ring_info,
	char*				dest,
	uint32_t			dest_len,
	uint32_t			start_read_offset)
{
	char		*ring = get_ring_buffer(ring_info);
	uint32_t	ring_size = get_ring_buffer_size(ring_info);
	uint32_t	first_part;

	if (dest_len > ring_size - start_read_offset) {
		/* the region wraps past the end of the ring */
		first_part = ring_size - start_read_offset;
		memcpy(dest, ring + start_read_offset, first_part);
		memcpy(dest + first_part, ring, dest_len - first_part);
	} else {
		memcpy(dest, ring + start_read_offset, dest_len);
	}

	return ((start_read_offset + dest_len) % ring_size);
}

View File

@ -0,0 +1,602 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* VM Bus Driver Implementation
*/
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/rtprio.h>
#include <sys/interrupt.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <sys/mutex.h>
#include <sys/smp.h>
#include <machine/resource.h>
#include <sys/rman.h>
#include <machine/stdarg.h>
#include <machine/intr_machdep.h>
#include <sys/pcpu.h>
#include "hv_vmbus_priv.h"
#define VMBUS_IRQ 0x5
static struct intr_event *hv_msg_intr_event;
static struct intr_event *hv_event_intr_event;
static void *msg_swintr;
static void *event_swintr;
static device_t vmbus_devp;
static void *vmbus_cookiep;
static int vmbus_rid;
struct resource *intr_res;
static int vmbus_irq = VMBUS_IRQ;
static int vmbus_inited;
static hv_setup_args setup_args; /* only CPU 0 supported at this time */
/**
 * @brief Software interrupt thread routine to handle channel messages from
 * the hypervisor.
 *
 * Drains this CPU's SynIC message slot: each pending message is copied
 * into a freshly allocated buffer and queued on the connection work
 * queue for deferred processing, after which the slot is released back
 * to the hypervisor.
 */
static void
vmbus_msg_swintr(void *dummy)
{
	int		cpu;
	void*		page_addr;
	hv_vmbus_message*	msg;
	hv_vmbus_message*	copied;

	cpu = PCPU_GET(cpuid);
	/* Message slot for our SINT within this CPU's SynIC message page */
	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;

	for (;;) {
		if (msg->header.message_type == HV_MESSAGE_TYPE_NONE) {
			break; /* no message */
		} else {
			/* Copy out so the slot can be freed right away */
			copied = malloc(sizeof(hv_vmbus_message),
					M_DEVBUF, M_NOWAIT);
			KASSERT(copied != NULL,
				("Error VMBUS: malloc failed to allocate"
					" hv_vmbus_message!"));
			/*
			 * NOTE(review): on persistent M_NOWAIT failure this
			 * retries forever without releasing the slot —
			 * confirm that is acceptable here.
			 */
			if (copied == NULL)
				continue;
			memcpy(copied, msg, sizeof(hv_vmbus_message));
			hv_queue_work_item(hv_vmbus_g_connection.work_queue,
			hv_vmbus_on_channel_message, copied);
		}

		msg->header.message_type = HV_MESSAGE_TYPE_NONE;

		/*
		 * Make sure the write to message_type (ie set to
		 * HV_MESSAGE_TYPE_NONE) happens before we read the
		 * message_pending and EOMing. Otherwise, the EOMing will
		 * not deliver any more messages
		 * since there is no empty slot
		 */
		wmb();

		if (msg->header.message_flags.message_pending) {
			/*
			 * This will cause message queue rescan to possibly
			 * deliver another msg from the hypervisor
			 */
			wrmsr(HV_X64_MSR_EOM, 0);
		}
	}
}
/**
 * @brief Interrupt filter routine for VMBUS.
 *
 * The purpose of this routine is to determine the type of VMBUS protocol
 * message to process - an event or a channel message - and to schedule
 * the corresponding software interrupt.
 *
 * As this is an interrupt filter routine, the function runs in a very
 * restricted environment. From the manpage for bus_setup_intr(9):
 *
 * In this restricted environment, care must be taken to account for all
 * races. A careful analysis of races should be done as well. It is gener-
 * ally cheaper to take an extra interrupt, for example, than to protect
 * variables with spinlocks. Read, modify, write cycles of hardware regis-
 * ters need to be carefully analyzed if other threads are accessing the
 * same registers.
 */
static int
hv_vmbus_isr(void *unused)
{
	int		cpu;
	hv_vmbus_message*	msg;
	hv_vmbus_synic_event_flags*	event;
	void*		page_addr;

	cpu = PCPU_GET(cpuid);
	/* (Temporary limit) */
	KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero"));

	/*
	 * The Windows team has advised that we check for events
	 * before checking for messages. This is the way they do it
	 * in Windows when running as a guest in Hyper-V
	 */
	page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
	event = (hv_vmbus_synic_event_flags*)
		page_addr + HV_VMBUS_MESSAGE_SINT;

	/* Since we are a child, we only need to check bit 0 */
	if (synch_test_and_clear_bit(0, &event->flags32[0])) {
		/* Channel event: hand off to the event swi */
		swi_sched(event_swintr, 0);
	}

	/* Check if there are actual messages to be processed */
	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;

	if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
		/* Channel message: hand off to the message swi */
		swi_sched(msg_swintr, 0);
	}

	return FILTER_HANDLED;
}
/*
 * Bus ivar read accessor: expose pieces of the child's hv_device
 * context (GUIDs, context pointer, device_t) to child drivers.
 * Returns ENOENT for an unknown ivar index.
 */
static int
vmbus_read_ivar(
	device_t	dev,
	device_t	child,
	int		index,
	uintptr_t*	result)
{
	struct hv_device *child_dev_ctx = device_get_ivars(child);

	switch (index) {
	case HV_VMBUS_IVAR_NODE:
		*result = (uintptr_t) child_dev_ctx->device;
		return (0);

	case HV_VMBUS_IVAR_DEVCTX:
		*result = (uintptr_t) child_dev_ctx;
		return (0);

	case HV_VMBUS_IVAR_TYPE:
		*result = (uintptr_t) &child_dev_ctx->class_id;
		return (0);

	case HV_VMBUS_IVAR_INSTANCE:
		*result = (uintptr_t) &child_dev_ctx->device_id;
		return (0);
	}

	return (ENOENT);
}
/*
 * Bus ivar write accessor: every vmbus ivar is read-only, so a known
 * index yields EINVAL and anything else yields ENOENT.
 */
static int
vmbus_write_ivar(
	device_t	dev,
	device_t	child,
	int		index,
	uintptr_t	value)
{
	if (index == HV_VMBUS_IVAR_TYPE ||
	    index == HV_VMBUS_IVAR_INSTANCE ||
	    index == HV_VMBUS_IVAR_DEVCTX ||
	    index == HV_VMBUS_IVAR_NODE) {
		/* read-only */
		return (EINVAL);
	}

	return (ENOENT);
}
/*
 * Allocate and initialize an hv_device for a newly offered channel,
 * recording the channel pointer plus the class (type) and instance
 * GUIDs.  Returns NULL if the allocation fails.
 */
struct hv_device*
hv_vmbus_child_device_create(
	hv_guid		type,
	hv_guid		instance,
	hv_vmbus_channel*	channel)
{
	hv_device *new_dev;

	new_dev = malloc(sizeof(hv_device), M_DEVBUF, M_NOWAIT | M_ZERO);
	KASSERT(new_dev != NULL,
	    ("Error VMBUS: malloc failed to allocate hv_device!"));
	if (new_dev == NULL)
		return (NULL);

	new_dev->channel = channel;
	new_dev->class_id = type;
	new_dev->device_id = instance;

	return (new_dev);
}
/*
 * Log the class-id GUID of a child device under bootverbose.  The
 * first 16 bytes of the class id are rendered as 32 hex characters.
 *
 * Fixes: the buffer was declared unsigned char[] but passed to
 * sprintf() (which takes char *), the writes were unbounded, and the
 * buffer was oversized (100 bytes for a 33-byte string).
 */
static void
print_dev_guid(struct hv_device *dev)
{
	int i;
	char guid_name[33];	/* 16 bytes -> 32 hex digits + NUL */

	for (i = 0; i < 32; i += 2)
		snprintf(&guid_name[i], 3, "%02x",
		    dev->class_id.data[i / 2]);

	if (bootverbose)
		printf("VMBUS: Class ID: %s\n", guid_name);
}
int
hv_vmbus_child_device_register(struct hv_device *child_dev)
{
device_t child;
int ret = 0;
print_dev_guid(child_dev);
child = device_add_child(vmbus_devp, NULL, -1);
child_dev->device = child;
device_set_ivars(child, child_dev);
mtx_lock(&Giant);
ret = device_probe_and_attach(child);
mtx_unlock(&Giant);
return (0);
}
int
hv_vmbus_child_device_unregister(struct hv_device *child_dev)
{
int ret = 0;
/*
* XXXKYS: Ensure that this is the opposite of
* device_add_child()
*/
mtx_lock(&Giant);
ret = device_delete_child(vmbus_devp, child_dev->device);
mtx_unlock(&Giant);
return(ret);
}
/*
 * Newbus identify hook: add a single "vmbus" child under the parent.
 *
 * Fixes: the original called BUS_ADD_CHILD() unconditionally and only
 * then checked device_find_child(), so the guard was dead code and a
 * repeated identify call (e.g. on driver reload) would create a
 * duplicate child.  Add the child only when it does not yet exist.
 */
static void
vmbus_identify(driver_t *driver, device_t parent)
{
	if (device_find_child(parent, "vmbus", 0) == NULL)
		BUS_ADD_CHILD(parent, 0, "vmbus", 0);
}
/*
 * Newbus probe: succeed only when a Hyper-V hypervisor is present.
 */
static int
vmbus_probe(device_t dev)
{
	if (bootverbose)
		device_printf(dev, "VMBUS: probe\n");

	if (hv_vmbus_query_hypervisor_presence() == 0)
		return (ENXIO);

	device_set_desc(dev, "Vmbus Devices");
	return (0);
}
/**
 * @brief Main vmbus driver initialization routine.
 *
 * Here, we
 * - initialize the vmbus driver context
 * - setup various driver entry points
 * - invoke the vmbus hv main init routine
 * - get the irq resource
 * - invoke the vmbus to add the vmbus root device
 * - setup the vmbus root device
 * - retrieve the channel offers
 */
static int
vmbus_bus_init(void)
{
	/*
	 * Local mirror of the private ioapic_intsrc layout from
	 * sys/x86/x86/io_apic.c, used below to dig the IDT vector out of
	 * the interrupt source.  NOTE(review): this must stay in sync
	 * with io_apic.c — fragile across kernel updates; confirm.
	 */
	struct ioapic_intsrc {
		struct intsrc io_intsrc;
		u_int io_irq;
		u_int io_intpin:8;
		u_int io_vector:8;
		u_int io_cpu:8;
		u_int io_activehi:1;
		u_int io_edgetrigger:1;
		u_int io_masked:1;
		int io_bus:4;
		uint32_t io_lowreg;
	};
	int i, ret;
	unsigned int vector = 0;
	struct intsrc *isrc;
	struct ioapic_intsrc *intpin;

	/* Idempotent: run the bring-up at most once */
	if (vmbus_inited)
		return (0);

	vmbus_inited = 1;

	ret = hv_vmbus_init();

	if (ret) {
		if(bootverbose)
			printf("Error VMBUS: Hypervisor Initialization Failed!\n");
		return (ret);
	}

	/* Software interrupt for channel messages (vmbus_msg_swintr) */
	ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
	    NULL, SWI_CLOCK, 0, &msg_swintr);

	if (ret)
		goto cleanup;

	/*
	 * Message SW interrupt handler checks a per-CPU page and
	 * thus the thread needs to be bound to CPU-0 - which is where
	 * all interrupts are processed.
	 */
	ret = intr_event_bind(hv_msg_intr_event, 0);

	if (ret)
		goto cleanup1;

	/* Software interrupt for channel events (hv_vmbus_on_events) */
	ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
	    NULL, SWI_CLOCK, 0, &event_swintr);

	if (ret)
		goto cleanup1;

	intr_res = bus_alloc_resource(vmbus_devp,
	    SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);

	if (intr_res == NULL) {
		ret = ENOMEM; /* XXXKYS: Need a better errno */
		goto cleanup2;
	}

	/*
	 * Setup interrupt filter handler
	 */
	ret = bus_setup_intr(vmbus_devp, intr_res,
	    INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
	    NULL, &vmbus_cookiep);

	if (ret != 0)
		goto cleanup3;

	ret = bus_bind_intr(vmbus_devp, intr_res, 0);

	if (ret != 0)
		goto cleanup4;

	isrc = intr_lookup_source(vmbus_irq);
	if ((isrc == NULL) || (isrc->is_event == NULL)) {
		ret = EINVAL;
		goto cleanup4;
	}

	/* vector = isrc->is_event->ie_vector; */
	intpin = (struct ioapic_intsrc *)isrc;
	vector = intpin->io_vector;

	if(bootverbose)
		printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);

	/**
	 * Notify the hypervisor of our irq.
	 */
	setup_args.vector = vector;
	/* Two zeroed pages for the SynIC message/event areas */
	for(i = 0; i < 2; i++) {
		setup_args.page_buffers[i] =
		    malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
		if (setup_args.page_buffers[i] == NULL) {
			KASSERT(setup_args.page_buffers[i] != NULL,
			    ("Error VMBUS: malloc failed!"));
			/*
			 * NOTE(review): ret is still 0 on this path, so a
			 * malloc failure here returns success — confirm
			 * intended.
			 */
			if (i > 0)
				free(setup_args.page_buffers[0], M_DEVBUF);
			goto cleanup4;
		}
	}

	/* only CPU #0 supported at this time */
	smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);

	/*
	 * Connect to VMBus in the root partition
	 */
	ret = hv_vmbus_connect();

	/*
	 * NOTE(review): the cleanup4 path below does not free the
	 * page_buffers allocated above, so a connect failure leaks
	 * them — verify.
	 */
	if (ret != 0)
		goto cleanup4;

	hv_vmbus_request_channel_offers();
	return (ret);

	cleanup4:

	/*
	 * remove swi, bus and intr resource
	 */
	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);

	cleanup3:
	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);

	cleanup2:
	swi_remove(event_swintr);

	cleanup1:
	swi_remove(msg_swintr);

	cleanup:

	hv_vmbus_cleanup();

	return (ret);
}
/*
 * Newbus attach for the vmbus root device.  Records the device_t for
 * later use (child registration, interrupt setup); the actual bus
 * bring-up happens in vmbus_bus_init(), either here (if the system is
 * already up) or later from the SYSINIT hook vmbus_init().
 */
static int
vmbus_attach(device_t dev)
{
	if(bootverbose)
		device_printf(dev, "VMBUS: attach dev: %p\n", dev);
	vmbus_devp = dev;

	/*
	 * If the system has already booted and thread
	 * scheduling is possible indicated by the global
	 * cold set to zero, we just call the driver
	 * initialization directly.
	 */
	if (!cold)
		vmbus_bus_init();

	return (0);
}
/*
 * SYSINIT hook: perform the deferred bus bring-up once the scheduler
 * is running (i.e. the global `cold` has been cleared).
 */
static void
vmbus_init(void)
{
	if (cold)
		return;

	vmbus_bus_init();
}
/*
 * Tear down everything vmbus_bus_init() set up: disconnect from the
 * hypervisor, clean up the SynIC, free the setup pages and release the
 * interrupt/swi resources.
 */
static void
vmbus_bus_exit(void)
{
	int i;

	hv_vmbus_release_unattached_channels();
	hv_vmbus_disconnect();

	smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);

	for (i = 0; i < 2; i++) {
		if (setup_args.page_buffers[i] != NULL)
			free(setup_args.page_buffers[i], M_DEVBUF);
	}

	hv_vmbus_cleanup();

	/* remove swi, bus and intr resource */
	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
	swi_remove(msg_swintr);
	swi_remove(event_swintr);
}
/* Inverse of vmbus_init(): tear the bus down via vmbus_bus_exit(). */
static void
vmbus_exit(void)
{
	vmbus_bus_exit();
}
/* Newbus detach: release all vmbus resources; always succeeds. */
static int
vmbus_detach(device_t dev)
{
	vmbus_exit();
	return (0);
}
/* MOD_LOAD handler: bootverbose logging only. */
static void
vmbus_mod_load(void)
{
	if(bootverbose)
		printf("VMBUS: load\n");
}
/* MOD_UNLOAD handler: bootverbose logging only. */
static void
vmbus_mod_unload(void)
{
	if(bootverbose)
		printf("VMBUS: unload\n");
}
/*
 * Module event handler.  Only load and unload are acted upon, and both
 * merely log; every event reports success.
 */
static int
vmbus_modevent(module_t mod, int what, void *arg)
{
	if (what == MOD_LOAD)
		vmbus_mod_load();
	else if (what == MOD_UNLOAD)
		vmbus_mod_unload();

	return (0);
}
/* Newbus method table for the vmbus root device */
static device_method_t vmbus_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify, vmbus_identify),
	DEVMETHOD(device_probe, vmbus_probe),
	DEVMETHOD(device_attach, vmbus_attach),
	DEVMETHOD(device_detach, vmbus_detach),
	DEVMETHOD(device_shutdown, bus_generic_shutdown),
	DEVMETHOD(device_suspend, bus_generic_suspend),
	DEVMETHOD(device_resume, bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_add_child, bus_generic_add_child),
	DEVMETHOD(bus_print_child, bus_generic_print_child),
	DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
	DEVMETHOD(bus_write_ivar, vmbus_write_ivar),

	{ 0, 0 } };

static char driver_name[] = "vmbus";
/* No softc: size field is 0 */
static driver_t vmbus_driver = { driver_name, vmbus_methods,0, };

devclass_t vmbus_devclass;

/* Attach directly under nexus */
DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
MODULE_VERSION(vmbus,1);

/* TODO: We want to be earlier than SI_SUB_VFS */
SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);

View File

@ -0,0 +1,722 @@
/*-
* Copyright (c) 2009-2012 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __HYPERV_PRIV_H__
#define __HYPERV_PRIV_H__
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sema.h>
#include <dev/hyperv/include/hyperv.h>
/*
* Status codes for hypervisor operations.
*/
typedef uint16_t hv_vmbus_status;
#define HV_MESSAGE_SIZE (256)
#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
#define HV_ANY_VP (0xFFFFFFFF)
/*
* Synthetic interrupt controller flag constants.
*/
#define HV_EVENT_FLAGS_COUNT (256 * 8)
#define HV_EVENT_FLAGS_BYTE_COUNT (256)
#define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(uint32_t))
/*
* MessageId: HV_STATUS_INSUFFICIENT_BUFFERS
* MessageText:
* You did not supply enough message buffers to send a message.
*/
#define HV_STATUS_INSUFFICIENT_BUFFERS ((uint16_t)0x0013)
typedef void (*hv_vmbus_channel_callback)(void *context);
typedef struct {
void* data;
uint32_t length;
} hv_vmbus_sg_buffer_list;
typedef struct {
uint32_t current_interrupt_mask;
uint32_t current_read_index;
uint32_t current_write_index;
uint32_t bytes_avail_to_read;
uint32_t bytes_avail_to_write;
} hv_vmbus_ring_buffer_debug_info;
typedef struct {
uint32_t rel_id;
hv_vmbus_channel_state state;
hv_guid interface_type;
hv_guid interface_instance;
uint32_t monitor_id;
uint32_t server_monitor_pending;
uint32_t server_monitor_latency;
uint32_t server_monitor_connection_id;
uint32_t client_monitor_pending;
uint32_t client_monitor_latency;
uint32_t client_monitor_connection_id;
hv_vmbus_ring_buffer_debug_info inbound;
hv_vmbus_ring_buffer_debug_info outbound;
} hv_vmbus_channel_debug_info;
typedef union {
hv_vmbus_channel_version_supported version_supported;
hv_vmbus_channel_open_result open_result;
hv_vmbus_channel_gpadl_torndown gpadl_torndown;
hv_vmbus_channel_gpadl_created gpadl_created;
hv_vmbus_channel_version_response version_response;
} hv_vmbus_channel_msg_response;
/*
* Represents each channel msg on the vmbus connection
* This is a variable-size data structure depending on
* the msg type itself
*/
typedef struct hv_vmbus_channel_msg_info {
/*
* Bookkeeping stuff
*/
TAILQ_ENTRY(hv_vmbus_channel_msg_info) msg_list_entry;
/*
* So far, this is only used to handle
* gpadl body message
*/
TAILQ_HEAD(, hv_vmbus_channel_msg_info) sub_msg_list_anchor;
/*
* Synchronize the request/response if
* needed.
* KYS: Use a semaphore for now.
* Not perf critical.
*/
struct sema wait_sema;
hv_vmbus_channel_msg_response response;
uint32_t message_size;
/**
* The channel message that goes out on
* the "wire". It will contain at
* minimum the
* hv_vmbus_channel_msg_header
* header.
*/
unsigned char msg[0];
} hv_vmbus_channel_msg_info;
/*
* The format must be the same as hv_vm_data_gpa_direct
*/
typedef struct hv_vmbus_channel_packet_page_buffer {
uint16_t type;
uint16_t data_offset8;
uint16_t length8;
uint16_t flags;
uint64_t transaction_id;
uint32_t reserved;
uint32_t range_count;
hv_vmbus_page_buffer range[HV_MAX_PAGE_BUFFER_COUNT];
} __packed hv_vmbus_channel_packet_page_buffer;
/*
* The format must be the same as hv_vm_data_gpa_direct
*/
typedef struct hv_vmbus_channel_packet_multipage_buffer {
uint16_t type;
uint16_t data_offset8;
uint16_t length8;
uint16_t flags;
uint64_t transaction_id;
uint32_t reserved;
uint32_t range_count; /* Always 1 in this case */
hv_vmbus_multipage_buffer range;
} __packed hv_vmbus_channel_packet_multipage_buffer;
enum {
HV_VMBUS_MESSAGE_CONNECTION_ID = 1,
HV_VMBUS_MESSAGE_PORT_ID = 1,
HV_VMBUS_EVENT_CONNECTION_ID = 2,
HV_VMBUS_EVENT_PORT_ID = 2,
HV_VMBUS_MONITOR_CONNECTION_ID = 3,
HV_VMBUS_MONITOR_PORT_ID = 3,
HV_VMBUS_MESSAGE_SINT = 2
};
#define HV_PRESENT_BIT 0x80000000
#define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t)
/*
* Connection identifier type
*/
typedef union {
uint32_t as_uint32_t;
struct {
uint32_t id:24;
uint32_t reserved:8;
} u;
} __packed hv_vmbus_connection_id;
/*
* Definition of the hv_vmbus_signal_event hypercall input structure
*/
typedef struct {
hv_vmbus_connection_id connection_id;
uint16_t flag_number;
uint16_t rsvd_z;
} __packed hv_vmbus_input_signal_event;
typedef struct {
uint64_t align8;
hv_vmbus_input_signal_event event;
} __packed hv_vmbus_input_signal_event_buffer;
typedef struct {
uint64_t guest_id;
void* hypercall_page;
hv_bool_uint8_t syn_ic_initialized;
/*
* This is used as an input param to HV_CALL_SIGNAL_EVENT hypercall.
* The input param is immutable in our usage and
* must be dynamic mem (vs stack or global).
*/
hv_vmbus_input_signal_event_buffer *signal_event_buffer;
/*
* 8-bytes aligned of the buffer above
*/
hv_vmbus_input_signal_event *signal_event_param;
hv_vmbus_handle syn_ic_msg_page[MAXCPU];
hv_vmbus_handle syn_ic_event_page[MAXCPU];
} hv_vmbus_context;
/*
* Define hypervisor message types
*/
typedef enum {
HV_MESSAGE_TYPE_NONE = 0x00000000,
/*
* Memory access messages
*/
HV_MESSAGE_TYPE_UNMAPPED_GPA = 0x80000000,
HV_MESSAGE_TYPE_GPA_INTERCEPT = 0x80000001,
/*
* Timer notification messages
*/
HV_MESSAGE_TIMER_EXPIRED = 0x80000010,
/*
* Error messages
*/
HV_MESSAGE_TYPE_INVALID_VP_REGISTER_VALUE = 0x80000020,
HV_MESSAGE_TYPE_UNRECOVERABLE_EXCEPTION = 0x80000021,
HV_MESSAGE_TYPE_UNSUPPORTED_FEATURE = 0x80000022,
/*
* Trace buffer complete messages
*/
HV_MESSAGE_TYPE_EVENT_LOG_BUFFER_COMPLETE = 0x80000040,
/*
* Platform-specific processor intercept messages
*/
HV_MESSAGE_TYPE_X64_IO_PORT_INTERCEPT = 0x80010000,
HV_MESSAGE_TYPE_X64_MSR_INTERCEPT = 0x80010001,
HV_MESSAGE_TYPE_X64_CPU_INTERCEPT = 0x80010002,
HV_MESSAGE_TYPE_X64_EXCEPTION_INTERCEPT = 0x80010003,
HV_MESSAGE_TYPE_X64_APIC_EOI = 0x80010004,
HV_MESSAGE_TYPE_X64_LEGACY_FP_ERROR = 0x80010005
} hv_vmbus_msg_type;
/*
* Define port identifier type
*/
typedef union _hv_vmbus_port_id {
uint32_t as_uint32_t;
struct {
uint32_t id:24;
uint32_t reserved:8;
} u ;
} hv_vmbus_port_id;
/*
* Define synthetic interrupt controller message flag
*/
typedef union {
uint8_t as_uint8_t;
struct {
uint8_t message_pending:1;
uint8_t reserved:7;
};
} hv_vmbus_msg_flags;
typedef uint64_t hv_vmbus_partition_id;
/*
* Define synthetic interrupt controller message header
*/
typedef struct {
hv_vmbus_msg_type message_type;
uint8_t payload_size;
hv_vmbus_msg_flags message_flags;
uint8_t reserved[2];
union {
hv_vmbus_partition_id sender;
hv_vmbus_port_id port;
} u;
} hv_vmbus_msg_header;
/*
* Define synthetic interrupt controller message format
*/
typedef struct {
hv_vmbus_msg_header header;
union {
uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
} u ;
} hv_vmbus_message;
/*
 * Maximum channels is determined by the size of the interrupt
 * page which is PAGE_SIZE. 1/2 of PAGE_SIZE is for
 * send endpoint interrupt and the other is receive
 * endpoint interrupt.
 *
 * Note: ((PAGE_SIZE >> 1) << 3) allocates 16384 channels
 *
 * Fixes: the expansion is now fully parenthesized (the unwrapped
 * `(PAGE_SIZE >> 1) << 3` would mis-associate inside a larger
 * expression, e.g. division or comparison), and the comment's
 * 16348 typo is corrected to 16384 (4096/2 * 8).
 */
#define HV_MAX_NUM_CHANNELS ((PAGE_SIZE >> 1) << 3)
/*
* (The value here must be in multiple of 32)
*/
#define HV_MAX_NUM_CHANNELS_SUPPORTED 256
/*
* VM Bus connection states
*/
typedef enum {
HV_DISCONNECTED,
HV_CONNECTING,
HV_CONNECTED,
HV_DISCONNECTING
} hv_vmbus_connect_state;
#define HV_MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT
typedef struct {
hv_vmbus_connect_state connect_state;
uint32_t next_gpadl_handle;
/**
* Represents channel interrupts. Each bit position
* represents a channel.
* When a channel sends an interrupt via VMBUS, it
* finds its bit in the send_interrupt_page, set it and
* calls Hv to generate a port event. The other end
* receives the port event and parse the
* recv_interrupt_page to see which bit is set
*/
void *interrupt_page;
void *send_interrupt_page;
void *recv_interrupt_page;
/*
* 2 pages - 1st page for parent->child
* notification and 2nd is child->parent
* notification
*/
void *monitor_pages;
TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor;
struct mtx channel_msg_lock;
/**
* List of channels
*/
TAILQ_HEAD(, hv_vmbus_channel) channel_anchor;
struct mtx channel_lock;
hv_vmbus_handle work_queue;
struct sema control_sema;
} hv_vmbus_connection;
/*
* Declare the MSR used to identify the guest OS
*/
#define HV_X64_MSR_GUEST_OS_ID 0x40000000
typedef union {
uint64_t as_uint64_t;
struct {
uint64_t build_number : 16;
uint64_t service_version : 8; /* Service Pack, etc. */
uint64_t minor_version : 8;
uint64_t major_version : 8;
/*
* HV_GUEST_OS_MICROSOFT_IDS (If Vendor=MS)
* HV_GUEST_OS_VENDOR
*/
uint64_t os_id : 8;
uint64_t vendor_id : 16;
};
} hv_vmbus_x64_msr_guest_os_id_contents;
/*
* Declare the MSR used to setup pages used to communicate with the hypervisor
*/
#define HV_X64_MSR_HYPERCALL 0x40000001
typedef union {
uint64_t as_uint64_t;
struct {
uint64_t enable :1;
uint64_t reserved :11;
uint64_t guest_physical_address :52;
};
} hv_vmbus_x64_msr_hypercall_contents;
typedef union {
uint32_t as_uint32_t;
struct {
uint32_t group_enable :4;
uint32_t rsvd_z :28;
};
} hv_vmbus_monitor_trigger_state;
typedef union {
uint64_t as_uint64_t;
struct {
uint32_t pending;
uint32_t armed;
};
} hv_vmbus_monitor_trigger_group;
typedef struct {
hv_vmbus_connection_id connection_id;
uint16_t flag_number;
uint16_t rsvd_z;
} hv_vmbus_monitor_parameter;
/*
* hv_vmbus_monitor_page Layout
* ------------------------------------------------------
* | 0 | trigger_state (4 bytes) | Rsvd1 (4 bytes) |
* | 8 | trigger_group[0] |
* | 10 | trigger_group[1] |
* | 18 | trigger_group[2] |
* | 20 | trigger_group[3] |
* | 28 | Rsvd2[0] |
* | 30 | Rsvd2[1] |
* | 38 | Rsvd2[2] |
* | 40 | next_check_time[0][0] | next_check_time[0][1] |
* | ... |
* | 240 | latency[0][0..3] |
* | 340 | Rsvz3[0] |
* | 440 | parameter[0][0] |
* | 448 | parameter[0][1] |
* | ... |
* | 840 | Rsvd4[0] |
* ------------------------------------------------------
*/
typedef struct {
hv_vmbus_monitor_trigger_state trigger_state;
uint32_t rsvd_z1;
hv_vmbus_monitor_trigger_group trigger_group[4];
uint64_t rsvd_z2[3];
int32_t next_check_time[4][32];
uint16_t latency[4][32];
uint64_t rsvd_z3[32];
hv_vmbus_monitor_parameter parameter[4][32];
uint8_t rsvd_z4[1984];
} hv_vmbus_monitor_page;
/*
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
* is set by CPUID(HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES).
*/
typedef enum {
HV_CPU_ID_FUNCTION_VERSION_AND_FEATURES = 0x00000001,
HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION = 0x40000000,
HV_CPU_ID_FUNCTION_HV_INTERFACE = 0x40000001,
/*
* The remaining functions depend on the value
* of hv_cpu_id_function_interface
*/
HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002,
HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003,
HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004,
HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005
} hv_vmbus_cpuid_function;
/*
* Define the format of the SIMP register
*/
typedef union {
uint64_t as_uint64_t;
struct {
uint64_t simp_enabled : 1;
uint64_t preserved : 11;
uint64_t base_simp_gpa : 52;
};
} hv_vmbus_synic_simp;
/*
* Define the format of the SIEFP register
*/
typedef union {
uint64_t as_uint64_t;
struct {
uint64_t siefp_enabled : 1;
uint64_t preserved : 11;
uint64_t base_siefp_gpa : 52;
};
} hv_vmbus_synic_siefp;
/*
* Define synthetic interrupt source
*/
typedef union {
uint64_t as_uint64_t;
struct {
uint64_t vector : 8;
uint64_t reserved1 : 8;
uint64_t masked : 1;
uint64_t auto_eoi : 1;
uint64_t reserved2 : 46;
};
} hv_vmbus_synic_sint;
/*
* Define syn_ic control register
*/
typedef union _hv_vmbus_synic_scontrol {
uint64_t as_uint64_t;
struct {
uint64_t enable : 1;
uint64_t reserved : 63;
};
} hv_vmbus_synic_scontrol;
/*
* Define the hv_vmbus_post_message hypercall input structure
*/
typedef struct {
hv_vmbus_connection_id connection_id;
uint32_t reserved;
hv_vmbus_msg_type message_type;
uint32_t payload_size;
uint64_t payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
} hv_vmbus_input_post_message;
/*
* Define the synthetic interrupt controller event flags format
*/
typedef union {
uint8_t flags8[HV_EVENT_FLAGS_BYTE_COUNT];
uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT];
} hv_vmbus_synic_event_flags;
/*
* Define synthetic interrupt controller model specific registers
*/
#define HV_X64_MSR_SCONTROL (0x40000080)
#define HV_X64_MSR_SVERSION (0x40000081)
#define HV_X64_MSR_SIEFP (0x40000082)
#define HV_X64_MSR_SIMP (0x40000083)
#define HV_X64_MSR_EOM (0x40000084)
#define HV_X64_MSR_SINT0 (0x40000090)
#define HV_X64_MSR_SINT1 (0x40000091)
#define HV_X64_MSR_SINT2 (0x40000092)
#define HV_X64_MSR_SINT3 (0x40000093)
#define HV_X64_MSR_SINT4 (0x40000094)
#define HV_X64_MSR_SINT5 (0x40000095)
#define HV_X64_MSR_SINT6 (0x40000096)
#define HV_X64_MSR_SINT7 (0x40000097)
#define HV_X64_MSR_SINT8 (0x40000098)
#define HV_X64_MSR_SINT9 (0x40000099)
#define HV_X64_MSR_SINT10 (0x4000009A)
#define HV_X64_MSR_SINT11 (0x4000009B)
#define HV_X64_MSR_SINT12 (0x4000009C)
#define HV_X64_MSR_SINT13 (0x4000009D)
#define HV_X64_MSR_SINT14 (0x4000009E)
#define HV_X64_MSR_SINT15 (0x4000009F)
/*
* Declare the various hypercall operations
*/
typedef enum {
HV_CALL_POST_MESSAGE = 0x005c,
HV_CALL_SIGNAL_EVENT = 0x005d,
} hv_vmbus_call_code;
/**
* Global variables
*/
extern hv_vmbus_context hv_vmbus_g_context;
extern hv_vmbus_connection hv_vmbus_g_connection;
/*
* Private, VM Bus functions
*/
int hv_vmbus_ring_buffer_init(
hv_vmbus_ring_buffer_info *ring_info,
void *buffer,
uint32_t buffer_len);
void hv_ring_buffer_cleanup(
hv_vmbus_ring_buffer_info *ring_info);
int hv_ring_buffer_write(
hv_vmbus_ring_buffer_info *ring_info,
hv_vmbus_sg_buffer_list sg_buffers[],
uint32_t sg_buff_count);
int hv_ring_buffer_peek(
hv_vmbus_ring_buffer_info *ring_info,
void *buffer,
uint32_t buffer_len);
int hv_ring_buffer_read(
hv_vmbus_ring_buffer_info *ring_info,
void *buffer,
uint32_t buffer_len,
uint32_t offset);
uint32_t hv_vmbus_get_ring_buffer_interrupt_mask(
hv_vmbus_ring_buffer_info *ring_info);
void hv_vmbus_dump_ring_info(
hv_vmbus_ring_buffer_info *ring_info,
char *prefix);
hv_vmbus_channel* hv_vmbus_allocate_channel(void);
void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel);
void hv_vmbus_on_channel_message(void *context);
int hv_vmbus_request_channel_offers(void);
void hv_vmbus_release_unattached_channels(void);
int hv_vmbus_init(void);
void hv_vmbus_cleanup(void);
uint16_t hv_vmbus_post_msg_via_msg_ipc(
hv_vmbus_connection_id connection_id,
hv_vmbus_msg_type message_type,
void *payload,
size_t payload_size);
uint16_t hv_vmbus_signal_event(void);
void hv_vmbus_synic_init(void *irq_arg);
void hv_vmbus_synic_cleanup(void *arg);
int hv_vmbus_query_hypervisor_presence(void);
struct hv_device* hv_vmbus_child_device_create(
hv_guid device_type,
hv_guid device_instance,
hv_vmbus_channel *channel);
int hv_vmbus_child_device_register(
struct hv_device *child_dev);
int hv_vmbus_child_device_unregister(
struct hv_device *child_dev);
hv_vmbus_channel* hv_vmbus_get_channel_from_rel_id(uint32_t rel_id);
/**
* Connection interfaces
*/
int hv_vmbus_connect(void);
int hv_vmbus_disconnect(void);
int hv_vmbus_post_message(void *buffer, size_t buf_size);
int hv_vmbus_set_event(uint32_t child_rel_id);
void hv_vmbus_on_events(void *);
/*
* The guest OS needs to register the guest ID with the hypervisor.
* The guest ID is a 64 bit entity and the structure of this ID is
* specified in the Hyper-V specification:
*
* http://msdn.microsoft.com/en-us/library/windows/
* hardware/ff542653%28v=vs.85%29.aspx
*
* While the current guideline does not specify how FreeBSD guest ID(s)
* need to be generated, our plan is to publish the guidelines for
* FreeBSD and other guest operating systems that currently are hosted
* on Hyper-V. The implementation here conforms to this yet
* unpublished guidelines.
*
* Bit(s)
* 63 - Indicates if the OS is Open Source or not; 1 is Open Source
* 62:56 - Os Type; Linux is 0x100, FreeBSD is 0x200
* 55:48 - Distro specific identification
* 47:16 - FreeBSD kernel version number
* 15:0 - Distro specific identification
*
*/
#define HV_FREEBSD_VENDOR_ID 0x8200
#define HV_FREEBSD_GUEST_ID hv_generate_guest_id(0,0)
/*
 * Compose the 64-bit guest OS id registered with the hypervisor via
 * HV_X64_MSR_GUEST_OS_ID (layout documented in the comment above):
 * bits 63:48 carry the FreeBSD vendor/OS id (0x8200), bits 47:16 the
 * kernel __FreeBSD_version, bits 15:0 a distro-specific value.
 *
 * NOTE(review): distro_id_part1 is also shifted by 48, OR-ing it into
 * the same 16-bit field as HV_FREEBSD_VENDOR_ID; this only works
 * because the low byte of 0x8200 is zero, and the layout comment
 * suggests bits 55:48 were intended — confirm before passing a
 * non-zero distro_id_part1.
 */
static inline uint64_t hv_generate_guest_id(
	uint8_t distro_id_part1,
	uint16_t distro_id_part2)
{
	uint64_t guest_id;
	guest_id = (((uint64_t)HV_FREEBSD_VENDOR_ID) << 48);
	guest_id |= (((uint64_t)(distro_id_part1)) << 48);
	guest_id |= (((uint64_t)(__FreeBSD_version)) << 16); /* in param.h */
	guest_id |= ((uint64_t)(distro_id_part2));
	return guest_id;
}
typedef struct {
unsigned int vector;
void *page_buffers[2];
} hv_setup_args;
#endif /* __HYPERV_PRIV_H__ */

View File

@ -126,6 +126,7 @@ SUBDIR= \
${_hptnr} \
${_hptrr} \
hwpmc \
${_hyperv} \
${_i2c} \
${_ibcs2} \
${_ichwd} \
@ -671,6 +672,7 @@ _hptmv= hptmv
_hptnr= hptnr
_hptrr= hptrr
.endif
_hyperv= hyperv
_i2c= i2c
_ichwd= ichwd
_ida= ida

View File

@ -0,0 +1,5 @@
# $FreeBSD$
SUBDIR = vmbus netvsc stordisengage storvsc utilities
.include <bsd.subdir.mk>

View File

@ -0,0 +1,13 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../../dev/hyperv/netvsc
KMOD = hv_netvsc
SRCS = hv_net_vsc.c \
hv_netvsc_drv_freebsd.c \
hv_rndis_filter.c
CFLAGS += -I${.CURDIR}/../../../dev/hyperv/netvsc
.include <bsd.kmod.mk>

View File

@ -0,0 +1,9 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../../dev/hyperv/stordisengage
KMOD= hv_ata_pci_disengage
SRCS= hv_ata_pci_disengage.c
.include <bsd.kmod.mk>

View File

@ -0,0 +1,14 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../../dev/hyperv/storvsc
KMOD= hv_storvsc
SRCS = hv_storvsc_drv_freebsd.c \
hv_vstorage.h
CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/include \
-I${.CURDIR}/../../../dev/hyperv/vmbus \
-I${.CURDIR}/../../../dev/hyperv/storvsc
.include <bsd.kmod.mk>

View File

@ -0,0 +1,12 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../../dev/hyperv/utilities
KMOD= hv_utils
SRCS = hv_util.c
CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/include \
-I${.CURDIR}/../../../dev/hyperv/vmbus
.include <bsd.kmod.mk>

View File

@ -0,0 +1,20 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../../dev/hyperv/vmbus \
${.CURDIR}/../../../dev/hyperv/utilities
KMOD= hv_vmbus
SRCS = hv_channel.c \
hv_channel_mgmt.c \
hv_connection.c \
hv_hv.c \
hv_ring_buffer.c \
hv_vmbus_drv_freebsd.c \
hv_vmbus_priv.h
CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/include \
-I${.CURDIR}/../../../dev/hyperv/vmbus \
-I${.CURDIR}/../../../dev/hyperv/utilities
.include <bsd.kmod.mk>