MFC - tracking commit

commit 21998ad688 (parent 2368f0cbd1)
@@ -124,7 +124,8 @@ vm_destroy(struct vmctx *vm)
}

int
vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len)
vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
int *wired)
{
int error;
struct vm_memory_segment seg;
@@ -133,6 +134,8 @@ vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len)
seg.gpa = gpa;
error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
*ret_len = seg.len;
if (wired != NULL)
*wired = seg.wired;
return (error);
}

@@ -741,3 +744,23 @@ vcpu_reset(struct vmctx *vmctx, int vcpu)
done:
return (error);
}

int
vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
{
int error, i;
struct vm_gpa_pte gpapte;

bzero(&gpapte, sizeof(gpapte));
gpapte.gpa = gpa;

error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);

if (error == 0) {
*num = gpapte.ptenum;
for (i = 0; i < gpapte.ptenum; i++)
pte[i] = gpapte.pte[i];
}

return (error);
}

@@ -45,9 +45,11 @@ enum vm_mmap_style {
int vm_create(const char *name);
struct vmctx *vm_open(const char *name);
void vm_destroy(struct vmctx *ctx);
int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len);
int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
int *wired);
int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,

@@ -1574,7 +1574,7 @@ getmemsize(caddr_t kmdp, u_int64_t first)
/*
* map page into kernel: valid, read/write,non-cacheable
*/
*pte = pa | PG_V | PG_RW | PG_N;
*pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD;
invltlb();

tmp = *(int *)ptr;
File diff suppressed because it is too large
@@ -733,6 +733,14 @@ trap_pfault(frame, usermode)
}
}

/*
* If the trap was caused by errant bits in the PTE then panic.
*/
if (frame->tf_err & PGEX_RSV) {
trap_fatal(frame, eva);
return (-1);
}

/*
* PGEX_I is defined only if the execute disable bit capability is
* supported and enabled.
@@ -822,10 +830,11 @@ trap_fatal(frame, eva)
#endif
if (type == T_PAGEFLT) {
printf("fault virtual address = 0x%lx\n", eva);
printf("fault code = %s %s %s, %s\n",
printf("fault code = %s %s %s%s, %s\n",
code & PGEX_U ? "user" : "supervisor",
code & PGEX_W ? "write" : "read",
code & PGEX_I ? "instruction" : "data",
code & PGEX_RSV ? " rsv" : "",
code & PGEX_P ? "protection violation" : "page not present");
}
printf("instruction pointer = 0x%lx:0x%lx\n",

@@ -62,7 +62,8 @@
u_int pc_cmci_mask; /* MCx banks for CMCI */ \
uint64_t pc_dbreg[16]; /* ddb debugging regs */ \
int pc_dbreg_cmd; /* ddb debugging reg cmd */ \
char __pad[161] /* be divisor of PAGE_SIZE \
u_int pc_vcpu_id; /* Xen vCPU ID */ \
char __pad[157] /* be divisor of PAGE_SIZE \
after cache alignment */

#define PC_DBREG_CMD_NONE 0
@@ -50,41 +50,74 @@
* of the fields not present here and there, depending on a lot of things.
*/
/* ---- Intel Nomenclature ---- */
#define PG_V 0x001 /* P Valid */
#define PG_RW 0x002 /* R/W Read/Write */
#define PG_U 0x004 /* U/S User/Supervisor */
#define PG_NC_PWT 0x008 /* PWT Write through */
#define PG_NC_PCD 0x010 /* PCD Cache disable */
#define PG_A 0x020 /* A Accessed */
#define PG_M 0x040 /* D Dirty */
#define PG_PS 0x080 /* PS Page size (0=4k,1=2M) */
#define PG_PTE_PAT 0x080 /* PAT PAT index */
#define PG_G 0x100 /* G Global */
#define PG_AVAIL1 0x200 /* / Available for system */
#define PG_AVAIL2 0x400 /* < programmers use */
#define PG_AVAIL3 0x800 /* \ */
#define PG_PDE_PAT 0x1000 /* PAT PAT index */
#define PG_NX (1ul<<63) /* No-execute */

/* Our various interpretations of the above */
#define PG_W PG_AVAIL1 /* "Wired" pseudoflag */
#define PG_MANAGED PG_AVAIL2
#define PG_FRAME (0x000ffffffffff000ul)
#define PG_PS_FRAME (0x000fffffffe00000ul)
#define PG_PROT (PG_RW|PG_U) /* all protection bits . */
#define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */
#define X86_PG_V 0x001 /* P Valid */
#define X86_PG_RW 0x002 /* R/W Read/Write */
#define X86_PG_U 0x004 /* U/S User/Supervisor */
#define X86_PG_NC_PWT 0x008 /* PWT Write through */
#define X86_PG_NC_PCD 0x010 /* PCD Cache disable */
#define X86_PG_A 0x020 /* A Accessed */
#define X86_PG_M 0x040 /* D Dirty */
#define X86_PG_PS 0x080 /* PS Page size (0=4k,1=2M) */
#define X86_PG_PTE_PAT 0x080 /* PAT PAT index */
#define X86_PG_G 0x100 /* G Global */
#define X86_PG_AVAIL1 0x200 /* / Available for system */
#define X86_PG_AVAIL2 0x400 /* < programmers use */
#define X86_PG_AVAIL3 0x800 /* \ */
#define X86_PG_PDE_PAT 0x1000 /* PAT PAT index */
#define X86_PG_NX (1ul<<63) /* No-execute */
#define X86_PG_AVAIL(x) (1ul << (x))

/* Page level cache control fields used to determine the PAT type */
#define PG_PDE_CACHE (PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD)
#define PG_PTE_CACHE (PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD)
#define X86_PG_PDE_CACHE (X86_PG_PDE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)
#define X86_PG_PTE_CACHE (X86_PG_PTE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)

/*
* Intel extended page table (EPT) bit definitions.
*/
#define EPT_PG_READ 0x001 /* R Read */
#define EPT_PG_WRITE 0x002 /* W Write */
#define EPT_PG_EXECUTE 0x004 /* X Execute */
#define EPT_PG_IGNORE_PAT 0x040 /* IPAT Ignore PAT */
#define EPT_PG_PS 0x080 /* PS Page size */
#define EPT_PG_A 0x100 /* A Accessed */
#define EPT_PG_M 0x200 /* D Dirty */
#define EPT_PG_MEMORY_TYPE(x) ((x) << 3) /* MT Memory Type */

/*
* Define the PG_xx macros in terms of the bits on x86 PTEs.
*/
#define PG_V X86_PG_V
#define PG_RW X86_PG_RW
#define PG_U X86_PG_U
#define PG_NC_PWT X86_PG_NC_PWT
#define PG_NC_PCD X86_PG_NC_PCD
#define PG_A X86_PG_A
#define PG_M X86_PG_M
#define PG_PS X86_PG_PS
#define PG_PTE_PAT X86_PG_PTE_PAT
#define PG_G X86_PG_G
#define PG_AVAIL1 X86_PG_AVAIL1
#define PG_AVAIL2 X86_PG_AVAIL2
#define PG_AVAIL3 X86_PG_AVAIL3
#define PG_PDE_PAT X86_PG_PDE_PAT
#define PG_NX X86_PG_NX
#define PG_PDE_CACHE X86_PG_PDE_CACHE
#define PG_PTE_CACHE X86_PG_PTE_CACHE

/* Our various interpretations of the above */
#define PG_W X86_PG_AVAIL3 /* "Wired" pseudoflag */
#define PG_MANAGED X86_PG_AVAIL2
#define EPT_PG_EMUL_V X86_PG_AVAIL(52)
#define EPT_PG_EMUL_RW X86_PG_AVAIL(53)
#define PG_FRAME (0x000ffffffffff000ul)
#define PG_PS_FRAME (0x000fffffffe00000ul)

/*
* Promotion to a 2MB (PDE) page mapping requires that the corresponding 4KB
* (PTE) page mappings have identical settings for the following fields:
*/
#define PG_PTE_PROMOTE (PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_PAT | \
PG_M | PG_A | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V)
#define PG_PTE_PROMOTE (PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_CACHE | \
PG_M | PG_A | PG_U | PG_RW | PG_V)

/*
* Page Protection Exception bits
@@ -96,6 +129,28 @@
#define PGEX_RSV 0x08 /* reserved PTE field is non-zero */
#define PGEX_I 0x10 /* during an instruction fetch */

/*
* undef the PG_xx macros that define bits in the regular x86 PTEs that
* have a different position in nested PTEs. This is done when compiling
* code that needs to be aware of the differences between regular x86 and
* nested PTEs.
*
* The appropriate bitmask will be calculated at runtime based on the pmap
* type.
*/
#ifdef AMD64_NPT_AWARE
#undef PG_AVAIL1 /* X86_PG_AVAIL1 aliases with EPT_PG_M */
#undef PG_G
#undef PG_A
#undef PG_M
#undef PG_PDE_PAT
#undef PG_PDE_CACHE
#undef PG_PTE_PAT
#undef PG_PTE_CACHE
#undef PG_RW
#undef PG_V
#endif

/*
* Pte related macros. This is complicated by having to deal with
* the sign extension of the 48th bit.
@@ -256,6 +311,11 @@ struct pmap {
int pm_flags;
};

/* flags */
#define PMAP_PDE_SUPERPAGE (1 << 0) /* supports 2MB superpages */
#define PMAP_EMULATE_AD_BITS (1 << 1) /* needs A/D bits emulation */
#define PMAP_SUPPORTS_EXEC_ONLY (1 << 2) /* execute only mappings ok */

typedef struct pmap *pmap_t;

#ifdef _KERNEL
@@ -272,6 +332,9 @@ extern struct pmap kernel_pmap_store;
#define PMAP_MTX(pmap) (&(pmap)->pm_mtx)
#define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx)
#define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx)

int pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags);
int pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype);
#endif

/*
@@ -330,7 +393,7 @@ void pmap_invalidate_all(pmap_t);
void pmap_invalidate_cache(void);
void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);

void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
#endif /* _KERNEL */

#endif /* !LOCORE */
@@ -39,19 +39,18 @@ struct seg_desc;
struct vm_exit;
struct vm_run;
struct vlapic;
struct vmspace;
struct vm_object;
struct pmap;

enum x2apic_state;

typedef int (*vmm_init_func_t)(void);
typedef int (*vmm_cleanup_func_t)(void);
typedef void * (*vmi_init_func_t)(struct vm *vm); /* instance specific apis */
typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip);
typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
struct pmap *pmap);
typedef void (*vmi_cleanup_func_t)(void *vmi);
typedef int (*vmi_mmap_set_func_t)(void *vmi, vm_paddr_t gpa,
vm_paddr_t hpa, size_t length,
vm_memattr_t attr, int prot,
boolean_t superpages_ok);
typedef vm_paddr_t (*vmi_mmap_get_func_t)(void *vmi, vm_paddr_t gpa);
typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
uint64_t *retval);
typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num,
@@ -65,6 +64,8 @@ typedef int (*vmi_inject_event_t)(void *vmi, int vcpu,
uint32_t code, int code_valid);
typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
typedef void (*vmi_vmspace_free)(struct vmspace *vmspace);

struct vmm_ops {
vmm_init_func_t init; /* module wide initialization */
@@ -73,8 +74,6 @@ struct vmm_ops {
vmi_init_func_t vminit; /* vm-specific initialization */
vmi_run_func_t vmrun;
vmi_cleanup_func_t vmcleanup;
vmi_mmap_set_func_t vmmmap_set;
vmi_mmap_get_func_t vmmmap_get;
vmi_get_register_t vmgetreg;
vmi_set_register_t vmsetreg;
vmi_get_desc_t vmgetdesc;
@@ -82,6 +81,8 @@ struct vmm_ops {
vmi_inject_event_t vminject;
vmi_get_cap_t vmgetcap;
vmi_set_cap_t vmsetcap;
vmi_vmspace_alloc vmspace_alloc;
vmi_vmspace_free vmspace_free;
};

extern struct vmm_ops vmm_ops_intel;
@@ -93,9 +94,14 @@ const char *vm_name(struct vm *vm);
int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
vm_paddr_t vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t size);
void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
void **cookie);
void vm_gpa_release(void *cookie);
int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
struct vm_memory_segment *seg);
int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
vm_offset_t *offset, struct vm_object **object);
boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
@@ -130,8 +136,9 @@ void *vm_iommu_domain(struct vm *vm);

enum vcpu_state {
VCPU_IDLE,
VCPU_FROZEN,
VCPU_RUNNING,
VCPU_CANNOT_RUN,
VCPU_SLEEPING,
};

int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state);
@@ -145,7 +152,9 @@ vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)

void *vcpu_stats(struct vm *vm, int vcpu);
void vm_interrupt_hostcpu(struct vm *vm, int vcpu);

struct vmspace *vm_get_vmspace(struct vm *vm);
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
#endif /* KERNEL */

#include <machine/vmm_instruction_emul.h>
@@ -247,6 +256,7 @@ enum vm_exitcode {
VM_EXITCODE_MTRAP,
VM_EXITCODE_PAUSE,
VM_EXITCODE_PAGING,
VM_EXITCODE_INST_EMUL,
VM_EXITCODE_SPINUP_AP,
VM_EXITCODE_MAX
};
@@ -266,8 +276,15 @@ struct vm_exit {
} inout;
struct {
uint64_t gpa;
struct vie vie;
int fault_type;
int protection;
} paging;
struct {
uint64_t gpa;
uint64_t gla;
uint64_t cr3;
struct vie vie;
} inst_emul;
/*
* VMX specific payload. Used when there is no "better"
* exitcode to represent the VM-exit.
@@ -36,7 +36,8 @@ int vmmdev_cleanup(void);

struct vm_memory_segment {
vm_paddr_t gpa; /* in */
size_t len; /* in */
size_t len;
int wired;
};

struct vm_register {
@@ -135,6 +136,12 @@ struct vm_x2apic {
enum x2apic_state state;
};

struct vm_gpa_pte {
uint64_t gpa; /* in */
uint64_t pte[4]; /* out */
int ptenum;
};

enum {
/* general routines */
IOCNUM_ABIVERS = 0,
@@ -145,6 +152,7 @@ enum {
/* memory apis */
IOCNUM_MAP_MEMORY = 10,
IOCNUM_GET_MEMORY_SEG = 11,
IOCNUM_GET_GPA_PMAP = 12,

/* register/state accessors */
IOCNUM_SET_REGISTER = 20,
@@ -215,4 +223,6 @@ enum {
_IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic)
#define VM_GET_X2APIC_STATE \
_IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic)
#define VM_GET_GPA_PMAP \
_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
#endif
@@ -102,11 +102,15 @@ int vmm_emulate_instruction(void *vm, int cpuid, uint64_t gpa, struct vie *vie,
#ifdef _KERNEL
/*
* APIs to fetch and decode the instruction from nested page fault handler.
*
* 'vie' must be initialized before calling 'vmm_fetch_instruction()'
*/
int vmm_fetch_instruction(struct vm *vm, int cpuid,
uint64_t rip, int inst_length, uint64_t cr3,
struct vie *vie);

void vie_init(struct vie *vie);

/*
* Decode the instruction fetched into 'vie' so it can be emulated.
*
@@ -54,7 +54,7 @@ amdv_cleanup(void)
}

static void *
amdv_vminit(struct vm *vm)
amdv_vminit(struct vm *vm, struct pmap *pmap)
{

printf("amdv_vminit: not implemented\n");
@@ -62,7 +62,7 @@ amdv_vminit(struct vm *vm)
}

static int
amdv_vmrun(void *arg, int vcpu, register_t rip)
amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap)
{

printf("amdv_vmrun: not implemented\n");
@@ -77,23 +77,6 @@ amdv_vmcleanup(void *arg)
return;
}

static int
amdv_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
vm_memattr_t attr, int prot, boolean_t spok)
{

printf("amdv_vmmmap_set: not implemented\n");
return (EINVAL);
}

static vm_paddr_t
amdv_vmmmap_get(void *arg, vm_paddr_t gpa)
{

printf("amdv_vmmmap_get: not implemented\n");
return (EINVAL);
}

static int
amdv_getreg(void *arg, int vcpu, int regnum, uint64_t *retval)
{
@@ -151,21 +134,37 @@ amdv_setcap(void *arg, int vcpu, int type, int val)
return (EINVAL);
}

static struct vmspace *
amdv_vmspace_alloc(vm_offset_t min, vm_offset_t max)
{

printf("amdv_vmspace_alloc: not implemented\n");
return (NULL);
}

static void
amdv_vmspace_free(struct vmspace *vmspace)
{

printf("amdv_vmspace_free: not implemented\n");
return;
}

struct vmm_ops vmm_ops_amd = {
amdv_init,
amdv_cleanup,
amdv_vminit,
amdv_vmrun,
amdv_vmcleanup,
amdv_vmmmap_set,
amdv_vmmmap_get,
amdv_getreg,
amdv_setreg,
amdv_getdesc,
amdv_setdesc,
amdv_inject_event,
amdv_getcap,
amdv_setcap
amdv_setcap,
amdv_vmspace_alloc,
amdv_vmspace_free,
};

static int
@@ -29,32 +29,31 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/param.h>
#include <machine/cpufunc.h>
#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <vm/vm_extern.h>

#include <machine/vmm.h>

#include "vmx_cpufunc.h"
#include "vmx_msr.h"
#include "vmx.h"
#include "ept.h"

#define EPT_SUPPORTS_EXEC_ONLY(cap) ((cap) & (1UL << 0))
#define EPT_PWL4(cap) ((cap) & (1UL << 6))
#define EPT_MEMORY_TYPE_WB(cap) ((cap) & (1UL << 14))
#define EPT_PDE_SUPERPAGE(cap) ((cap) & (1UL << 16)) /* 2MB pages */
#define EPT_PDPTE_SUPERPAGE(cap) ((cap) & (1UL << 17)) /* 1GB pages */
#define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32))
#define INVEPT_SUPPORTED(cap) ((cap) & (1UL << 20))
#define AD_BITS_SUPPORTED(cap) ((cap) & (1UL << 21))
#define INVVPID_SUPPORTED(cap) ((cap) & (1UL << 32))

#define INVVPID_ALL_TYPES_MASK 0xF0000000000UL
#define INVVPID_ALL_TYPES_SUPPORTED(cap) \
@@ -64,28 +63,22 @@ __FBSDID("$FreeBSD$");
#define INVEPT_ALL_TYPES_SUPPORTED(cap) \
(((cap) & INVEPT_ALL_TYPES_MASK) == INVEPT_ALL_TYPES_MASK)

#define EPT_PG_RD (1 << 0)
#define EPT_PG_WR (1 << 1)
#define EPT_PG_EX (1 << 2)
#define EPT_PG_MEMORY_TYPE(x) ((x) << 3)
#define EPT_PG_IGNORE_PAT (1 << 6)
#define EPT_PG_SUPERPAGE (1 << 7)
#define EPT_PWLEVELS 4 /* page walk levels */
#define EPT_ENABLE_AD_BITS (1 << 6)

#define EPT_ADDR_MASK ((uint64_t)-1 << 12)
SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, ept, CTLFLAG_RW, NULL, NULL);

MALLOC_DECLARE(M_VMX);
static int ept_enable_ad_bits;

static uint64_t page_sizes_mask;

/*
* Set this to 1 to have the EPT tables respect the guest PAT settings
*/
static int ept_pat_passthru;
static int ept_pmap_flags;
SYSCTL_INT(_hw_vmm_ept, OID_AUTO, pmap_flags, CTLFLAG_RD,
&ept_pmap_flags, 0, NULL);

int
ept_init(void)
{
int page_shift;
int use_hw_ad_bits, use_superpages, use_exec_only;
uint64_t cap;

cap = rdmsr(MSR_VMX_EPT_VPID_CAP);
@@ -105,17 +98,22 @@ ept_init(void)
!INVEPT_ALL_TYPES_SUPPORTED(cap))
return (EINVAL);

/* Set bits in 'page_sizes_mask' for each valid page size */
page_shift = PAGE_SHIFT;
page_sizes_mask = 1UL << page_shift; /* 4KB page */
use_superpages = 1;
TUNABLE_INT_FETCH("hw.vmm.ept.use_superpages", &use_superpages);
if (use_superpages && EPT_PDE_SUPERPAGE(cap))
ept_pmap_flags |= PMAP_PDE_SUPERPAGE; /* 2MB superpage */

page_shift += 9;
if (EPT_PDE_SUPERPAGE(cap))
page_sizes_mask |= 1UL << page_shift; /* 2MB superpage */
use_hw_ad_bits = 1;
TUNABLE_INT_FETCH("hw.vmm.ept.use_hw_ad_bits", &use_hw_ad_bits);
if (use_hw_ad_bits && AD_BITS_SUPPORTED(cap))
ept_enable_ad_bits = 1;
else
ept_pmap_flags |= PMAP_EMULATE_AD_BITS;

page_shift += 9;
if (EPT_PDPTE_SUPERPAGE(cap))
page_sizes_mask |= 1UL << page_shift; /* 1GB superpage */
use_exec_only = 1;
TUNABLE_INT_FETCH("hw.vmm.ept.use_exec_only", &use_exec_only);
if (use_exec_only && EPT_SUPPORTS_EXEC_ONLY(cap))
ept_pmap_flags |= PMAP_SUPPORTS_EXEC_ONLY;

return (0);
}
@@ -154,233 +152,6 @@ ept_dump(uint64_t *ptp, int nlevels)
}
#endif

static size_t
ept_create_mapping(uint64_t *ptp, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
vm_memattr_t attr, vm_prot_t prot, boolean_t spok)
{
int spshift, ptpshift, ptpindex, nlevels;

/*
* Compute the size of the mapping that we can accomodate.
*
* This is based on three factors:
* - super page sizes supported by the processor
* - alignment of the region starting at 'gpa' and 'hpa'
* - length of the region 'len'
*/
spshift = PAGE_SHIFT;
if (spok)
spshift += (EPT_PWLEVELS - 1) * 9;
while (spshift >= PAGE_SHIFT) {
uint64_t spsize = 1UL << spshift;
if ((page_sizes_mask & spsize) != 0 &&
(gpa & (spsize - 1)) == 0 &&
(hpa & (spsize - 1)) == 0 &&
length >= spsize) {
break;
}
spshift -= 9;
}

if (spshift < PAGE_SHIFT) {
panic("Invalid spshift for gpa 0x%016lx, hpa 0x%016lx, "
"length 0x%016lx, page_sizes_mask 0x%016lx",
gpa, hpa, length, page_sizes_mask);
}

nlevels = EPT_PWLEVELS;
while (--nlevels >= 0) {
ptpshift = PAGE_SHIFT + nlevels * 9;
ptpindex = (gpa >> ptpshift) & 0x1FF;

/* We have reached the leaf mapping */
if (spshift >= ptpshift)
break;

/*
* We are working on a non-leaf page table page.
*
* Create the next level page table page if necessary and point
* to it from the current page table.
*/
if (ptp[ptpindex] == 0) {
void *nlp = malloc(PAGE_SIZE, M_VMX, M_WAITOK | M_ZERO);
ptp[ptpindex] = vtophys(nlp);
ptp[ptpindex] |= EPT_PG_RD | EPT_PG_WR | EPT_PG_EX;
}

/* Work our way down to the next level page table page */
ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & EPT_ADDR_MASK);
}

if ((gpa & ((1UL << ptpshift) - 1)) != 0) {
panic("ept_create_mapping: gpa 0x%016lx and ptpshift %d "
"mismatch\n", gpa, ptpshift);
}

if (prot != VM_PROT_NONE) {
/* Do the mapping */
ptp[ptpindex] = hpa;

/* Apply the access controls */
if (prot & VM_PROT_READ)
ptp[ptpindex] |= EPT_PG_RD;
if (prot & VM_PROT_WRITE)
ptp[ptpindex] |= EPT_PG_WR;
if (prot & VM_PROT_EXECUTE)
ptp[ptpindex] |= EPT_PG_EX;

/*
* By default the PAT type is ignored - this appears to
* be how other hypervisors handle EPT. Allow this to be
* overridden.
*/
ptp[ptpindex] |= EPT_PG_MEMORY_TYPE(attr);
if (!ept_pat_passthru)
ptp[ptpindex] |= EPT_PG_IGNORE_PAT;

if (nlevels > 0)
ptp[ptpindex] |= EPT_PG_SUPERPAGE;
} else {
/* Remove the mapping */
ptp[ptpindex] = 0;
}

return (1UL << ptpshift);
}

static vm_paddr_t
ept_lookup_mapping(uint64_t *ptp, vm_paddr_t gpa)
{
int nlevels, ptpshift, ptpindex;
uint64_t ptpval, hpabase, pgmask;

nlevels = EPT_PWLEVELS;
while (--nlevels >= 0) {
ptpshift = PAGE_SHIFT + nlevels * 9;
ptpindex = (gpa >> ptpshift) & 0x1FF;

ptpval = ptp[ptpindex];

/* Cannot make progress beyond this point */
if ((ptpval & (EPT_PG_RD | EPT_PG_WR | EPT_PG_EX)) == 0)
break;

if (nlevels == 0 || (ptpval & EPT_PG_SUPERPAGE)) {
pgmask = (1UL << ptpshift) - 1;
hpabase = ptpval & ~pgmask;
return (hpabase | (gpa & pgmask));
}

/* Work our way down to the next level page table page */
ptp = (uint64_t *)PHYS_TO_DMAP(ptpval & EPT_ADDR_MASK);
}

return ((vm_paddr_t)-1);
}

static void
ept_free_pt_entry(pt_entry_t pte)
{
if (pte == 0)
return;

/* sanity check */
if ((pte & EPT_PG_SUPERPAGE) != 0)
panic("ept_free_pt_entry: pte cannot have superpage bit");

return;
}

static void
ept_free_pd_entry(pd_entry_t pde)
{
pt_entry_t *pt;
int i;

if (pde == 0)
return;

if ((pde & EPT_PG_SUPERPAGE) == 0) {
pt = (pt_entry_t *)PHYS_TO_DMAP(pde & EPT_ADDR_MASK);
for (i = 0; i < NPTEPG; i++)
ept_free_pt_entry(pt[i]);
free(pt, M_VMX); /* free the page table page */
}
}

static void
ept_free_pdp_entry(pdp_entry_t pdpe)
{
pd_entry_t *pd;
int i;

if (pdpe == 0)
return;

if ((pdpe & EPT_PG_SUPERPAGE) == 0) {
pd = (pd_entry_t *)PHYS_TO_DMAP(pdpe & EPT_ADDR_MASK);
for (i = 0; i < NPDEPG; i++)
ept_free_pd_entry(pd[i]);
free(pd, M_VMX); /* free the page directory page */
}
}

static void
ept_free_pml4_entry(pml4_entry_t pml4e)
{
pdp_entry_t *pdp;
int i;

if (pml4e == 0)
return;

if ((pml4e & EPT_PG_SUPERPAGE) == 0) {
pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4e & EPT_ADDR_MASK);
for (i = 0; i < NPDPEPG; i++)
ept_free_pdp_entry(pdp[i]);
free(pdp, M_VMX); /* free the page directory ptr page */
}
}

void
ept_vmcleanup(struct vmx *vmx)
{
int i;

for (i = 0; i < NPML4EPG; i++)
ept_free_pml4_entry(vmx->pml4ept[i]);
}

int
ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t len,
vm_memattr_t attr, int prot, boolean_t spok)
{
size_t n;
struct vmx *vmx = arg;

while (len > 0) {
n = ept_create_mapping(vmx->pml4ept, gpa, hpa, len, attr,
prot, spok);
len -= n;
gpa += n;
hpa += n;
}

return (0);
}

vm_paddr_t
ept_vmmmap_get(void *arg, vm_paddr_t gpa)
{
vm_paddr_t hpa;
struct vmx *vmx;

vmx = arg;
hpa = ept_lookup_mapping(vmx->pml4ept, gpa);
return (hpa);
}

static void
invept_single_context(void *arg)
{
@@ -390,11 +161,44 @@ invept_single_context(void *arg)
}

void
ept_invalidate_mappings(u_long pml4ept)
ept_invalidate_mappings(u_long eptp)
{
struct invept_desc invept_desc = { 0 };

invept_desc.eptp = EPTP(pml4ept);
invept_desc.eptp = eptp;

smp_rendezvous(NULL, invept_single_context, NULL, &invept_desc);
}

static int
ept_pinit(pmap_t pmap)
{

return (pmap_pinit_type(pmap, PT_EPT, ept_pmap_flags));
}

struct vmspace *
ept_vmspace_alloc(vm_offset_t min, vm_offset_t max)
{

return (vmspace_alloc(min, max, ept_pinit));
}

void
ept_vmspace_free(struct vmspace *vmspace)
{

vmspace_free(vmspace);
}

uint64_t
eptp(uint64_t pml4)
{
uint64_t eptp_val;

eptp_val = pml4 | (EPT_PWLEVELS - 1) << 3 | PAT_WRITE_BACK;
if (ept_enable_ad_bits)
eptp_val |= EPT_ENABLE_AD_BITS;

return (eptp_val);
}
@@ -31,13 +31,9 @@

struct vmx;

#define EPT_PWLEVELS 4 /* page walk levels */
#define EPTP(pml4) ((pml4) | (EPT_PWLEVELS - 1) << 3 | PAT_WRITE_BACK)

int ept_init(void);
int ept_vmmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, size_t length,
vm_memattr_t attr, int prot, boolean_t allow_superpage_mappings);
vm_paddr_t ept_vmmmap_get(void *arg, vm_paddr_t gpa);
void ept_invalidate_mappings(u_long ept_pml4);
void ept_vmcleanup(struct vmx *vmx);
void ept_invalidate_mappings(u_long eptp);
struct vmspace *ept_vmspace_alloc(vm_offset_t min, vm_offset_t max);
void ept_vmspace_free(struct vmspace *vmspace);
uint64_t eptp(uint64_t pml4);
#endif
@@ -318,14 +318,14 @@ done:

int
vmcs_set_defaults(struct vmcs *vmcs,
u_long host_rip, u_long host_rsp, u_long ept_pml4,
u_long host_rip, u_long host_rsp, uint64_t eptp,
uint32_t pinbased_ctls, uint32_t procbased_ctls,
uint32_t procbased_ctls2, uint32_t exit_ctls,
uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
{
int error, codesel, datasel, tsssel;
u_long cr0, cr4, efer;
uint64_t eptp, pat, fsbase, idtrbase;
uint64_t pat, fsbase, idtrbase;
uint32_t exc_bitmap;

codesel = vmm_get_host_codesel();
@@ -432,7 +432,6 @@ vmcs_set_defaults(struct vmcs *vmcs,
goto done;

/* eptp */
eptp = EPTP(ept_pml4);
if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
goto done;

@@ -47,7 +47,7 @@ struct msr_entry {

int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count);
int vmcs_set_defaults(struct vmcs *vmcs, u_long host_rip, u_long host_rsp,
u_long ept_pml4,
uint64_t eptp,
uint32_t pinbased_ctls, uint32_t procbased_ctls,
uint32_t procbased_ctls2, uint32_t exit_ctls,
uint32_t entry_ctls, u_long msr_bitmap,
@@ -68,6 +68,8 @@ uint64_t vmcs_read(uint32_t encoding);
#define vmcs_guest_cr3() vmcs_read(VMCS_GUEST_CR3)
#define vmcs_gpa() vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)
#define vmcs_gla() vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)
#define vmcs_idt_vectoring_info() vmcs_read(VMCS_IDT_VECTORING_INFO)
#define vmcs_idt_vectoring_err() vmcs_read(VMCS_IDT_VECTORING_ERROR)

#endif /* _KERNEL */

@@ -313,6 +315,12 @@ uint64_t vmcs_read(uint32_t encoding);
#define VMCS_INTERRUPTION_INFO_HW_INTR (0 << 8)
#define VMCS_INTERRUPTION_INFO_NMI (2 << 8)

/*
* VMCS IDT-Vectoring information fields
*/
#define VMCS_IDT_VEC_VALID (1 << 31)
#define VMCS_IDT_VEC_ERRCODE_VALID (1 << 11)

/*
* VMCS Guest interruptibility field
*/
@@ -332,6 +340,9 @@ uint64_t vmcs_read(uint32_t encoding);
#define EPT_VIOLATION_DATA_READ (1UL << 0)
#define EPT_VIOLATION_DATA_WRITE (1UL << 1)
#define EPT_VIOLATION_INST_FETCH (1UL << 2)
#define EPT_VIOLATION_GPA_READABLE (1UL << 3)
#define EPT_VIOLATION_GPA_WRITEABLE (1UL << 4)
#define EPT_VIOLATION_GPA_EXECUTABLE (1UL << 5)
#define EPT_VIOLATION_GLA_VALID (1UL << 7)
#define EPT_VIOLATION_XLAT_VALID (1UL << 8)
@@ -49,8 +49,6 @@ __FBSDID("$FreeBSD$");
#include <machine/specialreg.h>
#include <machine/vmparam.h>

#include <x86/apicreg.h>

#include <machine/vmm.h>
#include "vmm_host.h"
#include "vmm_lapic.h"
@@ -167,9 +165,6 @@ static int cap_pause_exit;
static int cap_unrestricted_guest;
static int cap_monitor_trap;

/* statistics */
static VMM_STAT_INTEL(VMEXIT_HLT_IGNORED, "number of times hlt was ignored");

static struct unrhdr *vpid_unr;
static u_int vpid_alloc_failed;
SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD,
@@ -740,7 +735,7 @@ vmx_setup_cr_shadow(int which, struct vmcs *vmcs, uint32_t initial)
#define vmx_setup_cr4_shadow(vmcs,init) vmx_setup_cr_shadow(4, (vmcs), (init))

static void *
vmx_vminit(struct vm *vm)
vmx_vminit(struct vm *vm, pmap_t pmap)
{
uint16_t vpid[VM_MAXCPU];
int i, error, guest_msr_count;
@@ -753,6 +748,8 @@ vmx_vminit(struct vm *vm)
}
vmx->vm = vm;

vmx->eptp = eptp(vtophys((vm_offset_t)pmap->pm_pml4));

/*
* Clean up EPTP-tagged guest physical and combined mappings
*
@@ -762,7 +759,7 @@ vmx_vminit(struct vm *vm)
*
* Combined mappings for this EP4TA are also invalidated for all VPIDs.
*/
ept_invalidate_mappings(vtophys(vmx->pml4ept));
ept_invalidate_mappings(vmx->eptp);

msr_bitmap_initialize(vmx->msr_bitmap);

@@ -818,7 +815,7 @@ vmx_vminit(struct vm *vm)
error = vmcs_set_defaults(&vmx->vmcs[i],
(u_long)vmx_longjmp,
(u_long)&vmx->ctx[i],
vtophys(vmx->pml4ept),
vmx->eptp,
pinbased_ctls,
procbased_ctls,
procbased_ctls2,
@@ -856,6 +853,9 @@ vmx_vminit(struct vm *vm)
error = vmx_setup_cr4_shadow(&vmx->vmcs[i], 0);
if (error != 0)
panic("vmx_setup_cr4_shadow %d", error);

vmx->ctx[i].pmap = pmap;
vmx->ctx[i].eptp = vmx->eptp;
}

return (vmx);
@@ -1281,21 +1281,49 @@ vmx_emulate_cr_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
}

static int
vmx_ept_fault(struct vm *vm, int cpu,
uint64_t gla, uint64_t gpa, uint64_t rip, int inst_length,
uint64_t cr3, uint64_t ept_qual, struct vie *vie)
ept_fault_type(uint64_t ept_qual)
{
int read, write, error;
int fault_type;

/* EPT violation on an instruction fetch doesn't make sense here */
if (ept_qual & EPT_VIOLATION_DATA_WRITE)
fault_type = VM_PROT_WRITE;
else if (ept_qual & EPT_VIOLATION_INST_FETCH)
fault_type = VM_PROT_EXECUTE;
else
fault_type= VM_PROT_READ;

return (fault_type);
}

static int
ept_protection(uint64_t ept_qual)
{
int prot = 0;

if (ept_qual & EPT_VIOLATION_GPA_READABLE)
prot |= VM_PROT_READ;
if (ept_qual & EPT_VIOLATION_GPA_WRITEABLE)
prot |= VM_PROT_WRITE;
if (ept_qual & EPT_VIOLATION_GPA_EXECUTABLE)
prot |= VM_PROT_EXECUTE;

return (prot);
}

static boolean_t
ept_emulation_fault(uint64_t ept_qual)
{
int read, write;

/* EPT fault on an instruction fetch doesn't make sense here */
if (ept_qual & EPT_VIOLATION_INST_FETCH)
return (UNHANDLED);
return (FALSE);

/* EPT violation must be a read fault or a write fault */
/* EPT fault must be a read fault or a write fault */
read = ept_qual & EPT_VIOLATION_DATA_READ ? 1 : 0;
write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0;
if ((read | write) == 0)
return (UNHANDLED);
return (FALSE);

/*
* The EPT violation must have been caused by accessing a
@@ -1304,26 +1332,10 @@ vmx_ept_fault(struct vm *vm, int cpu,
*/
if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 ||
(ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) {
return (UNHANDLED);
return (FALSE);
}

/* Fetch, decode and emulate the faulting instruction */
if (vmm_fetch_instruction(vm, cpu, rip, inst_length, cr3, vie) != 0)
return (UNHANDLED);

if (vmm_decode_instruction(vm, cpu, gla, vie) != 0)
return (UNHANDLED);

/*
* Check if this is a local apic access
*/
if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE)
return (UNHANDLED);

error = vmm_emulate_instruction(vm, cpu, gpa, vie,
lapic_mmio_read, lapic_mmio_write, 0);

return (error ? UNHANDLED : HANDLED);
return (TRUE);
}

static int
@@ -1332,18 +1344,47 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
int error, handled;
struct vmcs *vmcs;
struct vmxctx *vmxctx;
uint32_t eax, ecx, edx;
uint64_t qual, gla, gpa, cr3, intr_info;
uint32_t eax, ecx, edx, idtvec_info, idtvec_err, reason;
uint64_t qual, gpa;

handled = 0;
vmcs = &vmx->vmcs[vcpu];
vmxctx = &vmx->ctx[vcpu];
qual = vmexit->u.vmx.exit_qualification;
reason = vmexit->u.vmx.exit_reason;
vmexit->exitcode = VM_EXITCODE_BOGUS;

vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1);

switch (vmexit->u.vmx.exit_reason) {
/*
* VM exits that could be triggered during event injection on the
* previous VM entry need to be handled specially by re-injecting
* the event.
*
* See "Information for VM Exits During Event Delivery" in Intel SDM
* for details.
*/
switch (reason) {
case EXIT_REASON_EPT_FAULT:
case EXIT_REASON_EPT_MISCONFIG:
case EXIT_REASON_APIC:
case EXIT_REASON_TASK_SWITCH:
case EXIT_REASON_EXCEPTION:
idtvec_info = vmcs_idt_vectoring_info();
if (idtvec_info & VMCS_IDT_VEC_VALID) {
idtvec_info &= ~(1 << 12); /* clear undefined bit */
vmwrite(VMCS_ENTRY_INTR_INFO, idtvec_info);
if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
idtvec_err = vmcs_idt_vectoring_err();
vmwrite(VMCS_ENTRY_EXCEPTION_ERROR, idtvec_err);
}
vmwrite(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length);
}
default:
break;
}

switch (reason) {
case EXIT_REASON_CR_ACCESS:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1);
handled = vmx_emulate_cr_access(vmx, vcpu, qual);
@@ -1374,19 +1415,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
break;
case EXIT_REASON_HLT:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1);
/*
* If there is an event waiting to be injected then there is
* no need to 'hlt'.
*/
error = vmread(VMCS_ENTRY_INTR_INFO, &intr_info);
if (error)
panic("vmx_exit_process: vmread(intrinfo) %d", error);

if (intr_info & VMCS_INTERRUPTION_INFO_VALID) {
handled = 1;
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT_IGNORED, 1);
} else
vmexit->exitcode = VM_EXITCODE_HLT;
vmexit->exitcode = VM_EXITCODE_HLT;
break;
case EXIT_REASON_MTF:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
@@ -1440,15 +1469,22 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
break;
case EXIT_REASON_EPT_FAULT:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EPT_FAULT, 1);
gla = vmcs_gla();
/*
* If 'gpa' lies within the address space allocated to
* memory then this must be a nested page fault otherwise
* this must be an instruction that accesses MMIO space.
*/
gpa = vmcs_gpa();
cr3 = vmcs_guest_cr3();
handled = vmx_ept_fault(vmx->vm, vcpu, gla, gpa,
vmexit->rip, vmexit->inst_length,
cr3, qual, &vmexit->u.paging.vie);
if (!handled) {
if (vm_mem_allocated(vmx->vm, gpa)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->u.paging.gpa = gpa;
vmexit->u.paging.fault_type = ept_fault_type(qual);
vmexit->u.paging.protection = ept_protection(qual);
} else if (ept_emulation_fault(qual)) {
vmexit->exitcode = VM_EXITCODE_INST_EMUL;
vmexit->u.inst_emul.gpa = gpa;
vmexit->u.inst_emul.gla = vmcs_gla();
vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
}
break;
default:
@@ -1470,14 +1506,6 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vm_exit_update_rip(vmexit);
vmexit->rip += vmexit->inst_length;
vmexit->inst_length = 0;

/*
* Special case for spinning up an AP - exit to userspace to
* give the controlling process a chance to intercept and
* spin up a thread for the AP.
*/
if (vmexit->exitcode == VM_EXITCODE_SPINUP_AP)
handled = 0;
} else {
if (vmexit->exitcode == VM_EXITCODE_BOGUS) {
/*
@@ -1497,7 +1525,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
}

static int
vmx_run(void *arg, int vcpu, register_t rip)
vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap)
{
int error, vie, rc, handled, astpending;
uint32_t exit_reason;
@@ -1505,7 +1533,7 @@ vmx_run(void *arg, int vcpu, register_t rip)
struct vmxctx *vmxctx;
struct vmcs *vmcs;
struct vm_exit *vmexit;

vmx = arg;
vmcs = &vmx->vmcs[vcpu];
vmxctx = &vmx->ctx[vcpu];
@@ -1514,6 +1542,11 @@ vmx_run(void *arg, int vcpu, register_t rip)
astpending = 0;
vmexit = vm_exitinfo(vmx->vm, vcpu);

KASSERT(vmxctx->pmap == pmap,
("pmap %p different than ctx pmap %p", pmap, vmxctx->pmap));
KASSERT(vmxctx->eptp == vmx->eptp,
("eptp %p different than ctx eptp %#lx", eptp, vmxctx->eptp));

/*
* XXX Can we avoid doing this every time we do a vm run?
*/
@@ -1576,6 +1609,9 @@ vmx_run(void *arg, int vcpu, register_t rip)
vmxctx->launch_error, vie);
#endif
goto err_exit;
case VMX_RETURN_INVEPT:
panic("vm %s:%d invept error %d",
vm_name(vmx->vm), vcpu, vmxctx->launch_error);
default:
panic("vmx_setjmp returned %d", rc);
}
@@ -1654,7 +1690,6 @@ vmx_vmcleanup(void *arg)
if (error != 0)
panic("vmx_vmcleanup: vmclear error %d on vcpu 0", error);

ept_vmcleanup(vmx);
free(vmx, M_VMX);

return;
@@ -2000,13 +2035,13 @@ struct vmm_ops vmm_ops_intel = {
vmx_vminit,
vmx_run,
vmx_vmcleanup,
ept_vmmmap_set,
ept_vmmmap_get,
vmx_getreg,
vmx_setreg,
vmx_getdesc,
vmx_setdesc,
vmx_inject,
vmx_getcap,
vmx_setcap
vmx_setcap,
ept_vmspace_alloc,
ept_vmspace_free,
};
@@ -31,6 +31,8 @@

#include "vmcs.h"

struct pmap;

#define GUEST_MSR_MAX_ENTRIES 64 /* arbitrary */

struct vmxctx {
@@ -68,6 +70,15 @@ struct vmxctx {

int launched; /* vmcs launch state */
int launch_error;

long eptgen[MAXCPU]; /* cached pmap->pm_eptgen */

/*
* The 'eptp' and the 'pmap' do not change during the lifetime of
* the VM so it is safe to keep a copy in each vcpu's vmxctx.
*/
vm_paddr_t eptp;
struct pmap *pmap;
};

struct vmxcap {
@@ -82,16 +93,15 @@ struct vmxstate {

/* virtual machine softc */
struct vmx {
pml4_entry_t pml4ept[NPML4EPG];
struct vmcs vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */
char msr_bitmap[PAGE_SIZE];
struct msr_entry guest_msrs[VM_MAXCPU][GUEST_MSR_MAX_ENTRIES];
struct vmxctx ctx[VM_MAXCPU];
struct vmxcap cap[VM_MAXCPU];
struct vmxstate state[VM_MAXCPU];
uint64_t eptp;
struct vm *vm;
};
CTASSERT((offsetof(struct vmx, pml4ept) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct vmx, guest_msrs) & 15) == 0);
@@ -101,6 +111,7 @@ CTASSERT((offsetof(struct vmx, guest_msrs) & 15) == 0);
#define VMX_RETURN_VMRESUME 2
#define VMX_RETURN_VMLAUNCH 3
#define VMX_RETURN_AST 4
#define VMX_RETURN_INVEPT 5
/*
* vmx_setjmp() returns:
* - 0 when it returns directly
@@ -108,6 +119,7 @@ CTASSERT((offsetof(struct vmx, guest_msrs) & 15) == 0);
* - 2 when it returns from vmx_resume (which would only be in the error case)
* - 3 when it returns from vmx_launch (which would only be in the error case)
* - 4 when it returns from vmx_resume or vmx_launch because of AST pending
* - 5 when it returns from vmx_launch/vmx_resume because of invept error
*/
int vmx_setjmp(struct vmxctx *ctx);
void vmx_longjmp(void); /* returns via vmx_setjmp */
@@ -72,6 +72,10 @@ ASSYM(VMXCTX_HOST_RBX, offsetof(struct vmxctx, host_rbx));
ASSYM(VMXCTX_HOST_RIP, offsetof(struct vmxctx, host_rip));

ASSYM(VMXCTX_LAUNCH_ERROR, offsetof(struct vmxctx, launch_error));
ASSYM(VMXCTX_EPTGEN, offsetof(struct vmxctx, eptgen));

ASSYM(VMXCTX_PMAP, offsetof(struct vmxctx, pmap));
ASSYM(VMXCTX_EPTP, offsetof(struct vmxctx, eptp));

ASSYM(VM_SUCCESS, VM_SUCCESS);
ASSYM(VM_FAIL_INVALID, VM_FAIL_INVALID);
@@ -82,8 +86,13 @@ ASSYM(VMX_RETURN_LONGJMP, VMX_RETURN_LONGJMP);
ASSYM(VMX_RETURN_VMRESUME, VMX_RETURN_VMRESUME);
ASSYM(VMX_RETURN_VMLAUNCH, VMX_RETURN_VMLAUNCH);
ASSYM(VMX_RETURN_AST, VMX_RETURN_AST);
ASSYM(VMX_RETURN_INVEPT, VMX_RETURN_INVEPT);

ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));

ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
ASSYM(PM_EPTGEN, offsetof(struct pmap, pm_eptgen));
@@ -30,6 +30,12 @@

#include "vmx_assym.s"

#ifdef SMP
#define LK lock ;
#else
#define LK
#endif

/*
* Disable interrupts before updating %rsp in VMX_CHECK_AST or
* VMX_GUEST_RESTORE.
@@ -86,15 +92,73 @@
movq VMXCTX_GUEST_R15(%rdi),%r15; \
movq VMXCTX_GUEST_RDI(%rdi),%rdi; /* restore rdi the last */

#define VM_INSTRUCTION_ERROR(reg) \
/*
* Check for an error after executing a VMX instruction.
* 'errreg' will be zero on success and non-zero otherwise.
* 'ctxreg' points to the 'struct vmxctx' associated with the vcpu.
*/
#define VM_INSTRUCTION_ERROR(errreg, ctxreg) \
jnc 1f; \
movl $VM_FAIL_INVALID,reg; /* CF is set */ \
movl $VM_FAIL_INVALID,errreg; /* CF is set */ \
jmp 3f; \
1: jnz 2f; \
movl $VM_FAIL_VALID,reg; /* ZF is set */ \
movl $VM_FAIL_VALID,errreg; /* ZF is set */ \
jmp 3f; \
2: movl $VM_SUCCESS,reg; \
3: movl reg,VMXCTX_LAUNCH_ERROR(%rsp)
2: movl $VM_SUCCESS,errreg; \
3: movl errreg,VMXCTX_LAUNCH_ERROR(ctxreg)

/*
* set or clear the appropriate bit in 'pm_active'
* %rdi = vmxctx
* %rax, %r11 = scratch registers
*/
#define VMX_SET_PM_ACTIVE \
movq VMXCTX_PMAP(%rdi), %r11; \
movl PCPU(CPUID), %eax; \
LK btsl %eax, PM_ACTIVE(%r11)

#define VMX_CLEAR_PM_ACTIVE \
movq VMXCTX_PMAP(%rdi), %r11; \
movl PCPU(CPUID), %eax; \
LK btrl %eax, PM_ACTIVE(%r11)

/*
* If 'vmxctx->eptgen[curcpu]' is not identical to 'pmap->pm_eptgen'
* then we must invalidate all mappings associated with this eptp.
*
* %rdi = vmxctx
* %rax, %rbx, %r11 = scratch registers
*/
#define VMX_CHECK_EPTGEN \
movl PCPU(CPUID), %ebx; \
movq VMXCTX_PMAP(%rdi), %r11; \
movq PM_EPTGEN(%r11), %rax; \
cmpq %rax, VMXCTX_EPTGEN(%rdi, %rbx, 8); \
je 9f; \
\
/* Refresh 'vmxctx->eptgen[curcpu]' */ \
movq %rax, VMXCTX_EPTGEN(%rdi, %rbx, 8); \
\
/* Setup the invept descriptor at the top of tmpstk */ \
mov %rdi, %r11; \
addq $VMXCTX_TMPSTKTOP, %r11; \
movq VMXCTX_EPTP(%rdi), %rax; \
movq %rax, -16(%r11); \
movq $0x0, -8(%r11); \
mov $0x1, %eax; /* Single context invalidate */ \
invept -16(%r11), %rax; \
\
/* Check for invept error */ \
VM_INSTRUCTION_ERROR(%eax, %rdi); \
testl %eax, %eax; \
jz 9f; \
\
/* Return via vmx_setjmp with retval of VMX_RETURN_INVEPT */ \
movq $VMX_RETURN_INVEPT, %rsi; \
movq %rdi,%rsp; \
addq $VMXCTX_TMPSTKTOP, %rsp; \
callq vmx_return; \
9: ;

.text
/*
@@ -129,6 +193,9 @@ END(vmx_setjmp)
* Return to vmm context through vmx_setjmp() with a value of 'retval'.
*/
ENTRY(vmx_return)
/* The pmap is no longer active on the host cpu */
VMX_CLEAR_PM_ACTIVE

/* Restore host context. */
movq VMXCTX_HOST_R15(%rdi),%r15
movq VMXCTX_HOST_R14(%rdi),%r14
@@ -193,6 +260,10 @@ ENTRY(vmx_resume)

VMX_CHECK_AST

VMX_SET_PM_ACTIVE /* This vcpu is now active on the host cpu */

VMX_CHECK_EPTGEN /* Check if we have to invalidate TLB */

/*
* Restore guest state that is not automatically loaded from the vmcs.
*/
@@ -203,7 +274,7 @@ ENTRY(vmx_resume)
/*
* Capture the reason why vmresume failed.
*/
VM_INSTRUCTION_ERROR(%eax)
VM_INSTRUCTION_ERROR(%eax, %rsp)

/* Return via vmx_setjmp with return value of VMX_RETURN_VMRESUME */
movq %rsp,%rdi
@@ -225,6 +296,10 @@ ENTRY(vmx_launch)

VMX_CHECK_AST

VMX_SET_PM_ACTIVE /* This vcpu is now active on the host cpu */

VMX_CHECK_EPTGEN /* Check if we have to invalidate TLB */

/*
* Restore guest state that is not automatically loaded from the vmcs.
*/
@@ -235,7 +310,7 @@ ENTRY(vmx_launch)
/*
* Capture the reason why vmlaunch failed.
*/
VM_INSTRUCTION_ERROR(%eax)
VM_INSTRUCTION_ERROR(%eax, %rsp)

/* Return via vmx_setjmp with return value of VMX_RETURN_VMLAUNCH */
movq %rsp,%rdi
@@ -281,6 +281,43 @@ ppt_teardown_msix(struct pptdev *ppt)
ppt->msix.num_msgs = 0;
}

int
ppt_num_devices(struct vm *vm)
{
int i, num;

num = 0;
for (i = 0; i < num_pptdevs; i++) {
if (pptdevs[i].vm == vm)
num++;
}
return (num);
}

boolean_t
ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
int i, n;
struct pptdev *ppt;
struct vm_memory_segment *seg;

for (n = 0; n < num_pptdevs; n++) {
ppt = &pptdevs[n];
if (ppt->vm != vm)
continue;

for (i = 0; i < MAX_MMIOSEGS; i++) {
seg = &ppt->mmio[i];
if (seg->len == 0)
continue;
if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
return (TRUE);
}
}

return (FALSE);
}

int
ppt_assign_device(struct vm *vm, int bus, int slot, int func)
{
@@ -336,7 +373,7 @@ ppt_unassign_all(struct vm *vm)
bus = pci_get_bus(dev);
slot = pci_get_slot(dev);
func = pci_get_function(dev);
ppt_unassign_device(vm, bus, slot, func);
vm_unassign_pptdev(vm, bus, slot, func);
}
}

@@ -591,10 +628,3 @@ ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,

return (0);
}

int
ppt_num_devices(void)
{

return (num_pptdevs);
}
@@ -29,14 +29,20 @@
#ifndef _IO_PPT_H_
#define _IO_PPT_H_

int ppt_assign_device(struct vm *vm, int bus, int slot, int func);
int ppt_unassign_device(struct vm *vm, int bus, int slot, int func);
int ppt_unassign_all(struct vm *vm);
int ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
int destcpu, int vector, int numvec);
int ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
int ppt_num_devices(void);
int idx, uint32_t msg, uint32_t vector_control, uint64_t addr);
int ppt_num_devices(struct vm *vm);
boolean_t ppt_is_mmio(struct vm *vm, vm_paddr_t gpa);

/*
* The following functions should never be called directly.
* Use 'vm_assign_pptdev()' and 'vm_unassign_pptdev()' instead.
*/
int ppt_assign_device(struct vm *vm, int bus, int slot, int func);
int ppt_unassign_device(struct vm *vm, int bus, int slot, int func);
#endif
@@ -39,18 +39,28 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>

#include <machine/vm.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <x86/apicreg.h>
#include <machine/pmap.h>
#include <machine/vmparam.h>

#include <machine/vmm.h>
#include "vmm_ktr.h"
#include "vmm_host.h"
#include "vmm_mem.h"
#include "vmm_util.h"
@@ -84,15 +94,23 @@ struct vcpu {
#define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
#define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)

struct mem_seg {
vm_paddr_t gpa;
size_t len;
boolean_t wired;
vm_object_t object;
};
#define VM_MAX_MEMORY_SEGMENTS 2

struct vm {
void *cookie; /* processor-specific data */
void *iommu; /* iommu-specific data */
struct vmspace *vmspace; /* guest's address space */
struct vcpu vcpu[VM_MAXCPU];
int num_mem_segs;
struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS];
struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS];
char name[VM_MAX_NAMELEN];

/*
@@ -109,16 +127,14 @@ static struct vmm_ops *ops;
#define VMM_INIT() (ops != NULL ? (*ops->init)() : 0)
#define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0)

#define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL)
#define VMRUN(vmi, vcpu, rip) \
(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO)
#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
#define VMRUN(vmi, vcpu, rip, pmap) \
(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap) : ENXIO)
#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm) \
(ops != NULL ? \
(*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) : \
ENXIO)
#define VMMMAP_GET(vmi, gpa) \
(ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO)
#define VMSPACE_ALLOC(min, max) \
(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
#define VMSPACE_FREE(vmspace) \
(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
#define VMGETREG(vmi, vcpu, num, retval) \
(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
#define VMSETREG(vmi, vcpu, num, val) \
@@ -213,8 +229,7 @@ vmm_handler(module_t mod, int what, void *arg)
switch (what) {
case MOD_LOAD:
vmmdev_init();
if (ppt_num_devices() > 0)
iommu_init();
iommu_init();
error = vmm_init();
if (error == 0)
vmm_initialized = 1;
@@ -265,7 +280,7 @@ vm_create(const char *name, struct vm **retvm)
{
int i;
struct vm *vm;
vm_paddr_t maxaddr;
struct vmspace *vmspace;

const int BSP = 0;

@@ -279,59 +294,34 @@ vm_create(const char *name, struct vm **retvm)
if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
return (EINVAL);

vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
if (vmspace == NULL)
return (ENOMEM);

vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
strcpy(vm->name, name);
vm->cookie = VMINIT(vm);
vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));

for (i = 0; i < VM_MAXCPU; i++) {
vcpu_init(vm, i);
guest_msrs_init(vm, i);
}

maxaddr = vmm_mem_maxaddr();
vm->iommu = iommu_create_domain(maxaddr);
vm_activate_cpu(vm, BSP);
vm->vmspace = vmspace;

*retvm = vm;
return (0);
}

static void
vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
{
size_t len;
vm_paddr_t hpa;
void *host_domain;

host_domain = iommu_host_domain();
if (seg->object != NULL)
vmm_mem_free(vm->vmspace, seg->gpa, seg->len);

len = 0;
while (len < seg->len) {
hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
if (hpa == (vm_paddr_t)-1) {
panic("vm_free_mem_segs: cannot free hpa "
||||
"associated with gpa 0x%016lx", seg->gpa + len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove the 'gpa' to 'hpa' mapping in VMs domain.
|
||||
* And resurrect the 1:1 mapping for 'hpa' in 'host_domain'.
|
||||
*/
|
||||
iommu_remove_mapping(vm->iommu, seg->gpa + len, PAGE_SIZE);
|
||||
iommu_create_mapping(host_domain, hpa, hpa, PAGE_SIZE);
|
||||
|
||||
vmm_mem_free(hpa, PAGE_SIZE);
|
||||
|
||||
len += PAGE_SIZE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalidate cached translations associated with 'vm->iommu' since
|
||||
* we have now moved some pages from it.
|
||||
*/
|
||||
iommu_invalidate_tlb(vm->iommu);
|
||||
|
||||
bzero(seg, sizeof(struct vm_memory_segment));
|
||||
bzero(seg, sizeof(*seg));
|
||||
}
|
||||
|
||||
void
|
||||
@ -341,6 +331,9 @@ vm_destroy(struct vm *vm)
|
||||
|
||||
ppt_unassign_all(vm);
|
||||
|
||||
if (vm->iommu != NULL)
|
||||
iommu_destroy_domain(vm->iommu);
|
||||
|
||||
for (i = 0; i < vm->num_mem_segs; i++)
|
||||
vm_free_mem_seg(vm, &vm->mem_segs[i]);
|
||||
|
||||
@ -349,7 +342,7 @@ vm_destroy(struct vm *vm)
|
||||
for (i = 0; i < VM_MAXCPU; i++)
|
||||
vcpu_cleanup(&vm->vcpu[i]);
|
||||
|
||||
iommu_destroy_domain(vm->iommu);
|
||||
VMSPACE_FREE(vm->vmspace);
|
||||
|
||||
VMCLEANUP(vm->cookie);
|
||||
|
||||
@ -365,52 +358,48 @@ vm_name(struct vm *vm)
|
||||
int
|
||||
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
|
||||
{
|
||||
const boolean_t spok = TRUE; /* superpage mappings are ok */
|
||||
vm_object_t obj;
|
||||
|
||||
return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
|
||||
VM_PROT_RW, spok));
|
||||
if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
|
||||
return (ENOMEM);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
|
||||
{
|
||||
const boolean_t spok = TRUE; /* superpage mappings are ok */
|
||||
|
||||
return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0,
|
||||
VM_PROT_NONE, spok));
|
||||
vmm_mmio_free(vm->vmspace, gpa, len);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns TRUE if 'gpa' is available for allocation and FALSE otherwise
|
||||
*/
|
||||
static boolean_t
|
||||
vm_gpa_available(struct vm *vm, vm_paddr_t gpa)
|
||||
boolean_t
|
||||
vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
|
||||
{
|
||||
int i;
|
||||
vm_paddr_t gpabase, gpalimit;
|
||||
|
||||
if (gpa & PAGE_MASK)
|
||||
panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa);
|
||||
|
||||
for (i = 0; i < vm->num_mem_segs; i++) {
|
||||
gpabase = vm->mem_segs[i].gpa;
|
||||
gpalimit = gpabase + vm->mem_segs[i].len;
|
||||
if (gpa >= gpabase && gpa < gpalimit)
|
||||
return (FALSE);
|
||||
return (TRUE); /* 'gpa' is regular memory */
|
||||
}
|
||||
|
||||
return (TRUE);
|
||||
if (ppt_is_mmio(vm, gpa))
|
||||
return (TRUE); /* 'gpa' is pci passthru mmio */
|
||||
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
int
|
||||
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
|
||||
{
|
||||
int error, available, allocated;
|
||||
struct vm_memory_segment *seg;
|
||||
vm_paddr_t g, hpa;
|
||||
void *host_domain;
|
||||
|
||||
const boolean_t spok = TRUE; /* superpage mappings are ok */
|
||||
int available, allocated;
|
||||
struct mem_seg *seg;
|
||||
vm_object_t object;
|
||||
vm_paddr_t g;
|
||||
|
||||
if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
|
||||
return (EINVAL);
|
||||
@ -418,10 +407,10 @@ vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
|
||||
available = allocated = 0;
|
||||
g = gpa;
|
||||
while (g < gpa + len) {
|
||||
if (vm_gpa_available(vm, g))
|
||||
available++;
|
||||
else
|
||||
if (vm_mem_allocated(vm, g))
|
||||
allocated++;
|
||||
else
|
||||
available++;
|
||||
|
||||
g += PAGE_SIZE;
|
||||
}
|
||||
@ -443,61 +432,203 @@ vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
|
||||
if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
|
||||
return (E2BIG);
|
||||
|
||||
host_domain = iommu_host_domain();
|
||||
|
||||
seg = &vm->mem_segs[vm->num_mem_segs];
|
||||
|
||||
error = 0;
|
||||
if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
|
||||
return (ENOMEM);
|
||||
|
||||
seg->gpa = gpa;
|
||||
seg->len = 0;
|
||||
while (seg->len < len) {
|
||||
hpa = vmm_mem_alloc(PAGE_SIZE);
|
||||
if (hpa == 0) {
|
||||
error = ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE,
|
||||
VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok);
|
||||
if (error)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Remove the 1:1 mapping for 'hpa' from the 'host_domain'.
|
||||
* Add mapping for 'gpa + seg->len' to 'hpa' in the VMs domain.
|
||||
*/
|
||||
iommu_remove_mapping(host_domain, hpa, PAGE_SIZE);
|
||||
iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE);
|
||||
|
||||
seg->len += PAGE_SIZE;
|
||||
}
|
||||
|
||||
if (error) {
|
||||
vm_free_mem_seg(vm, seg);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalidate cached translations associated with 'host_domain' since
|
||||
* we have now moved some pages from it.
|
||||
*/
|
||||
iommu_invalidate_tlb(host_domain);
|
||||
seg->len = len;
|
||||
seg->object = object;
|
||||
seg->wired = FALSE;
|
||||
|
||||
vm->num_mem_segs++;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
vm_paddr_t
|
||||
vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len)
|
||||
static void
|
||||
vm_gpa_unwire(struct vm *vm)
|
||||
{
|
||||
vm_paddr_t nextpage;
|
||||
int i, rv;
|
||||
struct mem_seg *seg;
|
||||
|
||||
nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
|
||||
if (len > nextpage - gpa)
|
||||
panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len);
|
||||
for (i = 0; i < vm->num_mem_segs; i++) {
|
||||
seg = &vm->mem_segs[i];
|
||||
if (!seg->wired)
|
||||
continue;
|
||||
|
||||
return (VMMMAP_GET(vm->cookie, gpa));
|
||||
rv = vm_map_unwire(&vm->vmspace->vm_map,
|
||||
seg->gpa, seg->gpa + seg->len,
|
||||
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
|
||||
KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
|
||||
"%#lx/%ld could not be unwired: %d",
|
||||
vm_name(vm), seg->gpa, seg->len, rv));
|
||||
|
||||
seg->wired = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vm_gpa_wire(struct vm *vm)
|
||||
{
|
||||
int i, rv;
|
||||
struct mem_seg *seg;
|
||||
|
||||
for (i = 0; i < vm->num_mem_segs; i++) {
|
||||
seg = &vm->mem_segs[i];
|
||||
if (seg->wired)
|
||||
continue;
|
||||
|
||||
/* XXX rlimits? */
|
||||
rv = vm_map_wire(&vm->vmspace->vm_map,
|
||||
seg->gpa, seg->gpa + seg->len,
|
||||
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
|
||||
if (rv != KERN_SUCCESS)
|
||||
break;
|
||||
|
||||
seg->wired = TRUE;
|
||||
}
|
||||
|
||||
if (i < vm->num_mem_segs) {
|
||||
/*
|
||||
* Undo the wiring before returning an error.
|
||||
*/
|
||||
vm_gpa_unwire(vm);
|
||||
return (EAGAIN);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
vm_iommu_modify(struct vm *vm, boolean_t map)
|
||||
{
|
||||
int i, sz;
|
||||
vm_paddr_t gpa, hpa;
|
||||
struct mem_seg *seg;
|
||||
void *vp, *cookie, *host_domain;
|
||||
|
||||
sz = PAGE_SIZE;
|
||||
host_domain = iommu_host_domain();
|
||||
|
||||
for (i = 0; i < vm->num_mem_segs; i++) {
|
||||
seg = &vm->mem_segs[i];
|
||||
KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
|
||||
vm_name(vm), seg->gpa, seg->len));
|
||||
|
||||
gpa = seg->gpa;
|
||||
while (gpa < seg->gpa + seg->len) {
|
||||
vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
|
||||
&cookie);
|
||||
KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
|
||||
vm_name(vm), gpa));
|
||||
|
||||
vm_gpa_release(cookie);
|
||||
|
||||
hpa = DMAP_TO_PHYS((uintptr_t)vp);
|
||||
if (map) {
|
||||
iommu_create_mapping(vm->iommu, gpa, hpa, sz);
|
||||
iommu_remove_mapping(host_domain, hpa, sz);
|
||||
} else {
|
||||
iommu_remove_mapping(vm->iommu, gpa, sz);
|
||||
iommu_create_mapping(host_domain, hpa, hpa, sz);
|
||||
}
|
||||
|
||||
gpa += PAGE_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalidate the cached translations associated with the domain
|
||||
* from which pages were removed.
|
||||
*/
|
||||
if (map)
|
||||
iommu_invalidate_tlb(host_domain);
|
||||
else
|
||||
iommu_invalidate_tlb(vm->iommu);
|
||||
}
|
||||
|
||||
#define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE)
|
||||
#define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE)
|
||||
|
||||
int
|
||||
vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = ppt_unassign_device(vm, bus, slot, func);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (ppt_num_devices(vm) == 0) {
|
||||
vm_iommu_unmap(vm);
|
||||
vm_gpa_unwire(vm);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
|
||||
{
|
||||
int error;
|
||||
vm_paddr_t maxaddr;
|
||||
|
||||
/*
|
||||
* Virtual machines with pci passthru devices get special treatment:
|
||||
* - the guest physical memory is wired
|
||||
* - the iommu is programmed to do the 'gpa' to 'hpa' translation
|
||||
*
|
||||
* We need to do this before the first pci passthru device is attached.
|
||||
*/
|
||||
if (ppt_num_devices(vm) == 0) {
|
||||
KASSERT(vm->iommu == NULL,
|
||||
("vm_assign_pptdev: iommu must be NULL"));
|
||||
maxaddr = vmm_mem_maxaddr();
|
||||
vm->iommu = iommu_create_domain(maxaddr);
|
||||
|
||||
error = vm_gpa_wire(vm);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
vm_iommu_map(vm);
|
||||
}
|
||||
|
||||
error = ppt_assign_device(vm, bus, slot, func);
|
||||
return (error);
|
||||
}
|
||||
|
||||
void *
|
||||
vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
|
||||
void **cookie)
|
||||
{
|
||||
int count, pageoff;
|
||||
vm_page_t m;
|
||||
|
||||
pageoff = gpa & PAGE_MASK;
|
||||
if (len > PAGE_SIZE - pageoff)
|
||||
panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
|
||||
|
||||
count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
|
||||
trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
|
||||
|
||||
if (count == 1) {
|
||||
*cookie = m;
|
||||
return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
|
||||
} else {
|
||||
*cookie = NULL;
|
||||
return (NULL);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vm_gpa_release(void *cookie)
|
||||
{
|
||||
vm_page_t m = cookie;
|
||||
|
||||
vm_page_lock(m);
|
||||
vm_page_unhold(m);
|
||||
vm_page_unlock(m);
}
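The hold/release pair above is the replacement for the old vm_gpa2hpa() lookup: a kernel-side caller gets a host virtual address backed by a held page and must drop the hold when done. A minimal sketch of that calling pattern, assuming a hypothetical helper name and a transfer that stays within one page (vm_gpa_hold() panics on requests that cross a page boundary):

/*
 * Hypothetical helper (not part of this change): copy 'len' bytes of guest
 * memory starting at 'gpa' into 'dst'.  'len' must not cross a page boundary.
 */
static int
copy_from_guest(struct vm *vm, vm_paddr_t gpa, void *dst, size_t len)
{
	void *hva, *cookie;

	/* Translate and hold the backing page; NULL means the gpa is unmapped. */
	hva = vm_gpa_hold(vm, gpa, len, VM_PROT_READ, &cookie);
	if (hva == NULL)
		return (EFAULT);

	bcopy(hva, dst, len);

	/* Drop the page hold taken by vm_gpa_hold(). */
	vm_gpa_release(cookie);
	return (0);
}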
int
|
||||
@ -508,13 +639,42 @@ vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
|
||||
|
||||
for (i = 0; i < vm->num_mem_segs; i++) {
|
||||
if (gpabase == vm->mem_segs[i].gpa) {
|
||||
*seg = vm->mem_segs[i];
|
||||
seg->gpa = vm->mem_segs[i].gpa;
|
||||
seg->len = vm->mem_segs[i].len;
|
||||
seg->wired = vm->mem_segs[i].wired;
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
return (-1);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
|
||||
vm_offset_t *offset, struct vm_object **object)
|
||||
{
|
||||
int i;
|
||||
size_t seg_len;
|
||||
vm_paddr_t seg_gpa;
|
||||
vm_object_t seg_obj;
|
||||
|
||||
for (i = 0; i < vm->num_mem_segs; i++) {
|
||||
if ((seg_obj = vm->mem_segs[i].object) == NULL)
|
||||
continue;
|
||||
|
||||
seg_gpa = vm->mem_segs[i].gpa;
|
||||
seg_len = vm->mem_segs[i].len;
|
||||
|
||||
if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
|
||||
*offset = gpa - seg_gpa;
|
||||
*object = seg_obj;
|
||||
vm_object_reference(seg_obj);
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
int
|
||||
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
|
||||
{
|
||||
@ -633,26 +793,215 @@ save_guest_fpustate(struct vcpu *vcpu)
|
||||
|
||||
static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
|
||||
|
||||
static int
|
||||
vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
|
||||
{
|
||||
int error;
|
||||
|
||||
vcpu_assert_locked(vcpu);
|
||||
|
||||
/*
|
||||
* The following state transitions are allowed:
|
||||
* IDLE -> FROZEN -> IDLE
|
||||
* FROZEN -> RUNNING -> FROZEN
|
||||
* FROZEN -> SLEEPING -> FROZEN
|
||||
*/
|
||||
switch (vcpu->state) {
|
||||
case VCPU_IDLE:
|
||||
case VCPU_RUNNING:
|
||||
case VCPU_SLEEPING:
|
||||
error = (newstate != VCPU_FROZEN);
|
||||
break;
|
||||
case VCPU_FROZEN:
|
||||
error = (newstate == VCPU_FROZEN);
|
||||
break;
|
||||
default:
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (error == 0)
|
||||
vcpu->state = newstate;
|
||||
else
|
||||
error = EBUSY;
|
||||
|
||||
return (error);
}
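The switch above encodes a small state machine; as a reading aid, the rule collapses to the stand-alone predicate below, which is an illustration rather than code taken from this change:

/*
 * Illustration only: a vcpu may move from any state to FROZEN, and from
 * FROZEN to any state other than FROZEN.  Every other transition is the
 * EBUSY case above.
 */
static int
vcpu_transition_allowed(enum vcpu_state from, enum vcpu_state to)
{

	if (from == VCPU_FROZEN)
		return (to != VCPU_FROZEN);
	return (to == VCPU_FROZEN);
}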
static void
|
||||
vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
|
||||
{
|
||||
int error;
|
||||
|
||||
if ((error = vcpu_set_state(vm, vcpuid, newstate)) != 0)
|
||||
panic("Error %d setting state to %d\n", error, newstate);
|
||||
}
|
||||
|
||||
static void
|
||||
vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
|
||||
{
|
||||
int error;
|
||||
|
||||
if ((error = vcpu_set_state_locked(vcpu, newstate)) != 0)
|
||||
panic("Error %d setting state to %d", error, newstate);
|
||||
}
|
||||
|
||||
/*
|
||||
* Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
|
||||
*/
|
||||
static int
|
||||
vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t *retu)
|
||||
{
|
||||
struct vcpu *vcpu;
|
||||
int sleepticks, t;
|
||||
|
||||
vcpu = &vm->vcpu[vcpuid];
|
||||
|
||||
vcpu_lock(vcpu);
|
||||
|
||||
/*
|
||||
* Figure out the number of host ticks until the next apic
|
||||
* timer interrupt in the guest.
|
||||
*/
|
||||
sleepticks = lapic_timer_tick(vm, vcpuid);
|
||||
|
||||
/*
|
||||
* If the guest local apic timer is disabled then sleep for
|
||||
* a long time but not forever.
|
||||
*/
|
||||
if (sleepticks < 0)
|
||||
sleepticks = hz;
|
||||
|
||||
/*
|
||||
* Do a final check for pending NMI or interrupts before
|
||||
* really putting this thread to sleep.
|
||||
*
|
||||
* These interrupts could have happened any time after we
|
||||
* returned from VMRUN() and before we grabbed the vcpu lock.
|
||||
*/
|
||||
if (!vm_nmi_pending(vm, vcpuid) && lapic_pending_intr(vm, vcpuid) < 0) {
|
||||
if (sleepticks <= 0)
|
||||
panic("invalid sleepticks %d", sleepticks);
|
||||
t = ticks;
|
||||
vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
|
||||
msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
|
||||
vcpu_require_state_locked(vcpu, VCPU_FROZEN);
|
||||
vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
|
||||
}
|
||||
vcpu_unlock(vcpu);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
vm_handle_paging(struct vm *vm, int vcpuid, boolean_t *retu)
|
||||
{
|
||||
int rv, ftype;
|
||||
struct vm_map *map;
|
||||
struct vcpu *vcpu;
|
||||
struct vm_exit *vme;
|
||||
|
||||
vcpu = &vm->vcpu[vcpuid];
|
||||
vme = &vcpu->exitinfo;
|
||||
|
||||
ftype = vme->u.paging.fault_type;
|
||||
KASSERT(ftype == VM_PROT_READ ||
|
||||
ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
|
||||
("vm_handle_paging: invalid fault_type %d", ftype));
|
||||
|
||||
if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
|
||||
rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
|
||||
vme->u.paging.gpa, ftype);
|
||||
if (rv == 0)
|
||||
goto done;
|
||||
}
|
||||
|
||||
map = &vm->vmspace->vm_map;
|
||||
rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
|
||||
|
||||
VMM_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, ftype = %d",
|
||||
rv, vme->u.paging.gpa, ftype);
|
||||
|
||||
if (rv != KERN_SUCCESS)
|
||||
return (EFAULT);
|
||||
done:
|
||||
/* restart execution at the faulting instruction */
|
||||
vme->inst_length = 0;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu)
|
||||
{
|
||||
struct vie *vie;
|
||||
struct vcpu *vcpu;
|
||||
struct vm_exit *vme;
|
||||
int error, inst_length;
|
||||
uint64_t rip, gla, gpa, cr3;
|
||||
|
||||
vcpu = &vm->vcpu[vcpuid];
|
||||
vme = &vcpu->exitinfo;
|
||||
|
||||
rip = vme->rip;
|
||||
inst_length = vme->inst_length;
|
||||
|
||||
gla = vme->u.inst_emul.gla;
|
||||
gpa = vme->u.inst_emul.gpa;
|
||||
cr3 = vme->u.inst_emul.cr3;
|
||||
vie = &vme->u.inst_emul.vie;
|
||||
|
||||
vie_init(vie);
|
||||
|
||||
/* Fetch, decode and emulate the faulting instruction */
|
||||
if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3, vie) != 0)
|
||||
return (EFAULT);
|
||||
|
||||
if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0)
|
||||
return (EFAULT);
|
||||
|
||||
/* return to userland unless this is a local apic access */
|
||||
if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE) {
|
||||
*retu = TRUE;
|
||||
return (0);
|
||||
}
|
||||
|
||||
error = vmm_emulate_instruction(vm, vcpuid, gpa, vie,
|
||||
lapic_mmio_read, lapic_mmio_write, 0);
|
||||
|
||||
/* return to userland to spin up the AP */
|
||||
if (error == 0 && vme->exitcode == VM_EXITCODE_SPINUP_AP)
|
||||
*retu = TRUE;
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
vm_run(struct vm *vm, struct vm_run *vmrun)
|
||||
{
|
||||
int error, vcpuid, sleepticks, t;
|
||||
int error, vcpuid;
|
||||
struct vcpu *vcpu;
|
||||
struct pcb *pcb;
|
||||
uint64_t tscval, rip;
|
||||
struct vm_exit *vme;
|
||||
boolean_t retu;
|
||||
pmap_t pmap;
|
||||
|
||||
vcpuid = vmrun->cpuid;
|
||||
|
||||
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
|
||||
return (EINVAL);
|
||||
|
||||
pmap = vmspace_pmap(vm->vmspace);
|
||||
vcpu = &vm->vcpu[vcpuid];
|
||||
vme = &vmrun->vm_exit;
|
||||
vme = &vcpu->exitinfo;
|
||||
rip = vmrun->rip;
|
||||
restart:
|
||||
critical_enter();
|
||||
|
||||
KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
|
||||
("vm_run: absurd pm_active"));
|
||||
|
||||
tscval = rdtsc();
|
||||
|
||||
pcb = PCPU_GET(curpcb);
|
||||
@ -661,62 +1010,44 @@ restart:
|
||||
restore_guest_msrs(vm, vcpuid);
|
||||
restore_guest_fpustate(vcpu);
|
||||
|
||||
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
|
||||
vcpu->hostcpu = curcpu;
|
||||
error = VMRUN(vm->cookie, vcpuid, rip);
|
||||
error = VMRUN(vm->cookie, vcpuid, rip, pmap);
|
||||
vcpu->hostcpu = NOCPU;
|
||||
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
|
||||
|
||||
save_guest_fpustate(vcpu);
|
||||
restore_host_msrs(vm, vcpuid);
|
||||
|
||||
vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
|
||||
|
||||
/* copy the exit information */
|
||||
bcopy(&vcpu->exitinfo, vme, sizeof(struct vm_exit));
|
||||
|
||||
critical_exit();
|
||||
|
||||
/*
|
||||
* Oblige the guest's desire to 'hlt' by sleeping until the vcpu
|
||||
* is ready to run.
|
||||
*/
|
||||
if (error == 0 && vme->exitcode == VM_EXITCODE_HLT) {
|
||||
vcpu_lock(vcpu);
|
||||
|
||||
/*
|
||||
* Figure out the number of host ticks until the next apic
|
||||
* timer interrupt in the guest.
|
||||
*/
|
||||
sleepticks = lapic_timer_tick(vm, vcpuid);
|
||||
|
||||
/*
|
||||
* If the guest local apic timer is disabled then sleep for
|
||||
* a long time but not forever.
|
||||
*/
|
||||
if (sleepticks < 0)
|
||||
sleepticks = hz;
|
||||
|
||||
/*
|
||||
* Do a final check for pending NMI or interrupts before
|
||||
* really putting this thread to sleep.
|
||||
*
|
||||
* These interrupts could have happened any time after we
|
||||
* returned from VMRUN() and before we grabbed the vcpu lock.
|
||||
*/
|
||||
if (!vm_nmi_pending(vm, vcpuid) &&
|
||||
lapic_pending_intr(vm, vcpuid) < 0) {
|
||||
if (sleepticks <= 0)
|
||||
panic("invalid sleepticks %d", sleepticks);
|
||||
t = ticks;
|
||||
msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
|
||||
vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
|
||||
if (error == 0) {
|
||||
retu = FALSE;
|
||||
switch (vme->exitcode) {
|
||||
case VM_EXITCODE_HLT:
|
||||
error = vm_handle_hlt(vm, vcpuid, &retu);
|
||||
break;
|
||||
case VM_EXITCODE_PAGING:
|
||||
error = vm_handle_paging(vm, vcpuid, &retu);
|
||||
break;
|
||||
case VM_EXITCODE_INST_EMUL:
|
||||
error = vm_handle_inst_emul(vm, vcpuid, &retu);
|
||||
break;
|
||||
default:
|
||||
retu = TRUE; /* handled in userland */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
vcpu_unlock(vcpu);
|
||||
|
||||
if (error == 0 && retu == FALSE) {
|
||||
rip = vme->rip + vme->inst_length;
|
||||
goto restart;
|
||||
}
|
||||
|
||||
/* copy the exit information */
|
||||
bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
|
||||
return (error);
}
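Since the kernel loop now restarts the vcpu itself after HLT, PAGING and INST_EMUL exits, userland only sees the exits it has to handle. A hedged sketch of the corresponding userland call, assuming the request and structure definitions live in <machine/vmm_dev.h> and <machine/vmm.h> and using a made-up wrapper name:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <machine/vmm.h>	/* struct vm_exit (assumed location) */
#include <machine/vmm_dev.h>	/* struct vm_run, VM_RUN (assumed location) */
#include <stdint.h>
#include <string.h>

/* Hypothetical wrapper: run one vcpu until an exit the kernel cannot handle. */
static int
run_vcpu_once(int vmfd, int vcpu, uint64_t rip, struct vm_exit *vmexit)
{
	struct vm_run vmrun;

	memset(&vmrun, 0, sizeof(vmrun));
	vmrun.cpuid = vcpu;
	vmrun.rip = rip;

	if (ioctl(vmfd, VM_RUN, &vmrun) != 0)
		return (-1);

	/* Only the exit codes left unhandled by vm_run() show up here. */
	*vmexit = vmrun.vm_exit;
	return (0);
}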
@ -869,7 +1200,7 @@ vm_iommu_domain(struct vm *vm)
|
||||
}
|
||||
|
||||
int
|
||||
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
|
||||
vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
|
||||
{
|
||||
int error;
|
||||
struct vcpu *vcpu;
|
||||
@ -880,20 +1211,7 @@ vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state state)
|
||||
vcpu = &vm->vcpu[vcpuid];
|
||||
|
||||
vcpu_lock(vcpu);
|
||||
|
||||
/*
|
||||
* The following state transitions are allowed:
|
||||
* IDLE -> RUNNING -> IDLE
|
||||
* IDLE -> CANNOT_RUN -> IDLE
|
||||
*/
|
||||
if ((vcpu->state == VCPU_IDLE && state != VCPU_IDLE) ||
|
||||
(vcpu->state != VCPU_IDLE && state == VCPU_IDLE)) {
|
||||
error = 0;
|
||||
vcpu->state = state;
|
||||
} else {
|
||||
error = EBUSY;
|
||||
}
|
||||
|
||||
error = vcpu_set_state_locked(vcpu, newstate);
|
||||
vcpu_unlock(vcpu);
|
||||
|
||||
return (error);
|
||||
@ -979,16 +1297,7 @@ vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
|
||||
vcpu_lock(vcpu);
|
||||
hostcpu = vcpu->hostcpu;
|
||||
if (hostcpu == NOCPU) {
|
||||
/*
|
||||
* If the vcpu is 'RUNNING' but without a valid 'hostcpu' then
|
||||
* the host thread must be sleeping waiting for an event to
|
||||
* kick the vcpu out of 'hlt'.
|
||||
*
|
||||
* XXX this is racy because the condition exists right before
|
||||
* and after calling VMRUN() in vm_run(). The wakeup() is
|
||||
* benign in this case.
|
||||
*/
|
||||
if (vcpu->state == VCPU_RUNNING)
|
||||
if (vcpu->state == VCPU_SLEEPING)
|
||||
wakeup_one(vcpu);
|
||||
} else {
|
||||
if (vcpu->state != VCPU_RUNNING)
|
||||
@ -998,3 +1307,10 @@ vm_interrupt_hostcpu(struct vm *vm, int vcpuid)
|
||||
}
|
||||
vcpu_unlock(vcpu);
|
||||
}
|
||||
|
||||
struct vmspace *
|
||||
vm_get_vmspace(struct vm *vm)
|
||||
{
|
||||
|
||||
return (vm->vmspace);
|
||||
}
|
||||
|
@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
#include <vm/vm_map.h>
|
||||
|
||||
#include <machine/pmap.h>
|
||||
#include <machine/vmparam.h>
|
||||
@ -95,8 +96,9 @@ vmmdev_lookup2(struct cdev *cdev)
|
||||
static int
|
||||
vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
|
||||
{
|
||||
int error, off, c;
|
||||
vm_paddr_t hpa, gpa;
|
||||
int error, off, c, prot;
|
||||
vm_paddr_t gpa;
|
||||
void *hpa, *cookie;
|
||||
struct vmmdev_softc *sc;
|
||||
|
||||
static char zerobuf[PAGE_SIZE];
|
||||
@ -107,6 +109,7 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
|
||||
if (sc == NULL)
|
||||
error = ENXIO;
|
||||
|
||||
prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
|
||||
while (uio->uio_resid > 0 && error == 0) {
|
||||
gpa = uio->uio_offset;
|
||||
off = gpa & PAGE_MASK;
|
||||
@ -120,14 +123,16 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
|
||||
* Since this device does not support lseek(2), dd(1) will
|
||||
* read(2) blocks of data to simulate the lseek(2).
|
||||
*/
|
||||
hpa = vm_gpa2hpa(sc->vm, gpa, c);
|
||||
if (hpa == (vm_paddr_t)-1) {
|
||||
hpa = vm_gpa_hold(sc->vm, gpa, c, prot, &cookie);
|
||||
if (hpa == NULL) {
|
||||
if (uio->uio_rw == UIO_READ)
|
||||
error = uiomove(zerobuf, c, uio);
|
||||
else
|
||||
error = EFAULT;
|
||||
} else
|
||||
error = uiomove((void *)PHYS_TO_DMAP(hpa), c, uio);
|
||||
} else {
|
||||
error = uiomove(hpa, c, uio);
|
||||
vm_gpa_release(cookie);
|
||||
}
|
||||
}
|
||||
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
@ -139,7 +144,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
struct thread *td)
|
||||
{
|
||||
int error, vcpu, state_changed;
|
||||
enum vcpu_state new_state;
|
||||
struct vmmdev_softc *sc;
|
||||
struct vm_memory_segment *seg;
|
||||
struct vm_register *vmreg;
|
||||
@ -156,6 +160,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
struct vm_stats *vmstats;
|
||||
struct vm_stat_desc *statdesc;
|
||||
struct vm_x2apic *x2apic;
|
||||
struct vm_gpa_pte *gpapte;
|
||||
|
||||
sc = vmmdev_lookup2(cdev);
|
||||
if (sc == NULL)
|
||||
@ -189,12 +194,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (cmd == VM_RUN)
|
||||
new_state = VCPU_RUNNING;
|
||||
else
|
||||
new_state = VCPU_CANNOT_RUN;
|
||||
|
||||
error = vcpu_set_state(sc->vm, vcpu, new_state);
|
||||
error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN);
|
||||
if (error)
|
||||
goto done;
|
||||
|
||||
@ -211,7 +211,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
*/
|
||||
error = 0;
|
||||
for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
|
||||
error = vcpu_set_state(sc->vm, vcpu, VCPU_CANNOT_RUN);
|
||||
error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN);
|
||||
if (error)
|
||||
break;
|
||||
}
|
||||
@ -271,13 +271,13 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
break;
|
||||
case VM_BIND_PPTDEV:
|
||||
pptdev = (struct vm_pptdev *)data;
|
||||
error = ppt_assign_device(sc->vm, pptdev->bus, pptdev->slot,
|
||||
pptdev->func);
|
||||
error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
|
||||
pptdev->func);
|
||||
break;
|
||||
case VM_UNBIND_PPTDEV:
|
||||
pptdev = (struct vm_pptdev *)data;
|
||||
error = ppt_unassign_device(sc->vm, pptdev->bus, pptdev->slot,
|
||||
pptdev->func);
|
||||
error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
|
||||
pptdev->func);
|
||||
break;
|
||||
case VM_INJECT_EVENT:
|
||||
vmevent = (struct vm_event *)data;
|
||||
@ -348,6 +348,12 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
error = vm_get_x2apic_state(sc->vm,
|
||||
x2apic->cpuid, &x2apic->state);
|
||||
break;
|
||||
case VM_GET_GPA_PMAP:
|
||||
gpapte = (struct vm_gpa_pte *)data;
|
||||
pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
|
||||
gpapte->gpa, gpapte->pte, &gpapte->ptenum);
|
||||
error = 0;
|
||||
break;
|
||||
default:
|
||||
error = ENOTTY;
|
||||
break;
|
||||
@ -361,25 +367,25 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
|
||||
}
|
||||
|
||||
done:
|
||||
/* Make sure that no handler returns a bogus value like ERESTART */
|
||||
KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
|
||||
return (error);
}
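For completeness, a hedged userland sketch of driving the new VM_GET_GPA_PMAP request straight through the device node; the /dev/vmm/<name> path matches how vmmdev creates the node, while the helper name, the error handling and the assumption that the request and struct vm_gpa_pte come from <machine/vmm_dev.h> are illustrative only:

#include <sys/types.h>
#include <sys/ioctl.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>	/* struct vm_gpa_pte, VM_GET_GPA_PMAP (assumed location) */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Dump the nested page-table entries that translate guest physical address 'gpa'. */
static int
dump_gpa_ptes(const char *vmname, uint64_t gpa)
{
	struct vm_gpa_pte gpapte;
	char path[64];
	int fd, i;

	snprintf(path, sizeof(path), "/dev/vmm/%s", vmname);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return (-1);

	memset(&gpapte, 0, sizeof(gpapte));
	gpapte.gpa = gpa;
	if (ioctl(fd, VM_GET_GPA_PMAP, &gpapte) == 0) {
		for (i = 0; i < gpapte.ptenum; i++)
			printf("pte[%d] = %#lx\n", i, (unsigned long)gpapte.pte[i]);
	}
	close(fd);
	return (0);
}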
static int
|
||||
vmmdev_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
|
||||
int nprot, vm_memattr_t *memattr)
|
||||
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
|
||||
vm_size_t size, struct vm_object **object, int nprot)
|
||||
{
|
||||
int error;
|
||||
struct vmmdev_softc *sc;
|
||||
|
||||
error = -1;
|
||||
mtx_lock(&vmmdev_mtx);
|
||||
|
||||
sc = vmmdev_lookup2(cdev);
|
||||
if (sc != NULL && (nprot & PROT_EXEC) == 0) {
|
||||
*paddr = vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE);
|
||||
if (*paddr != (vm_paddr_t)-1)
|
||||
error = 0;
|
||||
}
|
||||
if (sc != NULL && (nprot & PROT_EXEC) == 0)
|
||||
error = vm_get_memobj(sc->vm, *offset, size, offset, object);
|
||||
else
|
||||
error = EINVAL;
|
||||
|
||||
mtx_unlock(&vmmdev_mtx);
|
||||
|
||||
@ -446,7 +452,7 @@ static struct cdevsw vmmdevsw = {
|
||||
.d_name = "vmmdev",
|
||||
.d_version = D_VERSION,
|
||||
.d_ioctl = vmmdev_ioctl,
|
||||
.d_mmap = vmmdev_mmap,
|
||||
.d_mmap_single = vmmdev_mmap_single,
|
||||
.d_read = vmmdev_rw,
|
||||
.d_write = vmmdev_rw,
};
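Switching from d_mmap to d_mmap_single means the guest memory segment is handed out as a vm_object, so a process maps guest RAM by mmap()ing the device node with the guest physical address as the file offset. A hedged sketch of that usage; the helper name and its error handling are illustrative:

#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Map 'len' bytes of guest memory starting at guest physical address 'gpa'. */
static void *
map_guest_memory(const char *vmname, off_t gpa, size_t len)
{
	char path[64];
	void *p;
	int fd;

	snprintf(path, sizeof(path), "/dev/vmm/%s", vmname);
	fd = open(path, O_RDWR);
	if (fd < 0)
		return (NULL);

	/* The file offset is interpreted as the guest physical address. */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, gpa);
	close(fd);

	return (p == MAP_FAILED ? NULL : p);
}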
@ -465,7 +465,7 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
static void
|
||||
void
|
||||
vie_init(struct vie *vie)
|
||||
{
|
||||
|
||||
@ -479,9 +479,9 @@ static int
|
||||
gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
|
||||
uint64_t *gpa, uint64_t *gpaend)
|
||||
{
|
||||
vm_paddr_t hpa;
|
||||
int nlevels, ptpshift, ptpindex;
|
||||
uint64_t *ptpbase, pte, pgsize;
|
||||
void *cookie;
|
||||
|
||||
/*
|
||||
* XXX assumes 64-bit guest with 4 page walk levels
|
||||
@ -491,18 +491,19 @@ gla2gpa(struct vm *vm, uint64_t gla, uint64_t ptpphys,
|
||||
/* Zero out the lower 12 bits and the upper 12 bits */
ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
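/*
 * The three shifts keep bits 12-51 of 'ptpphys' and clear bits 0-11 (the
 * page offset / PTE flags) and bits 52-63 (NX and reserved bits), leaving
 * a page-aligned page-table base address.
 */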
hpa = vm_gpa2hpa(vm, ptpphys, PAGE_SIZE);
|
||||
if (hpa == -1)
|
||||
ptpbase = vm_gpa_hold(vm, ptpphys, PAGE_SIZE, VM_PROT_READ,
|
||||
&cookie);
|
||||
if (ptpbase == NULL)
|
||||
goto error;
|
||||
|
||||
ptpbase = (uint64_t *)PHYS_TO_DMAP(hpa);
|
||||
|
||||
ptpshift = PAGE_SHIFT + nlevels * 9;
|
||||
ptpindex = (gla >> ptpshift) & 0x1FF;
|
||||
pgsize = 1UL << ptpshift;
|
||||
|
||||
pte = ptpbase[ptpindex];
|
||||
|
||||
vm_gpa_release(cookie);
|
||||
|
||||
if ((pte & PG_V) == 0)
|
||||
goto error;
|
||||
|
||||
@ -530,18 +531,18 @@ int
|
||||
vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
|
||||
uint64_t cr3, struct vie *vie)
|
||||
{
|
||||
int n, err;
|
||||
uint64_t hpa, gpa, gpaend, off;
|
||||
int n, err, prot;
|
||||
uint64_t gpa, gpaend, off;
|
||||
void *hpa, *cookie;
|
||||
|
||||
/*
|
||||
* XXX cache previously fetched instructions using 'rip' as the tag
|
||||
*/
|
||||
|
||||
prot = VM_PROT_READ | VM_PROT_EXECUTE;
|
||||
if (inst_length > VIE_INST_SIZE)
|
||||
panic("vmm_fetch_instruction: invalid length %d", inst_length);
|
||||
|
||||
vie_init(vie);
|
||||
|
||||
/* Copy the instruction into 'vie' */
|
||||
while (vie->num_valid < inst_length) {
|
||||
err = gla2gpa(vm, rip, cr3, &gpa, &gpaend);
|
||||
@ -551,11 +552,12 @@ vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
|
||||
off = gpa & PAGE_MASK;
|
||||
n = min(inst_length - vie->num_valid, PAGE_SIZE - off);
|
||||
|
||||
hpa = vm_gpa2hpa(vm, gpa, n);
|
||||
if (hpa == -1)
|
||||
if ((hpa = vm_gpa_hold(vm, gpa, n, prot, &cookie)) == NULL)
|
||||
break;
|
||||
|
||||
bcopy((void *)PHYS_TO_DMAP(hpa), &vie->inst[vie->num_valid], n);
|
||||
bcopy(hpa, &vie->inst[vie->num_valid], n);
|
||||
|
||||
vm_gpa_release(cookie);
|
||||
|
||||
rip += n;
|
||||
vie->num_valid += n;
|
||||
|
@ -30,40 +30,24 @@
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/linker.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/sglist.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/rwlock.h>
|
||||
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_param.h>
|
||||
#include <vm/pmap.h>
|
||||
#include <vm/vm_map.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_pageout.h>
|
||||
#include <vm/vm_pager.h>
|
||||
|
||||
#include <machine/md_var.h>
|
||||
#include <machine/metadata.h>
|
||||
#include <machine/pc/bios.h>
|
||||
#include <machine/vmparam.h>
|
||||
#include <machine/pmap.h>
|
||||
|
||||
#include "vmm_util.h"
|
||||
#include "vmm_mem.h"
|
||||
|
||||
SYSCTL_DECL(_hw_vmm);
|
||||
|
||||
static u_long pages_allocated;
|
||||
SYSCTL_ULONG(_hw_vmm, OID_AUTO, pages_allocated, CTLFLAG_RD,
|
||||
&pages_allocated, 0, "4KB pages allocated");
|
||||
|
||||
static void
|
||||
update_pages_allocated(int howmany)
|
||||
{
|
||||
pages_allocated += howmany; /* XXX locking? */
|
||||
}
|
||||
|
||||
int
|
||||
vmm_mem_init(void)
|
||||
{
|
||||
@ -71,60 +55,95 @@ vmm_mem_init(void)
|
||||
return (0);
|
||||
}
|
||||
|
||||
vm_paddr_t
|
||||
vmm_mem_alloc(size_t size)
|
||||
vm_object_t
|
||||
vmm_mmio_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len,
|
||||
vm_paddr_t hpa)
|
||||
{
|
||||
int flags;
|
||||
vm_page_t m;
|
||||
vm_paddr_t pa;
|
||||
int error;
|
||||
vm_object_t obj;
|
||||
struct sglist *sg;
|
||||
|
||||
if (size != PAGE_SIZE)
|
||||
panic("vmm_mem_alloc: invalid allocation size %lu", size);
|
||||
sg = sglist_alloc(1, M_WAITOK);
|
||||
error = sglist_append_phys(sg, hpa, len);
|
||||
KASSERT(error == 0, ("error %d appending physaddr to sglist", error));
|
||||
|
||||
flags = VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
|
||||
VM_ALLOC_ZERO;
|
||||
|
||||
while (1) {
|
||||
obj = vm_pager_allocate(OBJT_SG, sg, len, VM_PROT_RW, 0, NULL);
|
||||
if (obj != NULL) {
|
||||
/*
|
||||
* XXX need policy to determine when to back off the allocation
|
||||
* VT-x ignores the MTRR settings when figuring out the
|
||||
* memory type for translations obtained through EPT.
|
||||
*
|
||||
* Therefore we explicitly force the pages provided by
|
||||
* this object to be mapped as uncacheable.
|
||||
*/
|
||||
m = vm_page_alloc(NULL, 0, flags);
|
||||
if (m == NULL)
|
||||
VM_WAIT;
|
||||
else
|
||||
break;
|
||||
VM_OBJECT_WLOCK(obj);
|
||||
error = vm_object_set_memattr(obj, VM_MEMATTR_UNCACHEABLE);
|
||||
VM_OBJECT_WUNLOCK(obj);
|
||||
if (error != KERN_SUCCESS) {
|
||||
panic("vmm_mmio_alloc: vm_object_set_memattr error %d",
|
||||
error);
|
||||
}
|
||||
error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0,
|
||||
VMFS_NO_SPACE, VM_PROT_RW, VM_PROT_RW, 0);
|
||||
if (error != KERN_SUCCESS) {
|
||||
vm_object_deallocate(obj);
|
||||
obj = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
pa = VM_PAGE_TO_PHYS(m);
|
||||
|
||||
if ((m->flags & PG_ZERO) == 0)
|
||||
pagezero((void *)PHYS_TO_DMAP(pa));
|
||||
m->valid = VM_PAGE_BITS_ALL;
|
||||
/*
|
||||
* Drop the reference on the sglist.
|
||||
*
|
||||
* If the scatter/gather object was successfully allocated then it
|
||||
* has incremented the reference count on the sglist. Dropping the
|
||||
* initial reference count ensures that the sglist will be freed
|
||||
* when the object is deallocated.
|
||||
*
|
||||
* If the object could not be allocated then we end up freeing the
|
||||
* sglist.
|
||||
*/
|
||||
sglist_free(sg);
|
||||
|
||||
update_pages_allocated(1);
|
||||
|
||||
return (pa);
|
||||
return (obj);
|
||||
}
|
||||
|
||||
void
|
||||
vmm_mem_free(vm_paddr_t base, size_t length)
|
||||
vmm_mmio_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
|
||||
{
|
||||
vm_page_t m;
|
||||
|
||||
if (base & PAGE_MASK) {
|
||||
panic("vmm_mem_free: base 0x%0lx must be aligned on a "
|
||||
"0x%0x boundary\n", base, PAGE_SIZE);
|
||||
vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
|
||||
}
|
||||
|
||||
vm_object_t
|
||||
vmm_mem_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
|
||||
{
|
||||
int error;
|
||||
vm_object_t obj;
|
||||
|
||||
if (gpa & PAGE_MASK)
|
||||
panic("vmm_mem_alloc: invalid gpa %#lx", gpa);
|
||||
|
||||
if (len == 0 || (len & PAGE_MASK) != 0)
|
||||
panic("vmm_mem_alloc: invalid allocation size %lu", len);
|
||||
|
||||
obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
|
||||
if (obj != NULL) {
|
||||
error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0,
|
||||
VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0);
|
||||
if (error != KERN_SUCCESS) {
|
||||
vm_object_deallocate(obj);
|
||||
obj = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (length != PAGE_SIZE)
|
||||
panic("vmm_mem_free: invalid length %lu", length);
|
||||
return (obj);
|
||||
}
|
||||
|
||||
m = PHYS_TO_VM_PAGE(base);
|
||||
m->wire_count--;
|
||||
vm_page_free(m);
|
||||
atomic_subtract_int(&cnt.v_wire_count, 1);
|
||||
void
|
||||
vmm_mem_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
|
||||
{
|
||||
|
||||
update_pages_allocated(-1);
|
||||
vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
|
||||
}
|
||||
|
||||
vm_paddr_t
|
||||
|
@ -29,9 +29,15 @@
|
||||
#ifndef _VMM_MEM_H_
|
||||
#define _VMM_MEM_H_
|
||||
|
||||
struct vmspace;
|
||||
struct vm_object;
|
||||
|
||||
int vmm_mem_init(void);
|
||||
vm_paddr_t vmm_mem_alloc(size_t size);
|
||||
void vmm_mem_free(vm_paddr_t start, size_t size);
|
||||
struct vm_object *vmm_mem_alloc(struct vmspace *, vm_paddr_t gpa, size_t size);
|
||||
struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len,
|
||||
vm_paddr_t hpa);
|
||||
void vmm_mem_free(struct vmspace *, vm_paddr_t gpa, size_t size);
|
||||
void vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size);
|
||||
vm_paddr_t vmm_mem_maxaddr(void);
|
||||
|
||||
#endif
|
||||
|
@ -2239,20 +2239,16 @@ cfiscsi_lun_disable(void *arg, struct ctl_id target_id, int lun_id)
|
||||
}
|
||||
|
||||
static void
|
||||
cfiscsi_datamove(union ctl_io *io)
|
||||
cfiscsi_datamove_in(union ctl_io *io)
|
||||
{
|
||||
struct cfiscsi_session *cs;
|
||||
struct icl_pdu *request, *response;
|
||||
const struct iscsi_bhs_scsi_command *bhssc;
|
||||
struct iscsi_bhs_data_in *bhsdi;
|
||||
struct iscsi_bhs_r2t *bhsr2t;
|
||||
struct cfiscsi_data_wait *cdw;
|
||||
struct ctl_sg_entry ctl_sg_entry, *ctl_sglist;
|
||||
size_t copy_len, len, off;
|
||||
const char *addr;
|
||||
int ctl_sg_count, error, i;
|
||||
uint32_t target_transfer_tag;
|
||||
bool done;
|
||||
|
||||
request = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;
|
||||
cs = PDU_SESSION(request);
|
||||
@ -2278,215 +2274,240 @@ cfiscsi_datamove(union ctl_io *io)
|
||||
*/
|
||||
PDU_TOTAL_TRANSFER_LEN(request) = io->scsiio.kern_total_len;
|
||||
|
||||
if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) {
|
||||
#if 0
|
||||
if (ctl_sg_count > 1)
|
||||
CFISCSI_SESSION_DEBUG(cs, "ctl_sg_count = %d", ctl_sg_count);
|
||||
if (ctl_sg_count > 1)
|
||||
CFISCSI_SESSION_DEBUG(cs, "ctl_sg_count = %d", ctl_sg_count);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is the offset within the current SCSI command;
|
||||
* i.e. for the first call of datamove(), it will be 0,
|
||||
* and for subsequent ones it will be the sum of lengths
|
||||
* of previous ones.
|
||||
*/
|
||||
off = htonl(io->scsiio.kern_rel_offset);
|
||||
if (off > 1)
|
||||
CFISCSI_SESSION_DEBUG(cs, "off = %zd", off);
|
||||
/*
|
||||
* This is the offset within the current SCSI command;
|
||||
* i.e. for the first call of datamove(), it will be 0,
|
||||
* and for subsequent ones it will be the sum of lengths
|
||||
* of previous ones.
|
||||
*/
|
||||
off = htonl(io->scsiio.kern_rel_offset);
|
||||
if (off > 1)
|
||||
CFISCSI_SESSION_DEBUG(cs, "off = %zd", off);
|
||||
|
||||
i = 0;
|
||||
addr = NULL;
|
||||
len = 0;
|
||||
response = NULL;
|
||||
bhsdi = NULL;
|
||||
for (;;) {
|
||||
KASSERT(i < ctl_sg_count, ("i >= ctl_sg_count"));
|
||||
i = 0;
|
||||
addr = NULL;
|
||||
len = 0;
|
||||
response = NULL;
|
||||
bhsdi = NULL;
|
||||
for (;;) {
|
||||
KASSERT(i < ctl_sg_count, ("i >= ctl_sg_count"));
|
||||
if (response == NULL) {
|
||||
response = cfiscsi_pdu_new_response(request, M_NOWAIT);
|
||||
if (response == NULL) {
|
||||
response =
|
||||
cfiscsi_pdu_new_response(request, M_NOWAIT);
|
||||
if (response == NULL) {
|
||||
CFISCSI_SESSION_WARN(cs, "failed to "
|
||||
"allocate memory; dropping connection");
|
||||
icl_pdu_free(request);
|
||||
cfiscsi_session_terminate(cs);
|
||||
return;
|
||||
}
|
||||
bhsdi = (struct iscsi_bhs_data_in *)
|
||||
response->ip_bhs;
|
||||
bhsdi->bhsdi_opcode =
|
||||
ISCSI_BHS_OPCODE_SCSI_DATA_IN;
|
||||
bhsdi->bhsdi_initiator_task_tag =
|
||||
bhssc->bhssc_initiator_task_tag;
|
||||
bhsdi->bhsdi_datasn =
|
||||
htonl(PDU_EXPDATASN(request));
|
||||
PDU_EXPDATASN(request)++;
|
||||
bhsdi->bhsdi_buffer_offset = htonl(off);
|
||||
}
|
||||
|
||||
if (len == 0) {
|
||||
addr = ctl_sglist[i].addr;
|
||||
len = ctl_sglist[i].len;
|
||||
KASSERT(len > 0, ("len <= 0"));
|
||||
}
|
||||
|
||||
copy_len = len;
|
||||
if (response->ip_data_len + copy_len >
|
||||
cs->cs_max_data_segment_length)
|
||||
copy_len = cs->cs_max_data_segment_length -
|
||||
response->ip_data_len;
|
||||
KASSERT(copy_len <= len, ("copy_len > len"));
|
||||
error = icl_pdu_append_data(response, addr, copy_len, M_NOWAIT);
|
||||
if (error != 0) {
|
||||
CFISCSI_SESSION_WARN(cs, "failed to "
|
||||
"allocate memory; dropping connection");
|
||||
icl_pdu_free(request);
|
||||
icl_pdu_free(response);
|
||||
cfiscsi_session_terminate(cs);
|
||||
return;
|
||||
}
|
||||
addr += copy_len;
|
||||
len -= copy_len;
|
||||
off += copy_len;
|
||||
io->scsiio.ext_data_filled += copy_len;
|
||||
|
||||
if (len == 0) {
|
||||
/*
|
||||
* End of scatter-gather segment;
|
||||
* proceed to the next one...
|
||||
*/
|
||||
if (i == ctl_sg_count - 1) {
|
||||
/*
|
||||
* ... unless this was the last one.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
if (response->ip_data_len ==
|
||||
cs->cs_max_data_segment_length) {
|
||||
/*
|
||||
* Can't stuff more data into the current PDU;
|
||||
* queue it. Note that's not enough to check
|
||||
* for kern_data_resid == 0 instead; there
|
||||
* may be several Data-In PDUs for the final
|
||||
* call to cfiscsi_datamove(), and we want
|
||||
* to set the F flag only on the last of them.
|
||||
*/
|
||||
if (off == io->scsiio.kern_total_len)
|
||||
bhsdi->bhsdi_flags |= BHSDI_FLAGS_F;
|
||||
KASSERT(response->ip_data_len > 0,
|
||||
("sending empty Data-In"));
|
||||
cfiscsi_pdu_queue(response);
|
||||
response = NULL;
|
||||
bhsdi = NULL;
|
||||
}
|
||||
bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs;
|
||||
bhsdi->bhsdi_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_IN;
|
||||
bhsdi->bhsdi_initiator_task_tag =
|
||||
bhssc->bhssc_initiator_task_tag;
|
||||
bhsdi->bhsdi_datasn = htonl(PDU_EXPDATASN(request));
|
||||
PDU_EXPDATASN(request)++;
|
||||
bhsdi->bhsdi_buffer_offset = htonl(off);
|
||||
}
|
||||
KASSERT(i == ctl_sg_count - 1, ("missed SG segment"));
|
||||
KASSERT(len == 0, ("missed data from SG segment"));
|
||||
if (response != NULL) {
|
||||
if (off == io->scsiio.kern_total_len) {
|
||||
bhsdi->bhsdi_flags |= BHSDI_FLAGS_F;
|
||||
} else {
|
||||
CFISCSI_SESSION_DEBUG(cs, "not setting the F flag; "
|
||||
"have %zd, need %zd", off,
|
||||
(size_t)io->scsiio.kern_total_len);
|
||||
|
||||
if (len == 0) {
|
||||
addr = ctl_sglist[i].addr;
|
||||
len = ctl_sglist[i].len;
|
||||
KASSERT(len > 0, ("len <= 0"));
|
||||
}
|
||||
|
||||
copy_len = len;
|
||||
if (response->ip_data_len + copy_len >
|
||||
cs->cs_max_data_segment_length)
|
||||
copy_len = cs->cs_max_data_segment_length -
|
||||
response->ip_data_len;
|
||||
KASSERT(copy_len <= len, ("copy_len > len"));
|
||||
error = icl_pdu_append_data(response, addr, copy_len, M_NOWAIT);
|
||||
if (error != 0) {
|
||||
CFISCSI_SESSION_WARN(cs, "failed to "
|
||||
"allocate memory; dropping connection");
|
||||
icl_pdu_free(request);
|
||||
icl_pdu_free(response);
|
||||
cfiscsi_session_terminate(cs);
|
||||
return;
|
||||
}
|
||||
addr += copy_len;
|
||||
len -= copy_len;
|
||||
off += copy_len;
|
||||
io->scsiio.ext_data_filled += copy_len;
|
||||
|
||||
if (len == 0) {
|
||||
/*
|
||||
* End of scatter-gather segment;
|
||||
* proceed to the next one...
|
||||
*/
|
||||
if (i == ctl_sg_count - 1) {
|
||||
/*
|
||||
* ... unless this was the last one.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
if (response->ip_data_len == cs->cs_max_data_segment_length) {
|
||||
/*
|
||||
* Can't stuff more data into the current PDU;
|
||||
* queue it. Note that's not enough to check
|
||||
* for kern_data_resid == 0 instead; there
|
||||
* may be several Data-In PDUs for the final
|
||||
* call to cfiscsi_datamove(), and we want
|
||||
* to set the F flag only on the last of them.
|
||||
*/
|
||||
if (off == io->scsiio.kern_total_len)
|
||||
bhsdi->bhsdi_flags |= BHSDI_FLAGS_F;
|
||||
KASSERT(response->ip_data_len > 0,
|
||||
("sending empty Data-In"));
|
||||
cfiscsi_pdu_queue(response);
|
||||
response = NULL;
|
||||
bhsdi = NULL;
|
||||
}
|
||||
|
||||
io->scsiio.be_move_done(io);
|
||||
} else {
|
||||
CFISCSI_SESSION_LOCK(cs);
|
||||
target_transfer_tag = cs->cs_target_transfer_tag;
|
||||
cs->cs_target_transfer_tag++;
|
||||
CFISCSI_SESSION_UNLOCK(cs);
|
||||
|
||||
#if 0
|
||||
CFISCSI_SESSION_DEBUG(cs, "expecting Data-Out with initiator "
|
||||
"task tag 0x%x, target transfer tag 0x%x",
|
||||
bhssc->bhssc_initiator_task_tag, target_transfer_tag);
|
||||
#endif
|
||||
cdw = uma_zalloc(cfiscsi_data_wait_zone, M_NOWAIT | M_ZERO);
|
||||
if (cdw == NULL) {
|
||||
CFISCSI_SESSION_WARN(cs, "failed to "
|
||||
"allocate memory; dropping connection");
|
||||
icl_pdu_free(request);
|
||||
cfiscsi_session_terminate(cs);
|
||||
}
|
||||
KASSERT(i == ctl_sg_count - 1, ("missed SG segment"));
|
||||
KASSERT(len == 0, ("missed data from SG segment"));
|
||||
if (response != NULL) {
|
||||
if (off == io->scsiio.kern_total_len) {
|
||||
bhsdi->bhsdi_flags |= BHSDI_FLAGS_F;
|
||||
} else {
|
||||
CFISCSI_SESSION_DEBUG(cs, "not setting the F flag; "
|
||||
"have %zd, need %zd", off,
|
||||
(size_t)io->scsiio.kern_total_len);
|
||||
}
|
||||
cdw->cdw_ctl_io = io;
|
||||
cdw->cdw_target_transfer_tag = htonl(target_transfer_tag);
|
||||
cdw->cdw_initiator_task_tag = bhssc->bhssc_initiator_task_tag;
|
||||
|
||||
if (cs->cs_immediate_data &&
|
||||
icl_pdu_data_segment_length(request) > 0) {
|
||||
done = cfiscsi_handle_data_segment(request, cdw);
|
||||
if (done) {
|
||||
uma_zfree(cfiscsi_data_wait_zone, cdw);
|
||||
io->scsiio.be_move_done(io);
|
||||
return;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (io->scsiio.ext_data_filled != 0)
|
||||
CFISCSI_SESSION_DEBUG(cs, "got %zd bytes of immediate data, need %zd",
|
||||
io->scsiio.ext_data_filled, io->scsiio.kern_data_len);
|
||||
#endif
|
||||
}
|
||||
|
||||
CFISCSI_SESSION_LOCK(cs);
|
||||
TAILQ_INSERT_TAIL(&cs->cs_waiting_for_data_out, cdw, cdw_next);
|
||||
CFISCSI_SESSION_UNLOCK(cs);
|
||||
|
||||
/*
|
||||
* XXX: We should limit the number of outstanding R2T PDUs
|
||||
* per task to MaxOutstandingR2T.
|
||||
*/
|
||||
response = cfiscsi_pdu_new_response(request, M_NOWAIT);
|
||||
if (response == NULL) {
|
||||
CFISCSI_SESSION_WARN(cs, "failed to "
|
||||
"allocate memory; dropping connection");
|
||||
icl_pdu_free(request);
|
||||
cfiscsi_session_terminate(cs);
|
||||
}
|
||||
bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs;
|
||||
bhsr2t->bhsr2t_opcode = ISCSI_BHS_OPCODE_R2T;
|
||||
bhsr2t->bhsr2t_flags = 0x80;
|
||||
bhsr2t->bhsr2t_lun = bhssc->bhssc_lun;
|
||||
bhsr2t->bhsr2t_initiator_task_tag =
|
||||
bhssc->bhssc_initiator_task_tag;
|
||||
bhsr2t->bhsr2t_target_transfer_tag =
|
||||
htonl(target_transfer_tag);
|
||||
/*
|
||||
* XXX: Here we assume that cfiscsi_datamove() won't ever
|
||||
* be running concurrently on several CPUs for a given
|
||||
* command.
|
||||
*/
|
||||
bhsr2t->bhsr2t_r2tsn = htonl(PDU_R2TSN(request));
|
||||
PDU_R2TSN(request)++;
|
||||
/*
|
||||
* This is the offset within the current SCSI command;
|
||||
* i.e. for the first call of datamove(), it will be 0,
|
||||
* and for subsequent ones it will be the sum of lengths
|
||||
* of previous ones.
|
||||
*
|
||||
* The ext_data_filled is to account for unsolicited
|
||||
* (immediate) data that might have already arrived.
|
||||
*/
|
||||
bhsr2t->bhsr2t_buffer_offset =
|
||||
htonl(io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled);
|
||||
/*
|
||||
* This is the total length (sum of S/G lengths) this call
|
||||
* to cfiscsi_datamove() is supposed to handle.
|
||||
*
|
||||
* XXX: Limit it to MaxBurstLength.
|
||||
*/
|
||||
bhsr2t->bhsr2t_desired_data_transfer_length =
|
||||
htonl(io->scsiio.kern_data_len - io->scsiio.ext_data_filled);
|
||||
KASSERT(response->ip_data_len > 0, ("sending empty Data-In"));
|
||||
cfiscsi_pdu_queue(response);
|
||||
}
|
||||
|
||||
io->scsiio.be_move_done(io);
}
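To put numbers on the loop above: if a single datamove call carries a 192 KB read and the negotiated maximum data segment length is 64 KB, three Data-In PDUs go out and only the last one, the one that ends at kern_total_len, gets the F bit. The per-iteration sizing rule, pulled out into a stand-alone function purely for illustration:

/*
 * Illustration only: how much of the current S/G chunk fits into the PDU
 * being built, given the connection's maximum data segment length.
 */
static size_t
datain_copy_len(size_t pdu_data_len, size_t sg_remaining, size_t max_seg_len)
{
	size_t copy_len;

	copy_len = sg_remaining;
	if (pdu_data_len + copy_len > max_seg_len)
		copy_len = max_seg_len - pdu_data_len;
	return (copy_len);
}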
static void
|
||||
cfiscsi_datamove_out(union ctl_io *io)
|
||||
{
|
||||
struct cfiscsi_session *cs;
|
||||
struct icl_pdu *request, *response;
|
||||
const struct iscsi_bhs_scsi_command *bhssc;
|
||||
struct iscsi_bhs_r2t *bhsr2t;
|
||||
struct cfiscsi_data_wait *cdw;
|
||||
uint32_t target_transfer_tag;
|
||||
bool done;
|
||||
|
||||
request = io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr;
|
||||
cs = PDU_SESSION(request);
|
||||
|
||||
bhssc = (const struct iscsi_bhs_scsi_command *)request->ip_bhs;
|
||||
KASSERT((bhssc->bhssc_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) ==
|
||||
ISCSI_BHS_OPCODE_SCSI_COMMAND,
|
||||
("bhssc->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_COMMAND"));
|
||||
|
||||
/*
|
||||
* We need to record it so that we can properly report
* underflow/overflow.
*/
|
||||
PDU_TOTAL_TRANSFER_LEN(request) = io->scsiio.kern_total_len;
|
||||
|
||||
CFISCSI_SESSION_LOCK(cs);
|
||||
target_transfer_tag = cs->cs_target_transfer_tag;
|
||||
cs->cs_target_transfer_tag++;
|
||||
CFISCSI_SESSION_UNLOCK(cs);
|
||||
|
||||
#if 0
|
||||
CFISCSI_SESSION_DEBUG(cs, "expecting Data-Out with initiator "
|
||||
"task tag 0x%x, target transfer tag 0x%x",
|
||||
bhssc->bhssc_initiator_task_tag, target_transfer_tag);
|
||||
#endif
|
||||
cdw = uma_zalloc(cfiscsi_data_wait_zone, M_NOWAIT | M_ZERO);
|
||||
if (cdw == NULL) {
|
||||
CFISCSI_SESSION_WARN(cs, "failed to "
|
||||
"allocate memory; dropping connection");
|
||||
icl_pdu_free(request);
|
||||
cfiscsi_session_terminate(cs);
|
||||
}
|
||||
cdw->cdw_ctl_io = io;
|
||||
cdw->cdw_target_transfer_tag = htonl(target_transfer_tag);
|
||||
cdw->cdw_initiator_task_tag = bhssc->bhssc_initiator_task_tag;
|
||||
|
||||
if (cs->cs_immediate_data && icl_pdu_data_segment_length(request) > 0) {
|
||||
done = cfiscsi_handle_data_segment(request, cdw);
|
||||
if (done) {
|
||||
uma_zfree(cfiscsi_data_wait_zone, cdw);
|
||||
io->scsiio.be_move_done(io);
|
||||
return;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (io->scsiio.ext_data_filled != 0)
|
||||
CFISCSI_SESSION_DEBUG(cs, "got %zd bytes of immediate data, need %zd",
|
||||
io->scsiio.ext_data_filled, io->scsiio.kern_data_len);
|
||||
#endif
|
||||
}
|
||||
|
||||
CFISCSI_SESSION_LOCK(cs);
|
||||
TAILQ_INSERT_TAIL(&cs->cs_waiting_for_data_out, cdw, cdw_next);
|
||||
CFISCSI_SESSION_UNLOCK(cs);
|
||||
|
||||
/*
|
||||
* XXX: We should limit the number of outstanding R2T PDUs
|
||||
* per task to MaxOutstandingR2T.
|
||||
*/
|
||||
response = cfiscsi_pdu_new_response(request, M_NOWAIT);
|
||||
if (response == NULL) {
|
||||
CFISCSI_SESSION_WARN(cs, "failed to "
|
||||
"allocate memory; dropping connection");
|
||||
icl_pdu_free(request);
|
||||
cfiscsi_session_terminate(cs);
|
||||
}
|
||||
bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs;
|
||||
bhsr2t->bhsr2t_opcode = ISCSI_BHS_OPCODE_R2T;
|
||||
bhsr2t->bhsr2t_flags = 0x80;
|
||||
bhsr2t->bhsr2t_lun = bhssc->bhssc_lun;
|
||||
bhsr2t->bhsr2t_initiator_task_tag = bhssc->bhssc_initiator_task_tag;
|
||||
bhsr2t->bhsr2t_target_transfer_tag = htonl(target_transfer_tag);
|
||||
/*
|
||||
* XXX: Here we assume that cfiscsi_datamove() won't ever
|
||||
* be running concurrently on several CPUs for a given
|
||||
* command.
|
||||
*/
|
||||
bhsr2t->bhsr2t_r2tsn = htonl(PDU_R2TSN(request));
|
||||
PDU_R2TSN(request)++;
|
||||
/*
|
||||
* This is the offset within the current SCSI command;
|
||||
* i.e. for the first call of datamove(), it will be 0,
|
||||
* and for subsequent ones it will be the sum of lengths
|
||||
* of previous ones.
|
||||
*
|
||||
* The ext_data_filled is to account for unsolicited
|
||||
* (immediate) data that might have already arrived.
|
||||
*/
|
||||
bhsr2t->bhsr2t_buffer_offset =
|
||||
htonl(io->scsiio.kern_rel_offset + io->scsiio.ext_data_filled);
|
||||
/*
|
||||
* This is the total length (sum of S/G lengths) this call
|
||||
* to cfiscsi_datamove() is supposed to handle.
|
||||
*
|
||||
* XXX: Limit it to MaxBurstLength.
|
||||
*/
|
||||
bhsr2t->bhsr2t_desired_data_transfer_length =
|
||||
htonl(io->scsiio.kern_data_len - io->scsiio.ext_data_filled);
|
||||
cfiscsi_pdu_queue(response);
|
||||
}
|
||||
|
||||
static void
|
||||
cfiscsi_datamove(union ctl_io *io)
|
||||
{
|
||||
|
||||
if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN)
|
||||
cfiscsi_datamove_in(io);
|
||||
else
|
||||
cfiscsi_datamove_out(io);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -2920,7 +2920,6 @@ kern/tty_pts.c standard
|
||||
kern/tty_tty.c standard
|
||||
kern/tty_ttydisc.c standard
|
||||
kern/uipc_accf.c optional inet
|
||||
kern/uipc_cow.c optional socket_send_cow
|
||||
kern/uipc_debug.c optional ddb
|
||||
kern/uipc_domain.c standard
|
||||
kern/uipc_mbuf.c standard
|
||||
|
@ -2883,6 +2883,9 @@ igb_setup_msix(struct adapter *adapter)
|
||||
if (queues > maxqueues)
|
||||
queues = maxqueues;
|
||||
|
||||
/* reflect correct sysctl value */
|
||||
igb_num_queues = queues;
|
||||
|
||||
/*
|
||||
** One vector (RX/TX pair) per queue
|
||||
** plus an additional for Link interrupt
|
||||
|
@ -29,6 +29,8 @@
|
||||
/**
* Implements low-level interactions with Hyper-V/Azure
*/
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/malloc.h>
|
||||
@ -88,6 +90,14 @@ hv_vmbus_query_hypervisor_presence(void)
|
||||
{
|
||||
u_int regs[4];
|
||||
int hyper_v_detected = 0;
|
||||
|
||||
/*
|
||||
* When Xen is detected and native Xen PV support is enabled,
|
||||
* ignore Xen's HyperV emulation.
|
||||
*/
|
||||
if (vm_guest == VM_GUEST_XEN)
|
||||
return (0);
|
||||
|
||||
do_cpuid(1, regs);
|
||||
if (regs[2] & 0x80000000) { /* if(a hypervisor is detected) */
|
||||
/* make sure this really is Hyper-V */
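The probe above relies on CPUID leaf 1: ECX bit 31 is the "hypervisor present" bit, and leaf 0x40000000 then identifies the vendor ("Microsoft Hv" for Hyper-V). A userland sketch of the same check, using the compiler's <cpuid.h> rather than the kernel's do_cpuid() (illustrative only):

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
        unsigned int eax, ebx, ecx, edx;
        char sig[13];

        /* Leaf 1, ECX bit 31: set when running under a hypervisor. */
        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx) || !(ecx & 0x80000000)) {
                printf("no hypervisor detected\n");
                return (0);
        }
        /* Leaf 0x40000000: hypervisor vendor signature in EBX/ECX/EDX. */
        __cpuid(0x40000000, eax, ebx, ecx, edx);
        memcpy(sig, &ebx, 4);
        memcpy(sig + 4, &ecx, 4);
        memcpy(sig + 8, &edx, 4);
        sig[12] = '\0';
        printf("hypervisor signature: %s\n", sig);
        return (0);
}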

@ -234,6 +234,9 @@ MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
** TUNEABLE PARAMETERS:
*/

static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
"IXGBE driver parameters");

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
@ -242,17 +245,29 @@ MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
*/
static int ixgbe_enable_aim = TRUE;
TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RW, &ixgbe_enable_aim, 0,
"Enable adaptive interrupt moderation");

static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
&ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");

/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 256;
TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
&ixgbe_rx_process_limit, 0,
"Maximum number of received packets to process at a time,"
"-1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int ixgbe_tx_process_limit = 256;
TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
&ixgbe_tx_process_limit, 0,
"Maximum number of sent packets to process at a time,"
"-1 means unlimited");

/*
** Smart speed setting, default to on
@ -269,6 +284,8 @@ static int ixgbe_smart_speed = ixgbe_smart_speed_on;
*/
static int ixgbe_enable_msix = 1;
TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
"Enable MSI-X interrupts");

/*
* Number of Queues, can be set to 0,
@ -278,6 +295,8 @@ TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
*/
static int ixgbe_num_queues = 0;
TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
"Number of queues to configure, 0 indicates autoconfigure");

/*
** Number of TX descriptors per ring,
@ -286,10 +305,14 @@ TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
*/
static int ixgbe_txd = PERFORM_TXD;
TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
"Number of transmit descriptors per queue");

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
"Number of receive descriptors per queue");

/*
** Defining this on will allow the use
@ -2442,6 +2465,9 @@ ixgbe_setup_msix(struct adapter *adapter)
else if ((ixgbe_num_queues == 0) && (queues > 8))
queues = 8;

/* reflect correct sysctl value */
ixgbe_num_queues = queues;

/*
** Want one vector (RX/TX pair) per queue
** plus an additional for Link.

@ -1700,9 +1700,9 @@ vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
rxq->vtnrx_stats.vrxs_ipackets++;
rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;

/* VTNET_RXQ_UNLOCK(rxq); */
VTNET_RXQ_UNLOCK(rxq);
(*ifp->if_input)(ifp, m);
/* VTNET_RXQ_LOCK(rxq); */
VTNET_RXQ_LOCK(rxq);
}

static int
@ -1782,6 +1782,10 @@ vtnet_rxq_eof(struct vtnet_rxq *rxq)
m_adj(m, adjsz);

vtnet_rxq_input(rxq, m, hdr);

/* Must recheck after dropping the Rx lock. */
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
}

if (deq > 0)
@ -396,7 +396,7 @@ xentimer_et_start(struct eventtimer *et,
{
int error = 0, i = 0;
struct xentimer_softc *sc = et->et_priv;
int cpu = PCPU_GET(acpi_id);
int cpu = PCPU_GET(vcpu_id);
struct xentimer_pcpu_data *pcpu = DPCPU_PTR(xentimer_pcpu);
uint64_t first_in_ns, next_time;

@ -433,7 +433,7 @@ xentimer_et_start(struct eventtimer *et,
static int
xentimer_et_stop(struct eventtimer *et)
{
int cpu = PCPU_GET(acpi_id);
int cpu = PCPU_GET(vcpu_id);
struct xentimer_pcpu_data *pcpu = DPCPU_PTR(xentimer_pcpu);

pcpu->timer = 0;

@ -62,13 +62,13 @@ struct shadow_time_info {
vm_paddr_t *pc_pdir_shadow; \
uint64_t pc_processed_system_time; \
struct shadow_time_info pc_shadow_time; \
char __pad[189]
char __pad[185]

#else /* !XEN */

#define PCPU_XEN_FIELDS \
; \
char __pad[237]
char __pad[233]

#endif

@ -84,7 +84,8 @@ struct shadow_time_info {
u_int pc_acpi_id; /* ACPI CPU id */ \
u_int pc_apic_id; \
int pc_private_tss; /* Flag indicating private tss*/\
u_int pc_cmci_mask /* MCx banks for CMCI */ \
u_int pc_cmci_mask; /* MCx banks for CMCI */ \
u_int pc_vcpu_id /* Xen vCPU ID */ \
PCPU_XEN_FIELDS

#ifdef _KERNEL
@ -783,13 +783,7 @@ start_all_aps(void)
dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
M_WAITOK | M_ZERO), bootAP);
pc->pc_apic_id = cpu_apic_ids[bootAP];
/*
* The i386 PV port uses the apic_id as vCPU id, but the
* PVHVM port needs to use the acpi_id, so set it for PV
* also in order to work with shared devices between PV
* and PVHVM.
*/
pc->pc_acpi_id = cpu_apic_ids[bootAP];
pc->pc_vcpu_id = cpu_apic_ids[bootAP];
pc->pc_prvspace = pc;
pc->pc_curthread = 0;
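The comment above explains the intent: on the i386 PV port both pc_acpi_id and the new pc_vcpu_id carry the APIC ID, so code shared with PVHVM can always translate a FreeBSD CPU number through the same per-CPU field. The lookup the Xen code later in this commit performs could be summarized by a helper like this (hypothetical, not in the tree):

/* Map a FreeBSD CPU number to the hypervisor's vCPU identifier. */
static __inline int
xen_cpu_to_vcpu(int cpu)
{

        return (pcpu_find(cpu)->pc_vcpu_id);
}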

@ -88,7 +88,7 @@ mptable_setup_local(void)
{

PCPU_SET(apic_id, 0);
PCPU_SET(acpi_id, 0);
PCPU_SET(vcpu_id, 0);
return (0);
}

@ -55,7 +55,6 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/vmmeter.h>
#include <sys/proc.h>
@ -699,10 +698,10 @@ kmeminit(void)
* VM_KMEM_SIZE_MAX is dependent on the maximum KVA space
* available.
*
* Note that the kmem_map is also used by the zone allocator,
* Note that the kmem arena is also used by the zone allocator,
* so make sure that there is enough space.
*/
vm_kmem_size = VM_KMEM_SIZE + nmbclusters * PAGE_SIZE;
vm_kmem_size = VM_KMEM_SIZE;
mem_size = cnt.v_page_count;

#if defined(VM_KMEM_SIZE_SCALE)
@ -1,182 +0,0 @@
/*--
* Copyright (c) 1997, Duke University
* All rights reserved.
*
* Author:
* Andrew Gallatin <gallatin@cs.duke.edu>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of Duke University may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY DUKE UNIVERSITY ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DUKE UNIVERSITY BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
* This is a set of routines for enabling and disabling copy on write
* protection for data written into sockets.
*/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/mbuf.h>
#include <sys/sf_buf.h>
#include <sys/socketvar.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>

FEATURE(zero_copy_sockets, "Zero copy sockets support");

struct netsend_cow_stats {
int attempted;
int fail_not_mapped;
int fail_sf_buf;
int success;
int iodone;
};

static struct netsend_cow_stats socow_stats;

static int socow_iodone(struct mbuf *m, void *addr, void *args);

static int
socow_iodone(struct mbuf *m, void *addr, void *args)
{
struct sf_buf *sf;
vm_page_t pp;

sf = args;
pp = sf_buf_page(sf);
sf_buf_free(sf);
/* remove COW mapping */
vm_page_lock(pp);
vm_page_cowclear(pp);
vm_page_unwire(pp, 0);
/*
* Check for the object going away on us. This can
* happen since we don't hold a reference to it.
* If so, we're responsible for freeing the page.
*/
if (pp->wire_count == 0 && pp->object == NULL)
vm_page_free(pp);
vm_page_unlock(pp);
socow_stats.iodone++;
return (EXT_FREE_OK);
}

int
socow_setup(struct mbuf *m0, struct uio *uio)
{
struct sf_buf *sf;
vm_page_t pp;
struct iovec *iov;
struct vmspace *vmspace;
struct vm_map *map;
vm_offset_t offset, uva;
vm_size_t len;

socow_stats.attempted++;
vmspace = curproc->p_vmspace;
map = &vmspace->vm_map;
uva = (vm_offset_t) uio->uio_iov->iov_base;
offset = uva & PAGE_MASK;
len = PAGE_SIZE - offset;

/*
* Verify that access to the given address is allowed from user-space.
*/
if (vm_fault_quick_hold_pages(map, uva, len, VM_PROT_READ, &pp, 1) <
0) {
socow_stats.fail_not_mapped++;
return(0);
}

/*
* set up COW
*/
vm_page_lock(pp);
if (vm_page_cowsetup(pp) != 0) {
vm_page_unhold(pp);
vm_page_unlock(pp);
return (0);
}

/*
* wire the page for I/O
*/
vm_page_wire(pp);
vm_page_unhold(pp);
vm_page_unlock(pp);
/*
* Allocate an sf buf
*/
sf = sf_buf_alloc(pp, SFB_CATCH);
if (sf == NULL) {
vm_page_lock(pp);
vm_page_cowclear(pp);
vm_page_unwire(pp, 0);
/*
* Check for the object going away on us. This can
* happen since we don't hold a reference to it.
* If so, we're responsible for freeing the page.
*/
if (pp->wire_count == 0 && pp->object == NULL)
vm_page_free(pp);
vm_page_unlock(pp);
socow_stats.fail_sf_buf++;
return(0);
}
/*
* attach to mbuf
*/
MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone,
(void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF);
m0->m_len = len;
m0->m_data = (caddr_t)sf_buf_kva(sf) + offset;
socow_stats.success++;

iov = uio->uio_iov;
iov->iov_base = (char *)iov->iov_base + m0->m_len;
iov->iov_len -= m0->m_len;
uio->uio_resid -= m0->m_len;
uio->uio_offset += m0->m_len;
if (iov->iov_len == 0) {
uio->uio_iov++;
uio->uio_iovcnt--;
}

return(m0->m_len);
}

@ -575,4 +575,4 @@ madt_set_ids(void *dummy)
la->la_acpi_id);
}
}
SYSINIT(madt_set_ids, SI_SUB_CPU, SI_ORDER_ANY, madt_set_ids, NULL);
SYSINIT(madt_set_ids, SI_SUB_CPU, SI_ORDER_MIDDLE, madt_set_ids, NULL);
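The only change here is the ordering: within one subsystem, SI_ORDER_MIDDLE handlers run before SI_ORDER_ANY ones, so moving madt_set_ids earlier presumably guarantees that pc_acpi_id is populated before xen_set_vcpu_id (registered at SI_SUB_CPU/SI_ORDER_ANY later in this commit) copies it into pc_vcpu_id. The idiom, sketched for a hypothetical consumer that must run after madt_set_ids:

/* Runs at SI_SUB_CPU after all SI_ORDER_MIDDLE handlers (e.g. madt_set_ids). */
static void
example_consume_acpi_ids(void *arg __unused)
{
        struct pcpu *pc;
        int i;

        CPU_FOREACH(i) {
                pc = pcpu_find(i);
                printf("CPU %d has ACPI ID %u\n", i, pc->pc_acpi_id);
        }
}
SYSINIT(example_consume_acpi_ids, SI_SUB_CPU, SI_ORDER_ANY,
    example_consume_acpi_ids, NULL);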

@ -700,6 +700,7 @@ xen_hvm_init(enum xen_hvm_init_type init_type)

setup_xen_features();
cpu_ops = xen_hvm_cpu_ops;
vm_guest = VM_GUEST_XEN;
break;
case XEN_HVM_INIT_RESUME:
if (error != 0)
@ -742,6 +743,22 @@ xen_hvm_sysinit(void *arg __unused)
xen_hvm_init(XEN_HVM_INIT_COLD);
}

static void
xen_set_vcpu_id(void)
{
struct pcpu *pc;
int i;

/* Set vcpu_id to acpi_id */
CPU_FOREACH(i) {
pc = pcpu_find(i);
pc->pc_vcpu_id = pc->pc_acpi_id;
if (bootverbose)
printf("XEN: CPU %u has VCPU ID %u\n",
i, pc->pc_vcpu_id);
}
}

static void
xen_hvm_cpu_init(void)
{
@ -762,7 +779,7 @@ xen_hvm_cpu_init(void)
}

vcpu_info = DPCPU_PTR(vcpu_local_info);
cpu = PCPU_GET(acpi_id);
cpu = PCPU_GET(vcpu_id);
info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT;
info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info));

@ -778,3 +795,4 @@ SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
SYSINIT(xen_setup_cpus, SI_SUB_SMP, SI_ORDER_FIRST, xen_setup_cpus, NULL);
#endif
SYSINIT(xen_hvm_cpu_init, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_cpu_init, NULL);
SYSINIT(xen_set_vcpu_id, SI_SUB_CPU, SI_ORDER_ANY, xen_set_vcpu_id, NULL);

@ -611,9 +611,9 @@ xen_rebind_ipi(struct xenisrc *isrc)
{
#ifdef SMP
int cpu = isrc->xi_cpu;
int acpi_id = pcpu_find(cpu)->pc_acpi_id;
int vcpu_id = pcpu_find(cpu)->pc_vcpu_id;
int error;
struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id };
struct evtchn_bind_ipi bind_ipi = { .vcpu = vcpu_id };

error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
&bind_ipi);
@ -640,10 +640,10 @@ static void
xen_rebind_virq(struct xenisrc *isrc)
{
int cpu = isrc->xi_cpu;
int acpi_id = pcpu_find(cpu)->pc_acpi_id;
int vcpu_id = pcpu_find(cpu)->pc_vcpu_id;
int error;
struct evtchn_bind_virq bind_virq = { .virq = isrc->xi_virq,
.vcpu = acpi_id };
.vcpu = vcpu_id };

error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
&bind_virq);
@ -796,7 +796,7 @@ xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
#ifdef SMP
struct evtchn_bind_vcpu bind_vcpu;
struct xenisrc *isrc;
u_int to_cpu, acpi_id;
u_int to_cpu, vcpu_id;
int error;

#ifdef XENHVM
@ -805,7 +805,7 @@ xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
#endif

to_cpu = apic_cpuid(apic_id);
acpi_id = pcpu_find(to_cpu)->pc_acpi_id;
vcpu_id = pcpu_find(to_cpu)->pc_vcpu_id;
xen_intr_intrcnt_add(to_cpu);

mtx_lock(&xen_intr_isrc_lock);
@ -830,7 +830,7 @@ xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
}

bind_vcpu.port = isrc->xi_port;
bind_vcpu.vcpu = acpi_id;
bind_vcpu.vcpu = vcpu_id;

/*
* Allow interrupts to be fielded on the new VCPU before
@ -1063,9 +1063,9 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu,
driver_filter_t filter, driver_intr_t handler, void *arg,
enum intr_type flags, xen_intr_handle_t *port_handlep)
{
int acpi_id = pcpu_find(cpu)->pc_acpi_id;
int vcpu_id = pcpu_find(cpu)->pc_vcpu_id;
struct xenisrc *isrc;
struct evtchn_bind_virq bind_virq = { .virq = virq, .vcpu = acpi_id };
struct evtchn_bind_virq bind_virq = { .virq = virq, .vcpu = vcpu_id };
int error;

/* Ensure the target CPU is ready to handle evtchn interrupts. */
@ -1126,9 +1126,9 @@ xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu,
xen_intr_handle_t *port_handlep)
{
#ifdef SMP
int acpi_id = pcpu_find(cpu)->pc_acpi_id;
int vcpu_id = pcpu_find(cpu)->pc_vcpu_id;
struct xenisrc *isrc;
struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id };
struct evtchn_bind_ipi bind_ipi = { .vcpu = vcpu_id };
int error;

/* Ensure the target CPU is ready to handle evtchn interrupts. */

@ -101,7 +101,7 @@ struct bhyvestats {
uint64_t vmexit_hlt;
uint64_t vmexit_pause;
uint64_t vmexit_mtrap;
uint64_t vmexit_paging;
uint64_t vmexit_inst_emul;
uint64_t cpu_switch_rotate;
uint64_t cpu_switch_direct;
int io_reset;
@ -208,14 +208,12 @@ fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
vmexit[vcpu].rip = rip;
vmexit[vcpu].inst_length = 0;

if (vcpu == BSP) {
mt_vmm_info[vcpu].mt_ctx = ctx;
mt_vmm_info[vcpu].mt_vcpu = vcpu;

error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
fbsdrun_start_thread, &mt_vmm_info[vcpu]);
assert(error == 0);
}
mt_vmm_info[vcpu].mt_ctx = ctx;
mt_vmm_info[vcpu].mt_vcpu = vcpu;

error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
fbsdrun_start_thread, &mt_vmm_info[vcpu]);
assert(error == 0);
}

static int
@ -385,13 +383,13 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
}

static int
vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
int err;
stats.vmexit_paging++;
stats.vmexit_inst_emul++;

err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa,
&vmexit->u.paging.vie);
err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
&vmexit->u.inst_emul.vie);

if (err) {
if (err == EINVAL) {
@ -400,7 +398,7 @@ vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
vmexit->rip);
} else if (err == ESRCH) {
fprintf(stderr, "Unhandled memory access to 0x%lx\n",
vmexit->u.paging.gpa);
vmexit->u.inst_emul.gpa);
}

return (VMEXIT_ABORT);
@ -416,7 +414,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_RDMSR] = vmexit_rdmsr,
[VM_EXITCODE_WRMSR] = vmexit_wrmsr,
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
[VM_EXITCODE_PAGING] = vmexit_paging,
[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
};
};
|
||||
|
||||
|
@ -1048,7 +1048,7 @@ init_pci(struct vmctx *ctx)
|
||||
* Accesses to memory addresses that are not allocated to system
|
||||
* memory or PCI devices return 0xff's.
|
||||
*/
|
||||
error = vm_get_memory_seg(ctx, 0, &lowmem);
|
||||
error = vm_get_memory_seg(ctx, 0, &lowmem, NULL);
|
||||
assert(error == 0);
|
||||
|
||||
memset(&memp, 0, sizeof(struct mem_range));
|
||||
|
@ -341,14 +341,14 @@ rtc_init(struct vmctx *ctx)
|
||||
* 0x34/0x35 - 64KB chunks above 16MB, below 4GB
|
||||
* 0x5b/0x5c/0x5d - 64KB chunks above 4GB
|
||||
*/
|
||||
err = vm_get_memory_seg(ctx, 0, &lomem);
|
||||
err = vm_get_memory_seg(ctx, 0, &lomem, NULL);
|
||||
assert(err == 0);
|
||||
|
||||
lomem = (lomem - m_16MB) / m_64KB;
|
||||
rtc_nvram[nvoff(RTC_LMEM_LSB)] = lomem;
|
||||
rtc_nvram[nvoff(RTC_LMEM_MSB)] = lomem >> 8;
|
||||
|
||||
if (vm_get_memory_seg(ctx, m_4GB, &himem) == 0) {
|
||||
if (vm_get_memory_seg(ctx, m_4GB, &himem, NULL) == 0) {
|
||||
himem /= m_64KB;
|
||||
rtc_nvram[nvoff(RTC_HMEM_LSB)] = himem;
|
||||
rtc_nvram[nvoff(RTC_HMEM_SB)] = himem >> 8;
|
||||
|
@ -188,12 +188,13 @@ usage(void)
|
||||
" [--unassign-pptdev=<bus/slot/func>]\n"
|
||||
" [--set-mem=<memory in units of MB>]\n"
|
||||
" [--get-lowmem]\n"
|
||||
" [--get-highmem]\n",
|
||||
" [--get-highmem]\n"
|
||||
" [--get-gpa-pmap]\n",
|
||||
progname);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static int get_stats, getcap, setcap, capval;
|
||||
static int get_stats, getcap, setcap, capval, get_gpa_pmap;
|
||||
static const char *capname;
|
||||
static int create, destroy, get_lowmem, get_highmem;
|
||||
static uint64_t memsize;
|
||||
@ -377,18 +378,20 @@ enum {
|
||||
SET_CAP,
|
||||
CAPNAME,
|
||||
UNASSIGN_PPTDEV,
|
||||
GET_GPA_PMAP,
|
||||
};
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
char *vmname;
|
||||
int error, ch, vcpu;
|
||||
vm_paddr_t gpa;
|
||||
int error, ch, vcpu, ptenum;
|
||||
vm_paddr_t gpa, gpa_pmap;
|
||||
size_t len;
|
||||
struct vm_exit vmexit;
|
||||
uint64_t ctl, eptp, bm, addr, u64;
|
||||
uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte;
|
||||
struct vmctx *ctx;
|
||||
int wired;
|
||||
|
||||
uint64_t cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat;
|
||||
uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp;
|
||||
@ -427,6 +430,7 @@ main(int argc, char *argv[])
|
||||
{ "capname", REQ_ARG, 0, CAPNAME },
|
||||
{ "unassign-pptdev", REQ_ARG, 0, UNASSIGN_PPTDEV },
|
||||
{ "setcap", REQ_ARG, 0, SET_CAP },
|
||||
{ "get-gpa-pmap", REQ_ARG, 0, GET_GPA_PMAP },
|
||||
{ "getcap", NO_ARG, &getcap, 1 },
|
||||
{ "get-stats", NO_ARG, &get_stats, 1 },
|
||||
{ "get-desc-ds",NO_ARG, &get_desc_ds, 1 },
|
||||
@ -666,6 +670,10 @@ main(int argc, char *argv[])
|
||||
capval = strtoul(optarg, NULL, 0);
|
||||
setcap = 1;
|
||||
break;
|
||||
case GET_GPA_PMAP:
|
||||
gpa_pmap = strtoul(optarg, NULL, 0);
|
||||
get_gpa_pmap = 1;
|
||||
break;
|
||||
case CAPNAME:
|
||||
capname = optarg;
|
||||
break;
|
||||
@ -819,16 +827,18 @@ main(int argc, char *argv[])
|
||||
|
||||
if (!error && (get_lowmem || get_all)) {
|
||||
gpa = 0;
|
||||
error = vm_get_memory_seg(ctx, gpa, &len);
|
||||
error = vm_get_memory_seg(ctx, gpa, &len, &wired);
|
||||
if (error == 0)
|
||||
printf("lowmem\t\t0x%016lx/%ld\n", gpa, len);
|
||||
printf("lowmem\t\t0x%016lx/%ld%s\n", gpa, len,
|
||||
wired ? " wired" : "");
|
||||
}
|
||||
|
||||
if (!error && (get_highmem || get_all)) {
|
||||
gpa = 4 * GB;
|
||||
error = vm_get_memory_seg(ctx, gpa, &len);
|
||||
error = vm_get_memory_seg(ctx, gpa, &len, &wired);
|
||||
if (error == 0)
|
||||
printf("highmem\t\t0x%016lx/%ld\n", gpa, len);
|
||||
printf("highmem\t\t0x%016lx/%ld%s\n", gpa, len,
|
||||
wired ? " wired" : "");
|
||||
}
|
||||
|
||||
if (!error && (get_efer || get_all)) {
|
||||
@ -1457,6 +1467,17 @@ main(int argc, char *argv[])
|
||||
printf("Capability \"%s\" is not available\n", capname);
|
||||
}
|
||||
|
||||
if (!error && get_gpa_pmap) {
|
||||
error = vm_get_gpa_pmap(ctx, gpa_pmap, pteval, &ptenum);
|
||||
if (error == 0) {
|
||||
printf("gpa %#lx:", gpa_pmap);
|
||||
pte = &pteval[0];
|
||||
while (ptenum-- > 0)
|
||||
printf(" %#lx", *pte++);
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (!error && (getcap || get_all)) {
|
||||
int captype, val, getcaptype;
|
||||
|
||||
|
@ -492,8 +492,8 @@ static void
|
||||
cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem)
|
||||
{
|
||||
|
||||
vm_get_memory_seg(ctx, 0, ret_lowmem);
|
||||
vm_get_memory_seg(ctx, 4 * GB, ret_highmem);
|
||||
vm_get_memory_seg(ctx, 0, ret_lowmem, NULL);
|
||||
vm_get_memory_seg(ctx, 4 * GB, ret_highmem, NULL);
|
||||
}
|
||||
|
||||
static const char *
|
||||
|