Mark Johnston 927aa5fefd bhyve: Fix getaddrinfo() error handling
- Use errx() since errno will not be set.
- Print the message returned by gai_strerror().

MFC after:	1 week
Sponsored by:	The FreeBSD Foundation
2022-02-08 12:40:41 -05:00

1905 lines
38 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/endian.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <machine/atomic.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
#include <netinet/in.h>
#include <assert.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <pthread.h>
#include <pthread_np.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <vmmapi.h>
#include "bhyverun.h"
#include "config.h"
#include "gdb.h"
#include "mem.h"
#include "mevent.h"
/*
* GDB_SIGNAL_* numbers are part of the GDB remote protocol. Most stops
* use SIGTRAP.
*/
#define GDB_SIGNAL_TRAP 5
static void gdb_resume_vcpus(void);
static void check_command(int fd);
static struct mevent *read_event, *write_event;
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
static pthread_mutex_t gdb_lock;
static pthread_cond_t idle_vcpus;
static bool first_stop, report_next_stop, swbreak_enabled;
/*
* An I/O buffer contains 'capacity' bytes of room at 'data'. For a
* read buffer, 'start' is unused and 'len' contains the number of
* valid bytes in the buffer. For a write buffer, 'start' is set to
* the index of the next byte in 'data' to send, and 'len' contains
* the remaining number of valid bytes to send.
*/
struct io_buffer {
uint8_t *data;
size_t capacity;
size_t start;
size_t len;
};
struct breakpoint {
uint64_t gpa;
uint8_t shadow_inst;
TAILQ_ENTRY(breakpoint) link;
};
/*
* When a vCPU stops to due to an event that should be reported to the
* debugger, information about the event is stored in this structure.
* The vCPU thread then sets 'stopped_vcpu' if it is not already set
* and stops other vCPUs so the event can be reported. The
* report_stop() function reports the event for the 'stopped_vcpu'
* vCPU. When the debugger resumes execution via continue or step,
* the event for 'stopped_vcpu' is cleared. vCPUs will loop in their
* event handlers until the associated event is reported or disabled.
*
* An idle vCPU will have all of the boolean fields set to false.
*
* When a vCPU is stepped, 'stepping' is set to true when the vCPU is
* released to execute the stepped instruction. When the vCPU reports
* the stepping trap, 'stepped' is set.
*
* When a vCPU hits a breakpoint set by the debug server,
* 'hit_swbreak' is set to true.
*/
struct vcpu_state {
bool stepping;
bool stepped;
bool hit_swbreak;
};
static struct io_buffer cur_comm, cur_resp;
static uint8_t cur_csum;
static struct vmctx *ctx;
static int cur_fd = -1;
static TAILQ_HEAD(, breakpoint) breakpoints;
static struct vcpu_state *vcpu_state;
static int cur_vcpu, stopped_vcpu;
static bool gdb_active = false;
const int gdb_regset[] = {
VM_REG_GUEST_RAX,
VM_REG_GUEST_RBX,
VM_REG_GUEST_RCX,
VM_REG_GUEST_RDX,
VM_REG_GUEST_RSI,
VM_REG_GUEST_RDI,
VM_REG_GUEST_RBP,
VM_REG_GUEST_RSP,
VM_REG_GUEST_R8,
VM_REG_GUEST_R9,
VM_REG_GUEST_R10,
VM_REG_GUEST_R11,
VM_REG_GUEST_R12,
VM_REG_GUEST_R13,
VM_REG_GUEST_R14,
VM_REG_GUEST_R15,
VM_REG_GUEST_RIP,
VM_REG_GUEST_RFLAGS,
VM_REG_GUEST_CS,
VM_REG_GUEST_SS,
VM_REG_GUEST_DS,
VM_REG_GUEST_ES,
VM_REG_GUEST_FS,
VM_REG_GUEST_GS
};
const int gdb_regsize[] = {
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
8,
4,
4,
4,
4,
4,
4,
4
};
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
static FILE *logfile;
va_list ap;
if (logfile == NULL) {
logfile = fopen("/tmp/bhyve_gdb.log", "w");
if (logfile == NULL)
return;
#ifndef WITHOUT_CAPSICUM
if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
fclose(logfile);
logfile = NULL;
return;
}
#endif
setlinebuf(logfile);
}
va_start(ap, fmt);
vfprintf(logfile, fmt, ap);
va_end(ap);
}
#else
#define debug(...)
#endif
static void remove_all_sw_breakpoints(void);
static int
guest_paging_info(int vcpu, struct vm_guest_paging *paging)
{
uint64_t regs[4];
const int regset[4] = {
VM_REG_GUEST_CR0,
VM_REG_GUEST_CR3,
VM_REG_GUEST_CR4,
VM_REG_GUEST_EFER
};
if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
return (-1);
/*
* For the debugger, always pretend to be the kernel (CPL 0),
* and if long-mode is enabled, always parse addresses as if
* in 64-bit mode.
*/
paging->cr3 = regs[1];
paging->cpl = 0;
if (regs[3] & EFER_LMA)
paging->cpu_mode = CPU_MODE_64BIT;
else if (regs[0] & CR0_PE)
paging->cpu_mode = CPU_MODE_PROTECTED;
else
paging->cpu_mode = CPU_MODE_REAL;
if (!(regs[0] & CR0_PG))
paging->paging_mode = PAGING_MODE_FLAT;
else if (!(regs[2] & CR4_PAE))
paging->paging_mode = PAGING_MODE_32;
else if (regs[3] & EFER_LME)
paging->paging_mode = (regs[2] & CR4_LA57) ?
PAGING_MODE_64_LA57 : PAGING_MODE_64;
else
paging->paging_mode = PAGING_MODE_PAE;
return (0);
}
/*
* Map a guest virtual address to a physical address (for a given vcpu).
* If a guest virtual address is valid, return 1. If the address is
* not valid, return 0. If an error occurs obtaining the mapping,
* return -1.
*/
static int
guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
{
struct vm_guest_paging paging;
int fault;
if (guest_paging_info(vcpu, &paging) == -1)
return (-1);
/*
* Always use PROT_READ. We really care if the VA is
* accessible, not if the current vCPU can write.
*/
if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
&fault) == -1)
return (-1);
if (fault)
return (0);
return (1);
}
static void
io_buffer_reset(struct io_buffer *io)
{
io->start = 0;
io->len = 0;
}
/* Available room for adding data. */
static size_t
io_buffer_avail(struct io_buffer *io)
{
return (io->capacity - (io->start + io->len));
}
static uint8_t *
io_buffer_head(struct io_buffer *io)
{
return (io->data + io->start);
}
static uint8_t *
io_buffer_tail(struct io_buffer *io)
{
return (io->data + io->start + io->len);
}
static void
io_buffer_advance(struct io_buffer *io, size_t amount)
{
assert(amount <= io->len);
io->start += amount;
io->len -= amount;
}
static void
io_buffer_consume(struct io_buffer *io, size_t amount)
{
io_buffer_advance(io, amount);
if (io->len == 0) {
io->start = 0;
return;
}
/*
* XXX: Consider making this move optional and compacting on a
* future read() before realloc().
*/
memmove(io->data, io_buffer_head(io), io->len);
io->start = 0;
}
static void
io_buffer_grow(struct io_buffer *io, size_t newsize)
{
uint8_t *new_data;
size_t avail, new_cap;
avail = io_buffer_avail(io);
if (newsize <= avail)
return;
new_cap = io->capacity + (newsize - avail);
new_data = realloc(io->data, new_cap);
if (new_data == NULL)
err(1, "Failed to grow GDB I/O buffer");
io->data = new_data;
io->capacity = new_cap;
}
static bool
response_pending(void)
{
if (cur_resp.start == 0 && cur_resp.len == 0)
return (false);
if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
return (false);
return (true);
}
static void
close_connection(void)
{
/*
* XXX: This triggers a warning because mevent does the close
* before the EV_DELETE.
*/
pthread_mutex_lock(&gdb_lock);
mevent_delete(write_event);
mevent_delete_close(read_event);
write_event = NULL;
read_event = NULL;
io_buffer_reset(&cur_comm);
io_buffer_reset(&cur_resp);
cur_fd = -1;
remove_all_sw_breakpoints();
/* Clear any pending events. */
memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));
/* Resume any stopped vCPUs. */
gdb_resume_vcpus();
pthread_mutex_unlock(&gdb_lock);
}
static uint8_t
hex_digit(uint8_t nibble)
{
if (nibble <= 9)
return (nibble + '0');
else
return (nibble + 'a' - 10);
}
static uint8_t
parse_digit(uint8_t v)
{
if (v >= '0' && v <= '9')
return (v - '0');
if (v >= 'a' && v <= 'f')
return (v - 'a' + 10);
if (v >= 'A' && v <= 'F')
return (v - 'A' + 10);
return (0xF);
}
/* Parses big-endian hexadecimal. */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
uintmax_t v;
v = 0;
while (len > 0) {
v <<= 4;
v |= parse_digit(*p);
p++;
len--;
}
return (v);
}
static uint8_t
parse_byte(const uint8_t *p)
{
return (parse_digit(p[0]) << 4 | parse_digit(p[1]));
}
static void
send_pending_data(int fd)
{
ssize_t nwritten;
if (cur_resp.len == 0) {
mevent_disable(write_event);
return;
}
nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
if (nwritten == -1) {
warn("Write to GDB socket failed");
close_connection();
} else {
io_buffer_advance(&cur_resp, nwritten);
if (cur_resp.len == 0)
mevent_disable(write_event);
else
mevent_enable(write_event);
}
}
/* Append a single character to the output buffer. */
static void
send_char(uint8_t data)
{
io_buffer_grow(&cur_resp, 1);
*io_buffer_tail(&cur_resp) = data;
cur_resp.len++;
}
/* Append an array of bytes to the output buffer. */
static void
send_data(const uint8_t *data, size_t len)
{
io_buffer_grow(&cur_resp, len);
memcpy(io_buffer_tail(&cur_resp), data, len);
cur_resp.len += len;
}
static void
format_byte(uint8_t v, uint8_t *buf)
{
buf[0] = hex_digit(v >> 4);
buf[1] = hex_digit(v & 0xf);
}
/*
* Append a single byte (formatted as two hex characters) to the
* output buffer.
*/
static void
send_byte(uint8_t v)
{
uint8_t buf[2];
format_byte(v, buf);
send_data(buf, sizeof(buf));
}
static void
start_packet(void)
{
send_char('$');
cur_csum = 0;
}
static void
finish_packet(void)
{
send_char('#');
send_byte(cur_csum);
debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
}
/*
* Append a single character (for the packet payload) and update the
* checksum.
*/
static void
append_char(uint8_t v)
{
send_char(v);
cur_csum += v;
}
/*
* Append an array of bytes (for the packet payload) and update the
* checksum.
*/
static void
append_packet_data(const uint8_t *data, size_t len)
{
send_data(data, len);
while (len > 0) {
cur_csum += *data;
data++;
len--;
}
}
static void
append_string(const char *str)
{
append_packet_data(str, strlen(str));
}
static void
append_byte(uint8_t v)
{
uint8_t buf[2];
format_byte(v, buf);
append_packet_data(buf, sizeof(buf));
}
static void
append_unsigned_native(uintmax_t value, size_t len)
{
size_t i;
for (i = 0; i < len; i++) {
append_byte(value);
value >>= 8;
}
}
static void
append_unsigned_be(uintmax_t value, size_t len)
{
char buf[len * 2];
size_t i;
for (i = 0; i < len; i++) {
format_byte(value, buf + (len - i - 1) * 2);
value >>= 8;
}
append_packet_data(buf, sizeof(buf));
}
static void
append_integer(unsigned int value)
{
if (value == 0)
append_char('0');
else
append_unsigned_be(value, (fls(value) + 7) / 8);
}
static void
append_asciihex(const char *str)
{
while (*str != '\0') {
append_byte(*str);
str++;
}
}
static void
send_empty_response(void)
{
start_packet();
finish_packet();
}
static void
send_error(int error)
{
start_packet();
append_char('E');
append_byte(error);
finish_packet();
}
static void
send_ok(void)
{
start_packet();
append_string("OK");
finish_packet();
}
static int
parse_threadid(const uint8_t *data, size_t len)
{
if (len == 1 && *data == '0')
return (0);
if (len == 2 && memcmp(data, "-1", 2) == 0)
return (-1);
if (len == 0)
return (-2);
return (parse_integer(data, len));
}
/*
* Report the current stop event to the debugger. If the stop is due
* to an event triggered on a specific vCPU such as a breakpoint or
* stepping trap, stopped_vcpu will be set to the vCPU triggering the
* stop. If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
* the reporting vCPU for vCPU events.
*/
static void
report_stop(bool set_cur_vcpu)
{
struct vcpu_state *vs;
start_packet();
if (stopped_vcpu == -1) {
append_char('S');
append_byte(GDB_SIGNAL_TRAP);
} else {
vs = &vcpu_state[stopped_vcpu];
if (set_cur_vcpu)
cur_vcpu = stopped_vcpu;
append_char('T');
append_byte(GDB_SIGNAL_TRAP);
append_string("thread:");
append_integer(stopped_vcpu + 1);
append_char(';');
if (vs->hit_swbreak) {
debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
if (swbreak_enabled)
append_string("swbreak:;");
} else if (vs->stepped)
debug("$vCPU %d reporting step\n", stopped_vcpu);
else
debug("$vCPU %d reporting ???\n", stopped_vcpu);
}
finish_packet();
report_next_stop = false;
}
/*
* If this stop is due to a vCPU event, clear that event to mark it as
* acknowledged.
*/
static void
discard_stop(void)
{
struct vcpu_state *vs;
if (stopped_vcpu != -1) {
vs = &vcpu_state[stopped_vcpu];
vs->hit_swbreak = false;
vs->stepped = false;
stopped_vcpu = -1;
}
report_next_stop = true;
}
static void
gdb_finish_suspend_vcpus(void)
{
if (first_stop) {
first_stop = false;
stopped_vcpu = -1;
} else if (report_next_stop) {
assert(!response_pending());
report_stop(true);
send_pending_data(cur_fd);
}
}
/*
* vCPU threads invoke this function whenever the vCPU enters the
* debug server to pause or report an event. vCPU threads wait here
* as long as the debug server keeps them suspended.
*/
static void
_gdb_cpu_suspend(int vcpu, bool report_stop)
{
debug("$vCPU %d suspending\n", vcpu);
CPU_SET(vcpu, &vcpus_waiting);
if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
gdb_finish_suspend_vcpus();
while (CPU_ISSET(vcpu, &vcpus_suspended))
pthread_cond_wait(&idle_vcpus, &gdb_lock);
CPU_CLR(vcpu, &vcpus_waiting);
debug("$vCPU %d resuming\n", vcpu);
}
/*
* Invoked at the start of a vCPU thread's execution to inform the
* debug server about the new thread.
*/
void
gdb_cpu_add(int vcpu)
{
if (!gdb_active)
return;
debug("$vCPU %d starting\n", vcpu);
pthread_mutex_lock(&gdb_lock);
assert(vcpu < guest_ncpus);
CPU_SET(vcpu, &vcpus_active);
if (!TAILQ_EMPTY(&breakpoints)) {
vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
debug("$vCPU %d enabled breakpoint exits\n", vcpu);
}
/*
* If a vcpu is added while vcpus are stopped, suspend the new
* vcpu so that it will pop back out with a debug exit before
* executing the first instruction.
*/
if (!CPU_EMPTY(&vcpus_suspended)) {
CPU_SET(vcpu, &vcpus_suspended);
_gdb_cpu_suspend(vcpu, false);
}
pthread_mutex_unlock(&gdb_lock);
}
/*
* Invoked by vCPU before resuming execution. This enables stepping
* if the vCPU is marked as stepping.
*/
static void
gdb_cpu_resume(int vcpu)
{
struct vcpu_state *vs;
int error;
vs = &vcpu_state[vcpu];
/*
* Any pending event should already be reported before
* resuming.
*/
assert(vs->hit_swbreak == false);
assert(vs->stepped == false);
if (vs->stepping) {
error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
assert(error == 0);
}
}
/*
* Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
* has been suspended due to an event on different vCPU or in response
* to a guest-wide suspend such as Ctrl-C or the stop on attach.
*/
void
gdb_cpu_suspend(int vcpu)
{
if (!gdb_active)
return;
pthread_mutex_lock(&gdb_lock);
_gdb_cpu_suspend(vcpu, true);
gdb_cpu_resume(vcpu);
pthread_mutex_unlock(&gdb_lock);
}
static void
gdb_suspend_vcpus(void)
{
assert(pthread_mutex_isowned_np(&gdb_lock));
debug("suspending all CPUs\n");
vcpus_suspended = vcpus_active;
vm_suspend_cpu(ctx, -1);
if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
gdb_finish_suspend_vcpus();
}
/*
* Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
* the VT-x-specific MTRAP exit.
*/
void
gdb_cpu_mtrap(int vcpu)
{
struct vcpu_state *vs;
if (!gdb_active)
return;
debug("$vCPU %d MTRAP\n", vcpu);
pthread_mutex_lock(&gdb_lock);
vs = &vcpu_state[vcpu];
if (vs->stepping) {
vs->stepping = false;
vs->stepped = true;
vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
while (vs->stepped) {
if (stopped_vcpu == -1) {
debug("$vCPU %d reporting step\n", vcpu);
stopped_vcpu = vcpu;
gdb_suspend_vcpus();
}
_gdb_cpu_suspend(vcpu, true);
}
gdb_cpu_resume(vcpu);
}
pthread_mutex_unlock(&gdb_lock);
}
static struct breakpoint *
find_breakpoint(uint64_t gpa)
{
struct breakpoint *bp;
TAILQ_FOREACH(bp, &breakpoints, link) {
if (bp->gpa == gpa)
return (bp);
}
return (NULL);
}
void
gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
{
struct breakpoint *bp;
struct vcpu_state *vs;
uint64_t gpa;
int error;
if (!gdb_active) {
fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
exit(4);
}
pthread_mutex_lock(&gdb_lock);
error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
assert(error == 1);
bp = find_breakpoint(gpa);
if (bp != NULL) {
vs = &vcpu_state[vcpu];
assert(vs->stepping == false);
assert(vs->stepped == false);
assert(vs->hit_swbreak == false);
vs->hit_swbreak = true;
vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
for (;;) {
if (stopped_vcpu == -1) {
debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
vmexit->rip);
stopped_vcpu = vcpu;
gdb_suspend_vcpus();
}
_gdb_cpu_suspend(vcpu, true);
if (!vs->hit_swbreak) {
/* Breakpoint reported. */
break;
}
bp = find_breakpoint(gpa);
if (bp == NULL) {
/* Breakpoint was removed. */
vs->hit_swbreak = false;
break;
}
}
gdb_cpu_resume(vcpu);
} else {
debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
vmexit->rip);
error = vm_set_register(ctx, vcpu,
VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
assert(error == 0);
error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
assert(error == 0);
}
pthread_mutex_unlock(&gdb_lock);
}
static bool
gdb_step_vcpu(int vcpu)
{
int error, val;
debug("$vCPU %d step\n", vcpu);
error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
if (error < 0)
return (false);
discard_stop();
vcpu_state[vcpu].stepping = true;
vm_resume_cpu(ctx, vcpu);
CPU_CLR(vcpu, &vcpus_suspended);
pthread_cond_broadcast(&idle_vcpus);
return (true);
}
static void
gdb_resume_vcpus(void)
{
assert(pthread_mutex_isowned_np(&gdb_lock));
vm_resume_cpu(ctx, -1);
debug("resuming all CPUs\n");
CPU_ZERO(&vcpus_suspended);
pthread_cond_broadcast(&idle_vcpus);
}
static void
gdb_read_regs(void)
{
uint64_t regvals[nitems(gdb_regset)];
int i;
if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
gdb_regset, regvals) == -1) {
send_error(errno);
return;
}
start_packet();
for (i = 0; i < nitems(regvals); i++)
append_unsigned_native(regvals[i], gdb_regsize[i]);
finish_packet();
}
static void
gdb_read_mem(const uint8_t *data, size_t len)
{
uint64_t gpa, gva, val;
uint8_t *cp;
size_t resid, todo, bytes;
bool started;
int error;
/* Skip 'm' */
data += 1;
len -= 1;
/* Parse and consume address. */
cp = memchr(data, ',', len);
if (cp == NULL || cp == data) {
send_error(EINVAL);
return;
}
gva = parse_integer(data, cp - data);
len -= (cp - data) + 1;
data += (cp - data) + 1;
/* Parse length. */
resid = parse_integer(data, len);
started = false;
while (resid > 0) {
error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
if (error == -1) {
if (started)
finish_packet();
else
send_error(errno);
return;
}
if (error == 0) {
if (started)
finish_packet();
else
send_error(EFAULT);
return;
}
/* Read bytes from current page. */
todo = getpagesize() - gpa % getpagesize();
if (todo > resid)
todo = resid;
cp = paddr_guest2host(ctx, gpa, todo);
if (cp != NULL) {
/*
* If this page is guest RAM, read it a byte
* at a time.
*/
if (!started) {
start_packet();
started = true;
}
while (todo > 0) {
append_byte(*cp);
cp++;
gpa++;
gva++;
resid--;
todo--;
}
} else {
/*
* If this page isn't guest RAM, try to handle
* it via MMIO. For MMIO requests, use
* aligned reads of words when possible.
*/
while (todo > 0) {
if (gpa & 1 || todo == 1)
bytes = 1;
else if (gpa & 2 || todo == 2)
bytes = 2;
else
bytes = 4;
error = read_mem(ctx, cur_vcpu, gpa, &val,
bytes);
if (error == 0) {
if (!started) {
start_packet();
started = true;
}
gpa += bytes;
gva += bytes;
resid -= bytes;
todo -= bytes;
while (bytes > 0) {
append_byte(val);
val >>= 8;
bytes--;
}
} else {
if (started)
finish_packet();
else
send_error(EFAULT);
return;
}
}
}
assert(resid == 0 || gpa % getpagesize() == 0);
}
if (!started)
start_packet();
finish_packet();
}
static void
gdb_write_mem(const uint8_t *data, size_t len)
{
uint64_t gpa, gva, val;
uint8_t *cp;
size_t resid, todo, bytes;
int error;
/* Skip 'M' */
data += 1;
len -= 1;
/* Parse and consume address. */
cp = memchr(data, ',', len);
if (cp == NULL || cp == data) {
send_error(EINVAL);
return;
}
gva = parse_integer(data, cp - data);
len -= (cp - data) + 1;
data += (cp - data) + 1;
/* Parse and consume length. */
cp = memchr(data, ':', len);
if (cp == NULL || cp == data) {
send_error(EINVAL);
return;
}
resid = parse_integer(data, cp - data);
len -= (cp - data) + 1;
data += (cp - data) + 1;
/* Verify the available bytes match the length. */
if (len != resid * 2) {
send_error(EINVAL);
return;
}
while (resid > 0) {
error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
if (error == -1) {
send_error(errno);
return;
}
if (error == 0) {
send_error(EFAULT);
return;
}
/* Write bytes to current page. */
todo = getpagesize() - gpa % getpagesize();
if (todo > resid)
todo = resid;
cp = paddr_guest2host(ctx, gpa, todo);
if (cp != NULL) {
/*
* If this page is guest RAM, write it a byte
* at a time.
*/
while (todo > 0) {
assert(len >= 2);
*cp = parse_byte(data);
data += 2;
len -= 2;
cp++;
gpa++;
gva++;
resid--;
todo--;
}
} else {
/*
* If this page isn't guest RAM, try to handle
* it via MMIO. For MMIO requests, use
* aligned writes of words when possible.
*/
while (todo > 0) {
if (gpa & 1 || todo == 1) {
bytes = 1;
val = parse_byte(data);
} else if (gpa & 2 || todo == 2) {
bytes = 2;
val = be16toh(parse_integer(data, 4));
} else {
bytes = 4;
val = be32toh(parse_integer(data, 8));
}
error = write_mem(ctx, cur_vcpu, gpa, val,
bytes);
if (error == 0) {
gpa += bytes;
gva += bytes;
resid -= bytes;
todo -= bytes;
data += 2 * bytes;
len -= 2 * bytes;
} else {
send_error(EFAULT);
return;
}
}
}
assert(resid == 0 || gpa % getpagesize() == 0);
}
assert(len == 0);
send_ok();
}
static bool
set_breakpoint_caps(bool enable)
{
cpuset_t mask;
int vcpu;
mask = vcpus_active;
while (!CPU_EMPTY(&mask)) {
vcpu = CPU_FFS(&mask) - 1;
CPU_CLR(vcpu, &mask);
if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
enable ? 1 : 0) < 0)
return (false);
debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
enable ? "en" : "dis");
}
return (true);
}
static void
remove_all_sw_breakpoints(void)
{
struct breakpoint *bp, *nbp;
uint8_t *cp;
if (TAILQ_EMPTY(&breakpoints))
return;
TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
debug("remove breakpoint at %#lx\n", bp->gpa);
cp = paddr_guest2host(ctx, bp->gpa, 1);
*cp = bp->shadow_inst;
TAILQ_REMOVE(&breakpoints, bp, link);
free(bp);
}
TAILQ_INIT(&breakpoints);
set_breakpoint_caps(false);
}
static void
update_sw_breakpoint(uint64_t gva, int kind, bool insert)
{
struct breakpoint *bp;
uint64_t gpa;
uint8_t *cp;
int error;
if (kind != 1) {
send_error(EINVAL);
return;
}
error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
if (error == -1) {
send_error(errno);
return;
}
if (error == 0) {
send_error(EFAULT);
return;
}
cp = paddr_guest2host(ctx, gpa, 1);
/* Only permit breakpoints in guest RAM. */
if (cp == NULL) {
send_error(EFAULT);
return;
}
/* Find any existing breakpoint. */
bp = find_breakpoint(gpa);
/*
* Silently ignore duplicate commands since the protocol
* requires these packets to be idempotent.
*/
if (insert) {
if (bp == NULL) {
if (TAILQ_EMPTY(&breakpoints) &&
!set_breakpoint_caps(true)) {
send_empty_response();
return;
}
bp = malloc(sizeof(*bp));
bp->gpa = gpa;
bp->shadow_inst = *cp;
*cp = 0xcc; /* INT 3 */
TAILQ_INSERT_TAIL(&breakpoints, bp, link);
debug("new breakpoint at %#lx\n", gpa);
}
} else {
if (bp != NULL) {
debug("remove breakpoint at %#lx\n", gpa);
*cp = bp->shadow_inst;
TAILQ_REMOVE(&breakpoints, bp, link);
free(bp);
if (TAILQ_EMPTY(&breakpoints))
set_breakpoint_caps(false);
}
}
send_ok();
}
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
uint64_t gva;
uint8_t *cp;
bool insert;
int kind, type;
insert = data[0] == 'Z';
/* Skip 'Z/z' */
data += 1;
len -= 1;
/* Parse and consume type. */
cp = memchr(data, ',', len);
if (cp == NULL || cp == data) {
send_error(EINVAL);
return;
}
type = parse_integer(data, cp - data);
len -= (cp - data) + 1;
data += (cp - data) + 1;
/* Parse and consume address. */
cp = memchr(data, ',', len);
if (cp == NULL || cp == data) {
send_error(EINVAL);
return;
}
gva = parse_integer(data, cp - data);
len -= (cp - data) + 1;
data += (cp - data) + 1;
/* Parse and consume kind. */
cp = memchr(data, ';', len);
if (cp == data) {
send_error(EINVAL);
return;
}
if (cp != NULL) {
/*
* We do not advertise support for either the
* ConditionalBreakpoints or BreakpointCommands
* features, so we should not be getting conditions or
* commands from the remote end.
*/
send_empty_response();
return;
}
kind = parse_integer(data, len);
data += len;
len = 0;
switch (type) {
case 0:
update_sw_breakpoint(gva, kind, insert);
break;
default:
send_empty_response();
break;
}
}
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
if (strlen(cmd) > len)
return (false);
return (memcmp(data, cmd, strlen(cmd)) == 0);
}
static void
check_features(const uint8_t *data, size_t len)
{
char *feature, *next_feature, *str, *value;
bool supported;
str = malloc(len + 1);
memcpy(str, data, len);
str[len] = '\0';
next_feature = str;
while ((feature = strsep(&next_feature, ";")) != NULL) {
/*
* Null features shouldn't exist, but skip if they
* do.
*/
if (strcmp(feature, "") == 0)
continue;
/*
* Look for the value or supported / not supported
* flag.
*/
value = strchr(feature, '=');
if (value != NULL) {
*value = '\0';
value++;
supported = true;
} else {
value = feature + strlen(feature) - 1;
switch (*value) {
case '+':
supported = true;
break;
case '-':
supported = false;
break;
default:
/*
* This is really a protocol error,
* but we just ignore malformed
* features for ease of
* implementation.
*/
continue;
}
value = NULL;
}
if (strcmp(feature, "swbreak") == 0)
swbreak_enabled = supported;
}
free(str);
start_packet();
/* This is an arbitrary limit. */
append_string("PacketSize=4096");
append_string(";swbreak+");
finish_packet();
}
static void
gdb_query(const uint8_t *data, size_t len)
{
/*
* TODO:
* - qSearch
*/
if (command_equals(data, len, "qAttached")) {
start_packet();
append_char('1');
finish_packet();
} else if (command_equals(data, len, "qC")) {
start_packet();
append_string("QC");
append_integer(cur_vcpu + 1);
finish_packet();
} else if (command_equals(data, len, "qfThreadInfo")) {
cpuset_t mask;
bool first;
int vcpu;
if (CPU_EMPTY(&vcpus_active)) {
send_error(EINVAL);
return;
}
mask = vcpus_active;
start_packet();
append_char('m');
first = true;
while (!CPU_EMPTY(&mask)) {
vcpu = CPU_FFS(&mask) - 1;
CPU_CLR(vcpu, &mask);
if (first)
first = false;
else
append_char(',');
append_integer(vcpu + 1);
}
finish_packet();
} else if (command_equals(data, len, "qsThreadInfo")) {
start_packet();
append_char('l');
finish_packet();
} else if (command_equals(data, len, "qSupported")) {
data += strlen("qSupported");
len -= strlen("qSupported");
check_features(data, len);
} else if (command_equals(data, len, "qThreadExtraInfo")) {
char buf[16];
int tid;
data += strlen("qThreadExtraInfo");
len -= strlen("qThreadExtraInfo");
if (*data != ',') {
send_error(EINVAL);
return;
}
tid = parse_threadid(data + 1, len - 1);
if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
send_error(EINVAL);
return;
}
snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
start_packet();
append_asciihex(buf);
finish_packet();
} else
send_empty_response();
}
static void
handle_command(const uint8_t *data, size_t len)
{
/* Reject packets with a sequence-id. */
if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
send_empty_response();
return;
}
switch (*data) {
case 'c':
if (len != 1) {
send_error(EINVAL);
break;
}
discard_stop();
gdb_resume_vcpus();
break;
case 'D':
send_ok();
/* TODO: Resume any stopped CPUs. */
break;
case 'g': {
gdb_read_regs();
break;
}
case 'H': {
int tid;
if (data[1] != 'g' && data[1] != 'c') {
send_error(EINVAL);
break;
}
tid = parse_threadid(data + 2, len - 2);
if (tid == -2) {
send_error(EINVAL);
break;
}
if (CPU_EMPTY(&vcpus_active)) {
send_error(EINVAL);
break;
}
if (tid == -1 || tid == 0)
cur_vcpu = CPU_FFS(&vcpus_active) - 1;
else if (CPU_ISSET(tid - 1, &vcpus_active))
cur_vcpu = tid - 1;
else {
send_error(EINVAL);
break;
}
send_ok();
break;
}
case 'm':
gdb_read_mem(data, len);
break;
case 'M':
gdb_write_mem(data, len);
break;
case 'T': {
int tid;
tid = parse_threadid(data + 1, len - 1);
if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
send_error(EINVAL);
return;
}
send_ok();
break;
}
case 'q':
gdb_query(data, len);
break;
case 's':
if (len != 1) {
send_error(EINVAL);
break;
}
/* Don't send a reply until a stop occurs. */
if (!gdb_step_vcpu(cur_vcpu)) {
send_error(EOPNOTSUPP);
break;
}
break;
case 'z':
case 'Z':
parse_breakpoint(data, len);
break;
case '?':
report_stop(false);
break;
case 'G': /* TODO */
case 'v':
/* Handle 'vCont' */
/* 'vCtrlC' */
case 'p': /* TODO */
case 'P': /* TODO */
case 'Q': /* TODO */
case 't': /* TODO */
case 'X': /* TODO */
default:
send_empty_response();
}
}
/* Check for a valid packet in the command buffer. */
static void
check_command(int fd)
{
uint8_t *head, *hash, *p, sum;
size_t avail, plen;
for (;;) {
avail = cur_comm.len;
if (avail == 0)
return;
head = io_buffer_head(&cur_comm);
switch (*head) {
case 0x03:
debug("<- Ctrl-C\n");
io_buffer_consume(&cur_comm, 1);
gdb_suspend_vcpus();
break;
case '+':
/* ACK of previous response. */
debug("<- +\n");
if (response_pending())
io_buffer_reset(&cur_resp);
io_buffer_consume(&cur_comm, 1);
if (stopped_vcpu != -1 && report_next_stop) {
report_stop(true);
send_pending_data(fd);
}
break;
case '-':
/* NACK of previous response. */
debug("<- -\n");
if (response_pending()) {
cur_resp.len += cur_resp.start;
cur_resp.start = 0;
if (cur_resp.data[0] == '+')
io_buffer_advance(&cur_resp, 1);
debug("-> %.*s\n", (int)cur_resp.len,
io_buffer_head(&cur_resp));
}
io_buffer_consume(&cur_comm, 1);
send_pending_data(fd);
break;
case '$':
/* Packet. */
if (response_pending()) {
warnx("New GDB command while response in "
"progress");
io_buffer_reset(&cur_resp);
}
/* Is packet complete? */
hash = memchr(head, '#', avail);
if (hash == NULL)
return;
plen = (hash - head + 1) + 2;
if (avail < plen)
return;
debug("<- %.*s\n", (int)plen, head);
/* Verify checksum. */
for (sum = 0, p = head + 1; p < hash; p++)
sum += *p;
if (sum != parse_byte(hash + 1)) {
io_buffer_consume(&cur_comm, plen);
debug("-> -\n");
send_char('-');
send_pending_data(fd);
break;
}
send_char('+');
handle_command(head + 1, hash - (head + 1));
io_buffer_consume(&cur_comm, plen);
if (!response_pending())
debug("-> +\n");
send_pending_data(fd);
break;
default:
/* XXX: Possibly drop connection instead. */
debug("-> %02x\n", *head);
io_buffer_consume(&cur_comm, 1);
break;
}
}
}
static void
gdb_readable(int fd, enum ev_type event, void *arg)
{
ssize_t nread;
int pending;
if (ioctl(fd, FIONREAD, &pending) == -1) {
warn("FIONREAD on GDB socket");
return;
}
/*
* 'pending' might be zero due to EOF. We need to call read
* with a non-zero length to detect EOF.
*/
if (pending == 0)
pending = 1;
/* Ensure there is room in the command buffer. */
io_buffer_grow(&cur_comm, pending);
assert(io_buffer_avail(&cur_comm) >= pending);
nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
if (nread == 0) {
close_connection();
} else if (nread == -1) {
if (errno == EAGAIN)
return;
warn("Read from GDB socket");
close_connection();
} else {
cur_comm.len += nread;
pthread_mutex_lock(&gdb_lock);
check_command(fd);
pthread_mutex_unlock(&gdb_lock);
}
}
static void
gdb_writable(int fd, enum ev_type event, void *arg)
{
send_pending_data(fd);
}
static void
new_connection(int fd, enum ev_type event, void *arg)
{
int optval, s;
s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
if (s == -1) {
if (arg != NULL)
err(1, "Failed accepting initial GDB connection");
/* Silently ignore errors post-startup. */
return;
}
optval = 1;
if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
-1) {
warn("Failed to disable SIGPIPE for GDB connection");
close(s);
return;
}
pthread_mutex_lock(&gdb_lock);
if (cur_fd != -1) {
close(s);
warnx("Ignoring additional GDB connection.");
}
read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
if (read_event == NULL) {
if (arg != NULL)
err(1, "Failed to setup initial GDB connection");
pthread_mutex_unlock(&gdb_lock);
return;
}
write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
if (write_event == NULL) {
if (arg != NULL)
err(1, "Failed to setup initial GDB connection");
mevent_delete_close(read_event);
read_event = NULL;
}
cur_fd = s;
cur_vcpu = 0;
stopped_vcpu = -1;
/* Break on attach. */
first_stop = true;
report_next_stop = false;
gdb_suspend_vcpus();
pthread_mutex_unlock(&gdb_lock);
}
#ifndef WITHOUT_CAPSICUM
void
limit_gdb_socket(int s)
{
cap_rights_t rights;
unsigned long ioctls[] = { FIONREAD };
cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
CAP_SETSOCKOPT, CAP_IOCTL);
if (caph_rights_limit(s, &rights) == -1)
errx(EX_OSERR, "Unable to apply rights for sandbox");
if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
errx(EX_OSERR, "Unable to apply rights for sandbox");
}
#endif
void
init_gdb(struct vmctx *_ctx)
{
int error, flags, optval, s;
struct addrinfo hints;
struct addrinfo *gdbaddr;
const char *saddr, *value;
char *sport;
bool wait;
value = get_config_value("gdb.port");
if (value == NULL)
return;
sport = strdup(value);
if (sport == NULL)
errx(4, "Failed to allocate memory");
wait = get_config_bool_default("gdb.wait", false);
saddr = get_config_value("gdb.address");
if (saddr == NULL) {
saddr = "localhost";
}
debug("==> starting on %s:%s, %swaiting\n",
saddr, sport, wait ? "" : "not ");
error = pthread_mutex_init(&gdb_lock, NULL);
if (error != 0)
errc(1, error, "gdb mutex init");
error = pthread_cond_init(&idle_vcpus, NULL);
if (error != 0)
errc(1, error, "gdb cv init");
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;
error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
if (error != 0)
errx(1, "gdb address resolution: %s", gai_strerror(error));
ctx = _ctx;
s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
if (s < 0)
err(1, "gdb socket create");
optval = 1;
(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));
if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
err(1, "gdb socket bind");
if (listen(s, 1) < 0)
err(1, "gdb socket listen");
stopped_vcpu = -1;
TAILQ_INIT(&breakpoints);
vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
if (wait) {
/*
* Set vcpu 0 in vcpus_suspended. This will trigger the
* logic in gdb_cpu_add() to suspend the first vcpu before
* it starts execution. The vcpu will remain suspended
* until a debugger connects.
*/
CPU_SET(0, &vcpus_suspended);
stopped_vcpu = 0;
}
flags = fcntl(s, F_GETFL);
if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
err(1, "Failed to mark gdb socket non-blocking");
#ifndef WITHOUT_CAPSICUM
limit_gdb_socket(s);
#endif
mevent_add(s, EVF_READ, new_connection, NULL);
gdb_active = true;
freeaddrinfo(gdbaddr);
free(sport);
}