From a5063650ed2e7b72532d19f3b2557d59042f3c5e Mon Sep 17 00:00:00 2001
From: Rui Paulo
Date: Tue, 6 Jul 2010 10:22:17 +0000
Subject: [PATCH] Import fasttrap_isa.c from OpenSolaris.

---
 uts/intel/dtrace/fasttrap_isa.c | 1745 +++++++++++++++++++++++++++++++
 uts/sparc/dtrace/fasttrap_isa.c | 1597 ++++++++++++++++++++++++++++
 2 files changed, 3342 insertions(+)
 create mode 100644 uts/intel/dtrace/fasttrap_isa.c
 create mode 100644 uts/sparc/dtrace/fasttrap_isa.c

diff --git a/uts/intel/dtrace/fasttrap_isa.c b/uts/intel/dtrace/fasttrap_isa.c
new file mode 100644
index 000000000000..1b93869a7358
--- /dev/null
+++ b/uts/intel/dtrace/fasttrap_isa.c
@@ -0,0 +1,1745 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Lossless User-Land Tracing on x86
+ * ---------------------------------
+ *
+ * The execution of most instructions is not dependent on the address; for
+ * these instructions it is sufficient to copy them into the user process's
+ * address space and execute them. To effectively single-step an instruction
+ * in user-land, we copy out the following sequence of instructions to scratch
+ * space in the user thread's ulwp_t structure.
+ *
+ * We then set the program counter (%eip or %rip) to point to this scratch
+ * space. Once execution resumes, the original instruction is executed and
+ * then control flow is redirected to what was originally the subsequent
+ * instruction. If the kernel attempts to deliver a signal while single-
+ * stepping, the signal is deferred and the program counter is moved into the
+ * second sequence of instructions. The second sequence ends in a trap into
+ * the kernel where the deferred signal is then properly handled and delivered.
+ *
+ * For instructions whose execution is position dependent, we perform simple
+ * emulation. These instructions are limited to control transfer
+ * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
+ * of %rip-relative addressing that means that almost any instruction can be
+ * position dependent. For all the details on how we emulate generic
+ * instructions including %rip-relative instructions, see the code in
+ * fasttrap_pid_probe() below where we handle instructions of type
+ * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
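+ *
+ * As a rough sketch (the precise layouts are diagrammed in the Generic
+ * Instruction Tracing comment inside fasttrap_pid_probe() below), the
+ * scratch space holds two sequences of the form:
+ *
+ *	<original instruction>; jmp <pc + instruction size>
+ *	<original instruction>; int T_DTRACE_RET
+ *
+ * where the first is used for ordinary logical single-stepping and the
+ * second whenever control must come back into the kernel (deferred
+ * signals, return probes).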
+ */ + +#define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3) +#define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) +#define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7) +#define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm)) + +#define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3) +#define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7) +#define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7) + +#define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1) +#define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1) +#define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1) +#define FASTTRAP_REX_B(rex) ((rex) & 1) +#define FASTTRAP_REX(w, r, x, b) \ + (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b)) + +/* + * Single-byte op-codes. + */ +#define FASTTRAP_PUSHL_EBP 0x55 + +#define FASTTRAP_JO 0x70 +#define FASTTRAP_JNO 0x71 +#define FASTTRAP_JB 0x72 +#define FASTTRAP_JAE 0x73 +#define FASTTRAP_JE 0x74 +#define FASTTRAP_JNE 0x75 +#define FASTTRAP_JBE 0x76 +#define FASTTRAP_JA 0x77 +#define FASTTRAP_JS 0x78 +#define FASTTRAP_JNS 0x79 +#define FASTTRAP_JP 0x7a +#define FASTTRAP_JNP 0x7b +#define FASTTRAP_JL 0x7c +#define FASTTRAP_JGE 0x7d +#define FASTTRAP_JLE 0x7e +#define FASTTRAP_JG 0x7f + +#define FASTTRAP_NOP 0x90 + +#define FASTTRAP_MOV_EAX 0xb8 +#define FASTTRAP_MOV_ECX 0xb9 + +#define FASTTRAP_RET16 0xc2 +#define FASTTRAP_RET 0xc3 + +#define FASTTRAP_LOOPNZ 0xe0 +#define FASTTRAP_LOOPZ 0xe1 +#define FASTTRAP_LOOP 0xe2 +#define FASTTRAP_JCXZ 0xe3 + +#define FASTTRAP_CALL 0xe8 +#define FASTTRAP_JMP32 0xe9 +#define FASTTRAP_JMP8 0xeb + +#define FASTTRAP_INT3 0xcc +#define FASTTRAP_INT 0xcd + +#define FASTTRAP_2_BYTE_OP 0x0f +#define FASTTRAP_GROUP5_OP 0xff + +/* + * Two-byte op-codes (second byte only). + */ +#define FASTTRAP_0F_JO 0x80 +#define FASTTRAP_0F_JNO 0x81 +#define FASTTRAP_0F_JB 0x82 +#define FASTTRAP_0F_JAE 0x83 +#define FASTTRAP_0F_JE 0x84 +#define FASTTRAP_0F_JNE 0x85 +#define FASTTRAP_0F_JBE 0x86 +#define FASTTRAP_0F_JA 0x87 +#define FASTTRAP_0F_JS 0x88 +#define FASTTRAP_0F_JNS 0x89 +#define FASTTRAP_0F_JP 0x8a +#define FASTTRAP_0F_JNP 0x8b +#define FASTTRAP_0F_JL 0x8c +#define FASTTRAP_0F_JGE 0x8d +#define FASTTRAP_0F_JLE 0x8e +#define FASTTRAP_0F_JG 0x8f + +#define FASTTRAP_EFLAGS_OF 0x800 +#define FASTTRAP_EFLAGS_DF 0x400 +#define FASTTRAP_EFLAGS_SF 0x080 +#define FASTTRAP_EFLAGS_ZF 0x040 +#define FASTTRAP_EFLAGS_AF 0x010 +#define FASTTRAP_EFLAGS_PF 0x004 +#define FASTTRAP_EFLAGS_CF 0x001 + +/* + * Instruction prefixes. + */ +#define FASTTRAP_PREFIX_OPERAND 0x66 +#define FASTTRAP_PREFIX_ADDRESS 0x67 +#define FASTTRAP_PREFIX_CS 0x2E +#define FASTTRAP_PREFIX_DS 0x3E +#define FASTTRAP_PREFIX_ES 0x26 +#define FASTTRAP_PREFIX_FS 0x64 +#define FASTTRAP_PREFIX_GS 0x65 +#define FASTTRAP_PREFIX_SS 0x36 +#define FASTTRAP_PREFIX_LOCK 0xF0 +#define FASTTRAP_PREFIX_REP 0xF3 +#define FASTTRAP_PREFIX_REPNE 0xF2 + +#define FASTTRAP_NOREG 0xff + +/* + * Map between instruction register encodings and the kernel constants which + * correspond to indicies into struct regs. + */ +#ifdef __amd64 +static const uint8_t regmap[16] = { + REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, + REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, +}; +#else +static const uint8_t regmap[8] = { + EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI +}; +#endif + +static ulong_t fasttrap_getreg(struct regs *, uint_t); + +static uint64_t +fasttrap_anarg(struct regs *rp, int function_entry, int argno) +{ + uint64_t value; + int shift = function_entry ? 
1 : 0; + +#ifdef __amd64 + if (curproc->p_model == DATAMODEL_LP64) { + uintptr_t *stack; + + /* + * In 64-bit mode, the first six arguments are stored in + * registers. + */ + if (argno < 6) + return ((&rp->r_rdi)[argno]); + + stack = (uintptr_t *)rp->r_sp; + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + value = dtrace_fulword(&stack[argno - 6 + shift]); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); + } else { +#endif + uint32_t *stack = (uint32_t *)rp->r_sp; + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + value = dtrace_fuword32(&stack[argno + shift]); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); +#ifdef __amd64 + } +#endif + + return (value); +} + +/*ARGSUSED*/ +int +fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, + fasttrap_probe_type_t type) +{ + uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10]; + size_t len = FASTTRAP_MAX_INSTR_SIZE; + size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET)); + uint_t start = 0; + int rmindex, size; + uint8_t seg, rex = 0; + + /* + * Read the instruction at the given address out of the process's + * address space. We don't have to worry about a debugger + * changing this instruction before we overwrite it with our trap + * instruction since P_PR_LOCK is set. Since instructions can span + * pages, we potentially read the instruction in two parts. If the + * second part fails, we just zero out that part of the instruction. + */ + if (uread(p, &instr[0], first, pc) != 0) + return (-1); + if (len > first && + uread(p, &instr[first], len - first, pc + first) != 0) { + bzero(&instr[first], len - first); + len = first; + } + + /* + * If the disassembly fails, then we have a malformed instruction. + */ + if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0) + return (-1); + + /* + * Make sure the disassembler isn't completely broken. + */ + ASSERT(-1 <= rmindex && rmindex < size); + + /* + * If the computed size is greater than the number of bytes read, + * then it was a malformed instruction possibly because it fell on a + * page boundary and the subsequent page was missing or because of + * some malicious user. + */ + if (size > len) + return (-1); + + tp->ftt_size = (uint8_t)size; + tp->ftt_segment = FASTTRAP_SEG_NONE; + + /* + * Find the start of the instruction's opcode by processing any + * legacy prefixes. + */ + for (;;) { + seg = 0; + switch (instr[start]) { + case FASTTRAP_PREFIX_SS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_GS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_FS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_ES: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_DS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_CS: + seg++; + /*FALLTHRU*/ + case FASTTRAP_PREFIX_OPERAND: + case FASTTRAP_PREFIX_ADDRESS: + case FASTTRAP_PREFIX_LOCK: + case FASTTRAP_PREFIX_REP: + case FASTTRAP_PREFIX_REPNE: + if (seg != 0) { + /* + * It's illegal for an instruction to specify + * two segment prefixes -- give up on this + * illegal instruction. + */ + if (tp->ftt_segment != FASTTRAP_SEG_NONE) + return (-1); + + tp->ftt_segment = seg; + } + start++; + continue; + } + break; + } + +#ifdef __amd64 + /* + * Identify the REX prefix on 64-bit processes. + */ + if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) + rex = instr[start++]; +#endif + + /* + * Now that we're pretty sure that the instruction is okay, copy the + * valid part to the tracepoint. 
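+	 *
+	 * As an illustrative example, the byte sequence
+	 * 64 48 8b 04 25 28 00 00 00 (mov %fs:0x28, %rax) would be
+	 * handled by recording the 0x64 fs override in ftt_segment,
+	 * stashing the 0x48 byte as the REX prefix (REX.W), and leaving
+	 * start pointing at the 0x8b opcode.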
+ */ + bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE); + + tp->ftt_type = FASTTRAP_T_COMMON; + if (instr[start] == FASTTRAP_2_BYTE_OP) { + switch (instr[start + 1]) { + case FASTTRAP_0F_JO: + case FASTTRAP_0F_JNO: + case FASTTRAP_0F_JB: + case FASTTRAP_0F_JAE: + case FASTTRAP_0F_JE: + case FASTTRAP_0F_JNE: + case FASTTRAP_0F_JBE: + case FASTTRAP_0F_JA: + case FASTTRAP_0F_JS: + case FASTTRAP_0F_JNS: + case FASTTRAP_0F_JP: + case FASTTRAP_0F_JNP: + case FASTTRAP_0F_JL: + case FASTTRAP_0F_JGE: + case FASTTRAP_0F_JLE: + case FASTTRAP_0F_JG: + tp->ftt_type = FASTTRAP_T_JCC; + tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; + tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ + *(int32_t *)&instr[start + 2]; + break; + } + } else if (instr[start] == FASTTRAP_GROUP5_OP) { + uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]); + uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]); + uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]); + + if (reg == 2 || reg == 4) { + uint_t i, sz; + + if (reg == 2) + tp->ftt_type = FASTTRAP_T_CALL; + else + tp->ftt_type = FASTTRAP_T_JMP; + + if (mod == 3) + tp->ftt_code = 2; + else + tp->ftt_code = 1; + + ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); + + /* + * See AMD x86-64 Architecture Programmer's Manual + * Volume 3, Section 1.2.7, Table 1-12, and + * Appendix A.3.1, Table A-15. + */ + if (mod != 3 && rm == 4) { + uint8_t sib = instr[start + 2]; + uint_t index = FASTTRAP_SIB_INDEX(sib); + uint_t base = FASTTRAP_SIB_BASE(sib); + + tp->ftt_scale = FASTTRAP_SIB_SCALE(sib); + + tp->ftt_index = (index == 4) ? + FASTTRAP_NOREG : + regmap[index | (FASTTRAP_REX_X(rex) << 3)]; + tp->ftt_base = (mod == 0 && base == 5) ? + FASTTRAP_NOREG : + regmap[base | (FASTTRAP_REX_B(rex) << 3)]; + + i = 3; + sz = mod == 1 ? 1 : 4; + } else { + /* + * In 64-bit mode, mod == 0 and r/m == 5 + * denotes %rip-relative addressing; in 32-bit + * mode, the base register isn't used. In both + * modes, there is a 32-bit operand. + */ + if (mod == 0 && rm == 5) { +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64) + tp->ftt_base = REG_RIP; + else +#endif + tp->ftt_base = FASTTRAP_NOREG; + sz = 4; + } else { + uint8_t base = rm | + (FASTTRAP_REX_B(rex) << 3); + + tp->ftt_base = regmap[base]; + sz = mod == 1 ? 1 : mod == 2 ? 
4 : 0; + } + tp->ftt_index = FASTTRAP_NOREG; + i = 2; + } + + if (sz == 1) { + tp->ftt_dest = *(int8_t *)&instr[start + i]; + } else if (sz == 4) { + /* LINTED - alignment */ + tp->ftt_dest = *(int32_t *)&instr[start + i]; + } else { + tp->ftt_dest = 0; + } + } + } else { + switch (instr[start]) { + case FASTTRAP_RET: + tp->ftt_type = FASTTRAP_T_RET; + break; + + case FASTTRAP_RET16: + tp->ftt_type = FASTTRAP_T_RET16; + /* LINTED - alignment */ + tp->ftt_dest = *(uint16_t *)&instr[start + 1]; + break; + + case FASTTRAP_JO: + case FASTTRAP_JNO: + case FASTTRAP_JB: + case FASTTRAP_JAE: + case FASTTRAP_JE: + case FASTTRAP_JNE: + case FASTTRAP_JBE: + case FASTTRAP_JA: + case FASTTRAP_JS: + case FASTTRAP_JNS: + case FASTTRAP_JP: + case FASTTRAP_JNP: + case FASTTRAP_JL: + case FASTTRAP_JGE: + case FASTTRAP_JLE: + case FASTTRAP_JG: + tp->ftt_type = FASTTRAP_T_JCC; + tp->ftt_code = instr[start]; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_LOOPNZ: + case FASTTRAP_LOOPZ: + case FASTTRAP_LOOP: + tp->ftt_type = FASTTRAP_T_LOOP; + tp->ftt_code = instr[start]; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_JCXZ: + tp->ftt_type = FASTTRAP_T_JCXZ; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_CALL: + tp->ftt_type = FASTTRAP_T_CALL; + tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ + *(int32_t *)&instr[start + 1]; + tp->ftt_code = 0; + break; + + case FASTTRAP_JMP32: + tp->ftt_type = FASTTRAP_T_JMP; + tp->ftt_dest = pc + tp->ftt_size + + /* LINTED - alignment */ + *(int32_t *)&instr[start + 1]; + break; + case FASTTRAP_JMP8: + tp->ftt_type = FASTTRAP_T_JMP; + tp->ftt_dest = pc + tp->ftt_size + + (int8_t)instr[start + 1]; + break; + + case FASTTRAP_PUSHL_EBP: + if (start == 0) + tp->ftt_type = FASTTRAP_T_PUSHL_EBP; + break; + + case FASTTRAP_NOP: +#ifdef __amd64 + ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); + + /* + * On amd64 we have to be careful not to confuse a nop + * (actually xchgl %eax, %eax) with an instruction using + * the same opcode, but that does something different + * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax). + */ + if (FASTTRAP_REX_B(rex) == 0) +#endif + tp->ftt_type = FASTTRAP_T_NOP; + break; + + case FASTTRAP_INT3: + /* + * The pid provider shares the int3 trap with debugger + * breakpoints so we can't instrument them. + */ + ASSERT(instr[start] == FASTTRAP_INSTR); + return (-1); + + case FASTTRAP_INT: + /* + * Interrupts seem like they could be traced with + * no negative implications, but it's possible that + * a thread could be redirected by the trap handling + * code which would eventually return to the + * instruction after the interrupt. If the interrupt + * were in our scratch space, the subsequent + * instruction might be overwritten before we return. + * Accordingly we refuse to instrument any interrupt. + */ + return (-1); + } + } + +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) { + /* + * If the process is 64-bit and the instruction type is still + * FASTTRAP_T_COMMON -- meaning we're going to copy it out an + * execute it -- we need to watch for %rip-relative + * addressing mode. See the portion of fasttrap_pid_probe() + * below where we handle tracepoints with type + * FASTTRAP_T_COMMON for how we emulate instructions that + * employ %rip-relative addressing. 
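+		 *
+		 * As a concrete illustration, mov 0x1d3(%rip), %eax is
+		 * encoded as 8b 05 d3 01 00 00: its ModRM byte has
+		 * mod == 0 and r/m == 5, and since the reg field is 0
+		 * the ModRM byte is rewritten to FASTTRAP_MODRM(2, 0, 1)
+		 * (0x81), turning the copied-out instruction into
+		 * mov 0x1d3(%rcx), %eax with %rcx standing in for %rip.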
+ */ + if (rmindex != -1) { + uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]); + uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]); + uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]); + + ASSERT(rmindex > start); + + if (mod == 0 && rm == 5) { + /* + * We need to be sure to avoid other + * registers used by this instruction. While + * the reg field may determine the op code + * rather than denoting a register, assuming + * that it denotes a register is always safe. + * We leave the REX field intact and use + * whatever value's there for simplicity. + */ + if (reg != 0) { + tp->ftt_ripmode = FASTTRAP_RIP_1 | + (FASTTRAP_RIP_X * + FASTTRAP_REX_B(rex)); + rm = 0; + } else { + tp->ftt_ripmode = FASTTRAP_RIP_2 | + (FASTTRAP_RIP_X * + FASTTRAP_REX_B(rex)); + rm = 1; + } + + tp->ftt_modrm = tp->ftt_instr[rmindex]; + tp->ftt_instr[rmindex] = + FASTTRAP_MODRM(2, reg, rm); + } + } + } +#endif + + return (0); +} + +int +fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) +{ + fasttrap_instr_t instr = FASTTRAP_INSTR; + + if (uwrite(p, &instr, 1, tp->ftt_pc) != 0) + return (-1); + + return (0); +} + +int +fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) +{ + uint8_t instr; + + /* + * Distinguish between read or write failures and a changed + * instruction. + */ + if (uread(p, &instr, 1, tp->ftt_pc) != 0) + return (0); + if (instr != FASTTRAP_INSTR) + return (0); + if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0) + return (-1); + + return (0); +} + +#ifdef __amd64 +static uintptr_t +fasttrap_fulword_noerr(const void *uaddr) +{ + uintptr_t ret; + + if (fasttrap_fulword(uaddr, &ret) == 0) + return (ret); + + return (0); +} +#endif + +static uint32_t +fasttrap_fuword32_noerr(const void *uaddr) +{ + uint32_t ret; + + if (fasttrap_fuword32(uaddr, &ret) == 0) + return (ret); + + return (0); +} + +static void +fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid, + uintptr_t new_pc) +{ + fasttrap_tracepoint_t *tp; + fasttrap_bucket_t *bucket; + fasttrap_id_t *id; + kmutex_t *pid_mtx; + + pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; + mutex_enter(pid_mtx); + bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; + + for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { + if (pid == tp->ftt_pid && pc == tp->ftt_pc && + tp->ftt_proc->ftpc_acount != 0) + break; + } + + /* + * Don't sweat it if we can't find the tracepoint again; unlike + * when we're in fasttrap_pid_probe(), finding the tracepoint here + * is not essential to the correct execution of the process. + */ + if (tp == NULL) { + mutex_exit(pid_mtx); + return; + } + + for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { + /* + * If there's a branch that could act as a return site, we + * need to trace it, and check here if the program counter is + * external to the function. 
+ */ + if (tp->ftt_type != FASTTRAP_T_RET && + tp->ftt_type != FASTTRAP_T_RET16 && + new_pc - id->fti_probe->ftp_faddr < + id->fti_probe->ftp_fsize) + continue; + + dtrace_probe(id->fti_probe->ftp_id, + pc - id->fti_probe->ftp_faddr, + rp->r_r0, rp->r_r1, 0, 0); + } + + mutex_exit(pid_mtx); +} + +static void +fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr) +{ + sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); + + sqp->sq_info.si_signo = SIGSEGV; + sqp->sq_info.si_code = SEGV_MAPERR; + sqp->sq_info.si_addr = (caddr_t)addr; + + mutex_enter(&p->p_lock); + sigaddqa(p, t, sqp); + mutex_exit(&p->p_lock); + + if (t != NULL) + aston(t); +} + +#ifdef __amd64 +static void +fasttrap_usdt_args64(fasttrap_probe_t *probe, struct regs *rp, int argc, + uintptr_t *argv) +{ + int i, x, cap = MIN(argc, probe->ftp_nargs); + uintptr_t *stack = (uintptr_t *)rp->r_sp; + + for (i = 0; i < cap; i++) { + x = probe->ftp_argmap[i]; + + if (x < 6) + argv[i] = (&rp->r_rdi)[x]; + else + argv[i] = fasttrap_fulword_noerr(&stack[x]); + } + + for (; i < argc; i++) { + argv[i] = 0; + } +} +#endif + +static void +fasttrap_usdt_args32(fasttrap_probe_t *probe, struct regs *rp, int argc, + uint32_t *argv) +{ + int i, x, cap = MIN(argc, probe->ftp_nargs); + uint32_t *stack = (uint32_t *)rp->r_sp; + + for (i = 0; i < cap; i++) { + x = probe->ftp_argmap[i]; + + argv[i] = fasttrap_fuword32_noerr(&stack[x]); + } + + for (; i < argc; i++) { + argv[i] = 0; + } +} + +static int +fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct regs *rp, uintptr_t *addr) +{ + proc_t *p = curproc; + user_desc_t *desc; + uint16_t sel, ndx, type; + uintptr_t limit; + + switch (tp->ftt_segment) { + case FASTTRAP_SEG_CS: + sel = rp->r_cs; + break; + case FASTTRAP_SEG_DS: + sel = rp->r_ds; + break; + case FASTTRAP_SEG_ES: + sel = rp->r_es; + break; + case FASTTRAP_SEG_FS: + sel = rp->r_fs; + break; + case FASTTRAP_SEG_GS: + sel = rp->r_gs; + break; + case FASTTRAP_SEG_SS: + sel = rp->r_ss; + break; + } + + /* + * Make sure the given segment register specifies a user priority + * selector rather than a kernel selector. + */ + if (!SELISUPL(sel)) + return (-1); + + ndx = SELTOIDX(sel); + + /* + * Check the bounds and grab the descriptor out of the specified + * descriptor table. + */ + if (SELISLDT(sel)) { + if (ndx > p->p_ldtlimit) + return (-1); + + desc = p->p_ldt + ndx; + + } else { + if (ndx >= NGDT) + return (-1); + + desc = cpu_get_gdt() + ndx; + } + + /* + * The descriptor must have user privilege level and it must be + * present in memory. + */ + if (desc->usd_dpl != SEL_UPL || desc->usd_p != 1) + return (-1); + + type = desc->usd_type; + + /* + * If the S bit in the type field is not set, this descriptor can + * only be used in system context. + */ + if ((type & 0x10) != 0x10) + return (-1); + + limit = USEGD_GETLIMIT(desc) * (desc->usd_gran ? PAGESIZE : 1); + + if (tp->ftt_segment == FASTTRAP_SEG_CS) { + /* + * The code/data bit and readable bit must both be set. + */ + if ((type & 0xa) != 0xa) + return (-1); + + if (*addr > limit) + return (-1); + } else { + /* + * The code/data bit must be clear. + */ + if ((type & 0x8) != 0) + return (-1); + + /* + * If the expand-down bit is clear, we just check the limit as + * it would naturally be applied. Otherwise, we need to check + * that the address is the range [limit + 1 .. 0xffff] or + * [limit + 1 ... 0xffffffff] depending on if the default + * operand size bit is set. 
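+		 *
+		 * For example, with a limit of 0xfff an expand-down
+		 * segment admits only offsets strictly above that limit:
+		 * an offset of 0x2000 passes this check while 0x800 is
+		 * rejected.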
+ */ + if ((type & 0x4) == 0) { + if (*addr > limit) + return (-1); + } else if (desc->usd_def32) { + if (*addr < limit + 1 || 0xffff < *addr) + return (-1); + } else { + if (*addr < limit + 1 || 0xffffffff < *addr) + return (-1); + } + } + + *addr += USEGD_GETBASE(desc); + + return (0); +} + +int +fasttrap_pid_probe(struct regs *rp) +{ + proc_t *p = curproc; + uintptr_t pc = rp->r_pc - 1, new_pc = 0; + fasttrap_bucket_t *bucket; + kmutex_t *pid_mtx; + fasttrap_tracepoint_t *tp, tp_local; + pid_t pid; + dtrace_icookie_t cookie; + uint_t is_enabled = 0; + + /* + * It's possible that a user (in a veritable orgy of bad planning) + * could redirect this thread's flow of control before it reached the + * return probe fasttrap. In this case we need to kill the process + * since it's in a unrecoverable state. + */ + if (curthread->t_dtrace_step) { + ASSERT(curthread->t_dtrace_on); + fasttrap_sigtrap(p, curthread, pc); + return (0); + } + + /* + * Clear all user tracing flags. + */ + curthread->t_dtrace_ft = 0; + curthread->t_dtrace_pc = 0; + curthread->t_dtrace_npc = 0; + curthread->t_dtrace_scrpc = 0; + curthread->t_dtrace_astpc = 0; +#ifdef __amd64 + curthread->t_dtrace_regv = 0; +#endif + + /* + * Treat a child created by a call to vfork(2) as if it were its + * parent. We know that there's only one thread of control in such a + * process: this one. + */ + while (p->p_flag & SVFORK) { + p = p->p_parent; + } + + pid = p->p_pid; + pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; + mutex_enter(pid_mtx); + bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; + + /* + * Lookup the tracepoint that the process just hit. + */ + for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { + if (pid == tp->ftt_pid && pc == tp->ftt_pc && + tp->ftt_proc->ftpc_acount != 0) + break; + } + + /* + * If we couldn't find a matching tracepoint, either a tracepoint has + * been inserted without using the pid ioctl interface (see + * fasttrap_ioctl), or somehow we have mislaid this tracepoint. + */ + if (tp == NULL) { + mutex_exit(pid_mtx); + return (-1); + } + + /* + * Set the program counter to the address of the traced instruction + * so that it looks right in ustack() output. + */ + rp->r_pc = pc; + + if (tp->ftt_ids != NULL) { + fasttrap_id_t *id; + +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64) { + for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { + fasttrap_probe_t *probe = id->fti_probe; + + if (id->fti_ptype == DTFTP_ENTRY) { + /* + * We note that this was an entry + * probe to help ustack() find the + * first caller. + */ + cookie = dtrace_interrupt_disable(); + DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); + dtrace_probe(probe->ftp_id, rp->r_rdi, + rp->r_rsi, rp->r_rdx, rp->r_rcx, + rp->r_r8); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); + dtrace_interrupt_enable(cookie); + } else if (id->fti_ptype == DTFTP_IS_ENABLED) { + /* + * Note that in this case, we don't + * call dtrace_probe() since it's only + * an artificial probe meant to change + * the flow of control so that it + * encounters the true probe. 
+ */ + is_enabled = 1; + } else if (probe->ftp_argmap == NULL) { + dtrace_probe(probe->ftp_id, rp->r_rdi, + rp->r_rsi, rp->r_rdx, rp->r_rcx, + rp->r_r8); + } else { + uintptr_t t[5]; + + fasttrap_usdt_args64(probe, rp, + sizeof (t) / sizeof (t[0]), t); + + dtrace_probe(probe->ftp_id, t[0], t[1], + t[2], t[3], t[4]); + } + } + } else { +#endif + uintptr_t s0, s1, s2, s3, s4, s5; + uint32_t *stack = (uint32_t *)rp->r_sp; + + /* + * In 32-bit mode, all arguments are passed on the + * stack. If this is a function entry probe, we need + * to skip the first entry on the stack as it + * represents the return address rather than a + * parameter to the function. + */ + s0 = fasttrap_fuword32_noerr(&stack[0]); + s1 = fasttrap_fuword32_noerr(&stack[1]); + s2 = fasttrap_fuword32_noerr(&stack[2]); + s3 = fasttrap_fuword32_noerr(&stack[3]); + s4 = fasttrap_fuword32_noerr(&stack[4]); + s5 = fasttrap_fuword32_noerr(&stack[5]); + + for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { + fasttrap_probe_t *probe = id->fti_probe; + + if (id->fti_ptype == DTFTP_ENTRY) { + /* + * We note that this was an entry + * probe to help ustack() find the + * first caller. + */ + cookie = dtrace_interrupt_disable(); + DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); + dtrace_probe(probe->ftp_id, s1, s2, + s3, s4, s5); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); + dtrace_interrupt_enable(cookie); + } else if (id->fti_ptype == DTFTP_IS_ENABLED) { + /* + * Note that in this case, we don't + * call dtrace_probe() since it's only + * an artificial probe meant to change + * the flow of control so that it + * encounters the true probe. + */ + is_enabled = 1; + } else if (probe->ftp_argmap == NULL) { + dtrace_probe(probe->ftp_id, s0, s1, + s2, s3, s4); + } else { + uint32_t t[5]; + + fasttrap_usdt_args32(probe, rp, + sizeof (t) / sizeof (t[0]), t); + + dtrace_probe(probe->ftp_id, t[0], t[1], + t[2], t[3], t[4]); + } + } +#ifdef __amd64 + } +#endif + } + + /* + * We're about to do a bunch of work so we cache a local copy of + * the tracepoint to emulate the instruction, and then find the + * tracepoint again later if we need to light up any return probes. + */ + tp_local = *tp; + mutex_exit(pid_mtx); + tp = &tp_local; + + /* + * Set the program counter to appear as though the traced instruction + * had completely executed. This ensures that fasttrap_getreg() will + * report the expected value for REG_RIP. + */ + rp->r_pc = pc + tp->ftt_size; + + /* + * If there's an is-enabled probe connected to this tracepoint it + * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' + * instruction that was placed there by DTrace when the binary was + * linked. As this probe is, in fact, enabled, we need to stuff 1 + * into %eax or %rax. Accordingly, we can bypass all the instruction + * emulation logic since we know the inevitable result. It's possible + * that a user could construct a scenario where the 'is-enabled' + * probe was on some other instruction, but that would be a rather + * exotic way to shoot oneself in the foot. + */ + if (is_enabled) { + rp->r_r0 = 1; + new_pc = rp->r_pc; + goto done; + } + + /* + * We emulate certain types of instructions to ensure correctness + * (in the case of position dependent instructions) or optimize + * common cases. The rest we have the thread execute back in user- + * land. 
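+	 *
+	 * A ret, for instance, is emulated in its entirety below: the
+	 * return address is loaded from the stack (with a SIGSEGV
+	 * delivered if that load faults), %sp is adjusted by the word
+	 * size (plus the immediate operand of a ret <imm16>), and the
+	 * new program counter becomes the loaded address.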
+ */ + switch (tp->ftt_type) { + case FASTTRAP_T_RET: + case FASTTRAP_T_RET16: + { + uintptr_t dst; + uintptr_t addr; + int ret; + + /* + * We have to emulate _every_ facet of the behavior of a ret + * instruction including what happens if the load from %esp + * fails; in that case, we send a SIGSEGV. + */ +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { +#endif + ret = fasttrap_fulword((void *)rp->r_sp, &dst); + addr = rp->r_sp + sizeof (uintptr_t); +#ifdef __amd64 + } else { + uint32_t dst32; + ret = fasttrap_fuword32((void *)rp->r_sp, &dst32); + dst = dst32; + addr = rp->r_sp + sizeof (uint32_t); + } +#endif + + if (ret == -1) { + fasttrap_sigsegv(p, curthread, rp->r_sp); + new_pc = pc; + break; + } + + if (tp->ftt_type == FASTTRAP_T_RET16) + addr += tp->ftt_dest; + + rp->r_sp = addr; + new_pc = dst; + break; + } + + case FASTTRAP_T_JCC: + { + uint_t taken; + + switch (tp->ftt_code) { + case FASTTRAP_JO: + taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) != 0; + break; + case FASTTRAP_JNO: + taken = (rp->r_ps & FASTTRAP_EFLAGS_OF) == 0; + break; + case FASTTRAP_JB: + taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0; + break; + case FASTTRAP_JAE: + taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0; + break; + case FASTTRAP_JE: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0; + break; + case FASTTRAP_JNE: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0; + break; + case FASTTRAP_JBE: + taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) != 0 || + (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0; + break; + case FASTTRAP_JA: + taken = (rp->r_ps & FASTTRAP_EFLAGS_CF) == 0 && + (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0; + break; + case FASTTRAP_JS: + taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) != 0; + break; + case FASTTRAP_JNS: + taken = (rp->r_ps & FASTTRAP_EFLAGS_SF) == 0; + break; + case FASTTRAP_JP: + taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) != 0; + break; + case FASTTRAP_JNP: + taken = (rp->r_ps & FASTTRAP_EFLAGS_PF) == 0; + break; + case FASTTRAP_JL: + taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) != + ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0); + break; + case FASTTRAP_JGE: + taken = ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) == + ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0); + break; + case FASTTRAP_JLE: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 || + ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) != + ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0); + break; + case FASTTRAP_JG: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 && + ((rp->r_ps & FASTTRAP_EFLAGS_SF) == 0) == + ((rp->r_ps & FASTTRAP_EFLAGS_OF) == 0); + break; + + } + + if (taken) + new_pc = tp->ftt_dest; + else + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_LOOP: + { + uint_t taken; +#ifdef __amd64 + greg_t cx = rp->r_rcx--; +#else + greg_t cx = rp->r_ecx--; +#endif + + switch (tp->ftt_code) { + case FASTTRAP_LOOPNZ: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) == 0 && + cx != 0; + break; + case FASTTRAP_LOOPZ: + taken = (rp->r_ps & FASTTRAP_EFLAGS_ZF) != 0 && + cx != 0; + break; + case FASTTRAP_LOOP: + taken = (cx != 0); + break; + } + + if (taken) + new_pc = tp->ftt_dest; + else + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_JCXZ: + { +#ifdef __amd64 + greg_t cx = rp->r_rcx; +#else + greg_t cx = rp->r_ecx; +#endif + + if (cx == 0) + new_pc = tp->ftt_dest; + else + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_PUSHL_EBP: + { + int ret; + uintptr_t addr; +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { +#endif + addr = rp->r_sp - sizeof (uintptr_t); + ret = fasttrap_sulword((void *)addr, rp->r_fp); +#ifdef __amd64 + } else { + addr = rp->r_sp 
- sizeof (uint32_t); + ret = fasttrap_suword32((void *)addr, + (uint32_t)rp->r_fp); + } +#endif + + if (ret == -1) { + fasttrap_sigsegv(p, curthread, addr); + new_pc = pc; + break; + } + + rp->r_sp = addr; + new_pc = pc + tp->ftt_size; + break; + } + + case FASTTRAP_T_NOP: + new_pc = pc + tp->ftt_size; + break; + + case FASTTRAP_T_JMP: + case FASTTRAP_T_CALL: + if (tp->ftt_code == 0) { + new_pc = tp->ftt_dest; + } else { + uintptr_t value, addr = tp->ftt_dest; + + if (tp->ftt_base != FASTTRAP_NOREG) + addr += fasttrap_getreg(rp, tp->ftt_base); + if (tp->ftt_index != FASTTRAP_NOREG) + addr += fasttrap_getreg(rp, tp->ftt_index) << + tp->ftt_scale; + + if (tp->ftt_code == 1) { + /* + * If there's a segment prefix for this + * instruction, we'll need to check permissions + * and bounds on the given selector, and adjust + * the address accordingly. + */ + if (tp->ftt_segment != FASTTRAP_SEG_NONE && + fasttrap_do_seg(tp, rp, &addr) != 0) { + fasttrap_sigsegv(p, curthread, addr); + new_pc = pc; + break; + } + +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { +#endif + if (fasttrap_fulword((void *)addr, + &value) == -1) { + fasttrap_sigsegv(p, curthread, + addr); + new_pc = pc; + break; + } + new_pc = value; +#ifdef __amd64 + } else { + uint32_t value32; + addr = (uintptr_t)(uint32_t)addr; + if (fasttrap_fuword32((void *)addr, + &value32) == -1) { + fasttrap_sigsegv(p, curthread, + addr); + new_pc = pc; + break; + } + new_pc = value32; + } +#endif + } else { + new_pc = addr; + } + } + + /* + * If this is a call instruction, we need to push the return + * address onto the stack. If this fails, we send the process + * a SIGSEGV and reset the pc to emulate what would happen if + * this instruction weren't traced. + */ + if (tp->ftt_type == FASTTRAP_T_CALL) { + int ret; + uintptr_t addr; +#ifdef __amd64 + if (p->p_model == DATAMODEL_NATIVE) { + addr = rp->r_sp - sizeof (uintptr_t); + ret = fasttrap_sulword((void *)addr, + pc + tp->ftt_size); + } else { +#endif + addr = rp->r_sp - sizeof (uint32_t); + ret = fasttrap_suword32((void *)addr, + (uint32_t)(pc + tp->ftt_size)); +#ifdef __amd64 + } +#endif + + if (ret == -1) { + fasttrap_sigsegv(p, curthread, addr); + new_pc = pc; + break; + } + + rp->r_sp = addr; + } + + break; + + case FASTTRAP_T_COMMON: + { + uintptr_t addr; +#if defined(__amd64) + uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; +#else + uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; +#endif + uint_t i = 0; + klwp_t *lwp = ttolwp(curthread); + + /* + * Compute the address of the ulwp_t and step over the + * ul_self pointer. The method used to store the user-land + * thread pointer is very different on 32- and 64-bit + * kernels. + */ +#if defined(__amd64) + if (p->p_model == DATAMODEL_LP64) { + addr = lwp->lwp_pcb.pcb_fsbase; + addr += sizeof (void *); + } else { + addr = lwp->lwp_pcb.pcb_gsbase; + addr += sizeof (caddr32_t); + } +#else + addr = USEGD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc); + addr += sizeof (void *); +#endif + + /* + * Generic Instruction Tracing + * --------------------------- + * + * This is the layout of the scratch space in the user-land + * thread structure for our generated instructions. 
+		 *
+		 *	32-bit mode			bytes
+		 *	------------------------	-----
+		 * a:	<original instruction>		<= 15
+		 *	jmp	<pc + tp->ftt_size>	    5
+		 * b:	<original instruction>		<= 15
+		 *	int	T_DTRACE_RET		    2
+		 *					-----
+		 *					<= 37
+		 *
+		 *	64-bit mode			bytes
+		 *	------------------------	-----
+		 * a:	<original instruction>		<= 15
+		 *	jmp	0(%rip)			    6
+		 *	<pc + tp->ftt_size>		    8
+		 * b:	<original instruction>		<= 15
+		 *	int	T_DTRACE_RET		    2
+		 *					-----
+		 *					<= 46
+		 *
+		 * The %pc is set to a, and curthread->t_dtrace_astpc is set
+		 * to b. If we encounter a signal on the way out of the
+		 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
+		 * so that we execute the original instruction and re-enter
+		 * the kernel rather than redirecting to the next instruction.
+		 *
+		 * If there are return probes (so we know that we're going to
+		 * need to reenter the kernel after executing the original
+		 * instruction), the scratch space will just contain the
+		 * original instruction followed by an interrupt -- the same
+		 * data as at b.
+		 *
+		 * %rip-relative Addressing
+		 * ------------------------
+		 *
+		 * There's a further complication in 64-bit mode due to %rip-
+		 * relative addressing. While this is clearly a beneficial
+		 * architectural decision for position independent code, it's
+		 * hard not to see it as a personal attack against the pid
+		 * provider since before there was a relatively small set of
+		 * instructions to emulate; with %rip-relative addressing,
+		 * almost every instruction can potentially depend on the
+		 * address at which it's executed. Rather than emulating
+		 * the broad spectrum of instructions that can now be
+		 * position dependent, we emulate jumps and others as in
+		 * 32-bit mode, and take a different tack for instructions
+		 * using %rip-relative addressing.
+		 *
+		 * For every instruction that uses the ModRM byte, the
+		 * in-kernel disassembler reports its location. We use the
+		 * ModRM byte to identify that an instruction uses
+		 * %rip-relative addressing and to see what other registers
+		 * the instruction uses. To emulate those instructions,
+		 * we modify the instruction to be %rax-relative rather than
+		 * %rip-relative (or %rcx-relative if the instruction uses
+		 * %rax; or %r8- or %r9-relative if the REX.B is present so
+		 * we don't have to rewrite the REX prefix). We then load
+		 * the value that %rip would have been into the scratch
+		 * register and generate an instruction to reset the scratch
+		 * register back to its original value. The instruction
+		 * sequence looks like this:
+		 *
+		 *	64-mode %rip-relative		bytes
+		 *	------------------------	-----
+		 * a:	<modified instruction>		<= 15
+		 *	movq	$<value>, %<scratch>	    6
+		 *	jmp	0(%rip)			    6
+		 *	<pc + tp->ftt_size>		    8
+		 * b:	<modified instruction>		<= 15
+		 *	int	T_DTRACE_RET		    2
+		 *					-----
+		 *					   52
+		 *
+		 * We set curthread->t_dtrace_regv so that upon receiving
+		 * a signal we can reset the value of the scratch register.
+		 */
+
+		ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);
+
+		curthread->t_dtrace_scrpc = addr;
+		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
+		i += tp->ftt_size;
+
+#ifdef __amd64
+		if (tp->ftt_ripmode != 0) {
+			greg_t *reg;
+
+			ASSERT(p->p_model == DATAMODEL_LP64);
+			ASSERT(tp->ftt_ripmode &
+			    (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));
+
+			/*
+			 * If this was a %rip-relative instruction, we change
+			 * it to be either a %rax- or %rcx-relative
+			 * instruction (depending on whether those registers
+			 * are used as another operand; or %r8- or %r9-
+			 * relative depending on the value of REX.B). We then
+			 * set that register and generate a movq instruction
+			 * to reset the value.
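+			 *
+			 * For the illustrative mov 0x1d3(%rip), %eax example,
+			 * the scratch space thus begins with the rewritten
+			 * bytes 8b 81 d3 01 00 00 (mov 0x1d3(%rcx), %eax)
+			 * followed by 48 b9 <saved %rcx value> (a movq
+			 * restoring %rcx); %rcx itself is set to
+			 * pc + tp->ftt_size before returning to user-land.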
+ */ + if (tp->ftt_ripmode & FASTTRAP_RIP_X) + scratch[i++] = FASTTRAP_REX(1, 0, 0, 1); + else + scratch[i++] = FASTTRAP_REX(1, 0, 0, 0); + + if (tp->ftt_ripmode & FASTTRAP_RIP_1) + scratch[i++] = FASTTRAP_MOV_EAX; + else + scratch[i++] = FASTTRAP_MOV_ECX; + + switch (tp->ftt_ripmode) { + case FASTTRAP_RIP_1: + reg = &rp->r_rax; + curthread->t_dtrace_reg = REG_RAX; + break; + case FASTTRAP_RIP_2: + reg = &rp->r_rcx; + curthread->t_dtrace_reg = REG_RCX; + break; + case FASTTRAP_RIP_1 | FASTTRAP_RIP_X: + reg = &rp->r_r8; + curthread->t_dtrace_reg = REG_R8; + break; + case FASTTRAP_RIP_2 | FASTTRAP_RIP_X: + reg = &rp->r_r9; + curthread->t_dtrace_reg = REG_R9; + break; + } + + /* LINTED - alignment */ + *(uint64_t *)&scratch[i] = *reg; + curthread->t_dtrace_regv = *reg; + *reg = pc + tp->ftt_size; + i += sizeof (uint64_t); + } +#endif + + /* + * Generate the branch instruction to what would have + * normally been the subsequent instruction. In 32-bit mode, + * this is just a relative branch; in 64-bit mode this is a + * %rip-relative branch that loads the 64-bit pc value + * immediately after the jmp instruction. + */ +#ifdef __amd64 + if (p->p_model == DATAMODEL_LP64) { + scratch[i++] = FASTTRAP_GROUP5_OP; + scratch[i++] = FASTTRAP_MODRM(0, 4, 5); + /* LINTED - alignment */ + *(uint32_t *)&scratch[i] = 0; + i += sizeof (uint32_t); + /* LINTED - alignment */ + *(uint64_t *)&scratch[i] = pc + tp->ftt_size; + i += sizeof (uint64_t); + } else { +#endif + /* + * Set up the jmp to the next instruction; note that + * the size of the traced instruction cancels out. + */ + scratch[i++] = FASTTRAP_JMP32; + /* LINTED - alignment */ + *(uint32_t *)&scratch[i] = pc - addr - 5; + i += sizeof (uint32_t); +#ifdef __amd64 + } +#endif + + curthread->t_dtrace_astpc = addr + i; + bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); + i += tp->ftt_size; + scratch[i++] = FASTTRAP_INT; + scratch[i++] = T_DTRACE_RET; + + ASSERT(i <= sizeof (scratch)); + + if (fasttrap_copyout(scratch, (char *)addr, i)) { + fasttrap_sigtrap(p, curthread, pc); + new_pc = pc; + break; + } + + if (tp->ftt_retids != NULL) { + curthread->t_dtrace_step = 1; + curthread->t_dtrace_ret = 1; + new_pc = curthread->t_dtrace_astpc; + } else { + new_pc = curthread->t_dtrace_scrpc; + } + + curthread->t_dtrace_pc = pc; + curthread->t_dtrace_npc = pc + tp->ftt_size; + curthread->t_dtrace_on = 1; + break; + } + + default: + panic("fasttrap: mishandled an instruction"); + } + +done: + /* + * If there were no return probes when we first found the tracepoint, + * we should feel no obligation to honor any return probes that were + * subsequently enabled -- they'll just have to wait until the next + * time around. + */ + if (tp->ftt_retids != NULL) { + /* + * We need to wait until the results of the instruction are + * apparent before invoking any return probes. If this + * instruction was emulated we can just call + * fasttrap_return_common(); if it needs to be executed, we + * need to wait until the user thread returns to the kernel. + */ + if (tp->ftt_type != FASTTRAP_T_COMMON) { + /* + * Set the program counter to the address of the traced + * instruction so that it looks right in ustack() + * output. We had previously set it to the end of the + * instruction to simplify %rip-relative addressing. 
+ */ + rp->r_pc = pc; + + fasttrap_return_common(rp, pc, pid, new_pc); + } else { + ASSERT(curthread->t_dtrace_ret != 0); + ASSERT(curthread->t_dtrace_pc == pc); + ASSERT(curthread->t_dtrace_scrpc != 0); + ASSERT(new_pc == curthread->t_dtrace_astpc); + } + } + + rp->r_pc = new_pc; + + return (0); +} + +int +fasttrap_return_probe(struct regs *rp) +{ + proc_t *p = curproc; + uintptr_t pc = curthread->t_dtrace_pc; + uintptr_t npc = curthread->t_dtrace_npc; + + curthread->t_dtrace_pc = 0; + curthread->t_dtrace_npc = 0; + curthread->t_dtrace_scrpc = 0; + curthread->t_dtrace_astpc = 0; + + /* + * Treat a child created by a call to vfork(2) as if it were its + * parent. We know that there's only one thread of control in such a + * process: this one. + */ + while (p->p_flag & SVFORK) { + p = p->p_parent; + } + + /* + * We set rp->r_pc to the address of the traced instruction so + * that it appears to dtrace_probe() that we're on the original + * instruction, and so that the user can't easily detect our + * complex web of lies. dtrace_return_probe() (our caller) + * will correctly set %pc after we return. + */ + rp->r_pc = pc; + + fasttrap_return_common(rp, pc, p->p_pid, npc); + + return (0); +} + +/*ARGSUSED*/ +uint64_t +fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, + int aframes) +{ + return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 1, argno)); +} + +/*ARGSUSED*/ +uint64_t +fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, + int aframes) +{ + return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, 0, argno)); +} + +static ulong_t +fasttrap_getreg(struct regs *rp, uint_t reg) +{ +#ifdef __amd64 + switch (reg) { + case REG_R15: return (rp->r_r15); + case REG_R14: return (rp->r_r14); + case REG_R13: return (rp->r_r13); + case REG_R12: return (rp->r_r12); + case REG_R11: return (rp->r_r11); + case REG_R10: return (rp->r_r10); + case REG_R9: return (rp->r_r9); + case REG_R8: return (rp->r_r8); + case REG_RDI: return (rp->r_rdi); + case REG_RSI: return (rp->r_rsi); + case REG_RBP: return (rp->r_rbp); + case REG_RBX: return (rp->r_rbx); + case REG_RDX: return (rp->r_rdx); + case REG_RCX: return (rp->r_rcx); + case REG_RAX: return (rp->r_rax); + case REG_TRAPNO: return (rp->r_trapno); + case REG_ERR: return (rp->r_err); + case REG_RIP: return (rp->r_rip); + case REG_CS: return (rp->r_cs); + case REG_RFL: return (rp->r_rfl); + case REG_RSP: return (rp->r_rsp); + case REG_SS: return (rp->r_ss); + case REG_FS: return (rp->r_fs); + case REG_GS: return (rp->r_gs); + case REG_DS: return (rp->r_ds); + case REG_ES: return (rp->r_es); + case REG_FSBASE: return (rdmsr(MSR_AMD_FSBASE)); + case REG_GSBASE: return (rdmsr(MSR_AMD_GSBASE)); + } + + panic("dtrace: illegal register constant"); + /*NOTREACHED*/ +#else + if (reg >= _NGREG) + panic("dtrace: illegal register constant"); + + return (((greg_t *)&rp->r_gs)[reg]); +#endif +} diff --git a/uts/sparc/dtrace/fasttrap_isa.c b/uts/sparc/dtrace/fasttrap_isa.c new file mode 100644 index 000000000000..45d87478d6a2 --- /dev/null +++ b/uts/sparc/dtrace/fasttrap_isa.c @@ -0,0 +1,1597 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+/*
+ * Lossless User-Land Tracing on SPARC
+ * -----------------------------------
+ *
+ * The Basic Idea
+ *
+ * The most important design constraint is, of course, correct execution of
+ * the user thread above all else. The next most important goal is rapid
+ * execution. We combine execution of instructions in user-land with
+ * emulation of certain instructions in the kernel to aim for complete
+ * correctness and maximal performance.
+ *
+ * We take advantage of the split PC/NPC architecture to speed up logical
+ * single-stepping; when we copy an instruction out to the scratch space in
+ * the ulwp_t structure (held in the %g7 register on SPARC), we can
+ * effectively single step by setting the PC to our scratch space and leaving
+ * the NPC alone. This executes the replaced instruction and then continues
+ * on without having to reenter the kernel as with single-stepping. The
+ * obvious caveat is for instructions whose execution is PC dependent --
+ * branches, call and link instructions (call and jmpl), and the rdpc
+ * instruction. These instructions cannot be executed in the manner described
+ * so they must be emulated in the kernel.
+ *
+ * Emulation for this small set of instructions is fairly simple; the most
+ * difficult part being emulating branch conditions.
+ *
+ *
+ * A Cache Heavy Portfolio
+ *
+ * It's important to note at this time that copying an instruction out to the
+ * ulwp_t scratch space in user-land is rather complicated. SPARC has
+ * separate data and instruction caches so any writes to the D$ (using a
+ * store instruction for example) aren't necessarily reflected in the I$.
+ * The flush instruction can be used to synchronize the two and must be used
+ * for any self-modifying code, but the flush instruction only applies to the
+ * primary address space (the absence of a flusha analogue to the flush
+ * instruction that accepts an ASI argument is an obvious omission from SPARC
+ * v9 where the notion of the alternate address space was introduced on
+ * SPARC). To correctly copy out the instruction we must use a block store
+ * that doesn't allocate in the D$ and ensures synchronization with the I$;
+ * see dtrace_blksuword32() for the implementation (this function uses
+ * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
+ * described). Refer to the UltraSPARC I/II manual for details on the
+ * ASI_BLK_COMMIT_S ASI.
+ *
+ *
+ * Return Subtleties
+ *
+ * When we're firing a return probe we need to expose the value returned by
+ * the function being traced. Since the function can set the return value
+ * in its last instruction, we need to fire the return probe only _after_
+ * the effects of the instruction are apparent. For instructions that we
+ * emulate, we can call dtrace_probe() after we've performed the emulation;
+ * for instructions that we execute after we return to user-land, we set
+ * %pc to the instruction we copied out (as described above) and set %npc
+ * to a trap instruction stashed in the ulwp_t structure. After the traced
+ * instruction is executed, the trap instruction returns control to the
+ * kernel where we can fire the return probe.
+ *
+ * This need for a second trap in cases where we execute the traced
+ * instruction makes it all the more important to emulate the most common
+ * instructions to avoid the second trip in and out of the kernel.
+ *
+ *
+ * Making it Fast
+ *
+ * Since copying out an instruction is neither simple nor inexpensive for the
+ * CPU, we should attempt to avoid doing it in as many cases as possible.
+ * Since function entry and return are usually the most interesting probe
+ * sites, we attempt to tune the performance of the fasttrap provider around
+ * instructions typically in those places.
+ *
+ * Looking at a bunch of functions in libraries and executables reveals that
+ * most functions begin with either a save or a sethi (to set up a larger
+ * argument to the save) and end with a restore or an or (in the case of leaf
+ * functions). To try to improve performance, we emulate all of these
+ * instructions in the kernel.
+ *
+ * The save and restore instructions are a little tricky since they perform
+ * register window manipulation. Rather than trying to tinker with the
+ * register windows from the kernel, we emulate the implicit add that takes
+ * place as part of those instructions and set the %pc to point to a simple
+ * save or restore we've hidden in the ulwp_t structure. If we're in a return
+ * probe and want to make it seem as though the tracepoint has been completely
+ * executed, we need to remember that we've pulled this trick with restore and
+ * pull registers from the previous window (the one that we'll switch to once
+ * the simple restore instruction is executed) rather than the current one.
+ * This is why in the case of emulating a restore we set the DTrace CPU flag
+ * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
+ * (see fasttrap_return_common()).
+ */
+
+#define	OP(x)		((x) >> 30)
+#define	OP2(x)		(((x) >> 22) & 0x07)
+#define	OP3(x)		(((x) >> 19) & 0x3f)
+#define	RCOND(x)	(((x) >> 25) & 0x07)
+#define	COND(x)		(((x) >> 25) & 0x0f)
+#define	A(x)		(((x) >> 29) & 0x01)
+#define	I(x)		(((x) >> 13) & 0x01)
+#define	RD(x)		(((x) >> 25) & 0x1f)
+#define	RS1(x)		(((x) >> 14) & 0x1f)
+#define	RS2(x)		(((x) >> 0) & 0x1f)
+#define	CC(x)		(((x) >> 20) & 0x03)
+#define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
+#define	DISP22(x)	((x) & 0x3fffff)
+#define	DISP19(x)	((x) & 0x7ffff)
+#define	DISP30(x)	((x) & 0x3fffffff)
+#define	SW_TRAP(x)	((x) & 0x7f)
+
+#define	OP3_OR		0x02
+#define	OP3_RD		0x28
+#define	OP3_JMPL	0x38
+#define	OP3_RETURN	0x39
+#define	OP3_TCC		0x3a
+#define	OP3_SAVE	0x3c
+#define	OP3_RESTORE	0x3d
+
+#define	OP3_PREFETCH	0x2d
+#define	OP3_CASA	0x3c
+#define	OP3_PREFETCHA	0x3d
+#define	OP3_CASXA	0x3e
+
+#define	OP2_ILLTRAP	0x0
+#define	OP2_BPcc	0x1
+#define	OP2_Bicc	0x2
+#define	OP2_BPr		0x3
+#define	OP2_SETHI	0x4
+#define	OP2_FBPfcc	0x5
+#define	OP2_FBfcc	0x6
+
+#define	R_G0		0
+#define	R_O0		8
+#define	R_SP		14
+#define	R_I0		24
+#define	R_I1		25
+#define	R_I2		26
+#define	R_I3		27
+#define	R_I4		28
+
+/*
+ * Check the comment in fasttrap.h when changing these offsets or adding
+ * new instructions.
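+ *
+ * As an illustrative decoding using the field macros above: the
+ * instruction save %sp, -0x60, %sp assembles to 0x9de3bfa0, for which
+ * OP() yields 2, RD() 14 (%sp), OP3() 0x3c (OP3_SAVE), RS1() 14, I() 1,
+ * and the low thirteen bits hold the signed immediate -0x60, which the
+ * emulation below recovers with the (instr << 19) >> 19 sign-extension
+ * trick.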
+ */ +#define FASTTRAP_OFF_SAVE 64 +#define FASTTRAP_OFF_RESTORE 68 +#define FASTTRAP_OFF_FTRET 72 +#define FASTTRAP_OFF_RETURN 76 + +#define BREAKPOINT_INSTR 0x91d02001 /* ta 1 */ + +/* + * Tunable to let users turn off the fancy save instruction optimization. + * If a program is non-ABI compliant, there's a possibility that the save + * instruction optimization could cause an error. + */ +int fasttrap_optimize_save = 1; + +static uint64_t +fasttrap_anarg(struct regs *rp, int argno) +{ + uint64_t value; + + if (argno < 6) + return ((&rp->r_o0)[argno]); + + if (curproc->p_model == DATAMODEL_NATIVE) { + struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS); + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + value = dtrace_fulword(&fr->fr_argd[argno]); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR | + CPU_DTRACE_BADALIGN); + } else { + struct frame32 *fr = (struct frame32 *)rp->r_sp; + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + value = dtrace_fuword32(&fr->fr_argd[argno]); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR | + CPU_DTRACE_BADALIGN); + } + + return (value); +} + +static ulong_t fasttrap_getreg(struct regs *, uint_t); +static void fasttrap_putreg(struct regs *, uint_t, ulong_t); + +static void +fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp, + uint_t fake_restore, int argc, uintptr_t *argv) +{ + int i, x, cap = MIN(argc, probe->ftp_nargs); + int inc = (fake_restore ? 16 : 0); + + /* + * The only way we'll hit the fake_restore case is if a USDT probe is + * invoked as a tail-call. While it wouldn't be incorrect, we can + * avoid a call to fasttrap_getreg(), and safely use rp->r_sp + * directly since a tail-call can't be made if the invoked function + * would use the argument dump space (i.e. if there were more than + * 6 arguments). We take this shortcut because unconditionally rooting + * around for R_FP (R_SP + 16) would be unnecessarily painful. + */ + + if (curproc->p_model == DATAMODEL_NATIVE) { + struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS); + uintptr_t v; + + for (i = 0; i < cap; i++) { + x = probe->ftp_argmap[i]; + + if (x < 6) + argv[i] = fasttrap_getreg(rp, R_O0 + x + inc); + else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0) + argv[i] = 0; + } + + } else { + struct frame32 *fr = (struct frame32 *)rp->r_sp; + uint32_t v; + + for (i = 0; i < cap; i++) { + x = probe->ftp_argmap[i]; + + if (x < 6) + argv[i] = fasttrap_getreg(rp, R_O0 + x + inc); + else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0) + argv[i] = 0; + } + } + + for (; i < argc; i++) { + argv[i] = 0; + } +} + +static void +fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid, + uint_t fake_restore) +{ + fasttrap_tracepoint_t *tp; + fasttrap_bucket_t *bucket; + fasttrap_id_t *id; + kmutex_t *pid_mtx; + dtrace_icookie_t cookie; + + pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; + mutex_enter(pid_mtx); + bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; + + for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { + if (pid == tp->ftt_pid && pc == tp->ftt_pc && + tp->ftt_proc->ftpc_acount != 0) + break; + } + + /* + * Don't sweat it if we can't find the tracepoint again; unlike + * when we're in fasttrap_pid_probe(), finding the tracepoint here + * is not essential to the correct execution of the process. 
+	 */
+	if (tp == NULL || tp->ftt_retids == NULL) {
+		mutex_exit(pid_mtx);
+		return;
+	}
+
+	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
+		fasttrap_probe_t *probe = id->fti_probe;
+
+		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
+			if (probe->ftp_argmap != NULL && fake_restore) {
+				uintptr_t t[5];
+
+				fasttrap_usdt_args(probe, rp, fake_restore,
+				    sizeof (t) / sizeof (t[0]), t);
+
+				cookie = dtrace_interrupt_disable();
+				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
+				dtrace_probe(probe->ftp_id, t[0], t[1],
+				    t[2], t[3], t[4]);
+				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
+				dtrace_interrupt_enable(cookie);
+
+			} else if (probe->ftp_argmap != NULL) {
+				uintptr_t t[5];
+
+				fasttrap_usdt_args(probe, rp, fake_restore,
+				    sizeof (t) / sizeof (t[0]), t);
+
+				dtrace_probe(probe->ftp_id, t[0], t[1],
+				    t[2], t[3], t[4]);
+
+			} else if (fake_restore) {
+				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
+				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
+				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
+				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
+				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);
+
+				cookie = dtrace_interrupt_disable();
+				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
+				dtrace_probe(probe->ftp_id, arg0, arg1,
+				    arg2, arg3, arg4);
+				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
+				dtrace_interrupt_enable(cookie);
+
+			} else {
+				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
+				    rp->r_o2, rp->r_o3, rp->r_o4);
+			}
+
+			continue;
+		}
+
+		/*
+		 * If this is only a possible return point, we must
+		 * be looking at a potential tail call in leaf context.
+		 * If the %npc is still within this function, then we
+		 * must have misidentified a jmpl as a tail-call when it
+		 * is, in fact, part of a jump table. It would be nice to
+		 * remove this tracepoint, but this is neither the time
+		 * nor the place.
+		 */
+		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
+		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
+			continue;
+
+		/*
+		 * It's possible for a function to branch to the delay slot
+		 * of an instruction that we've identified as a return site.
+		 * We can detect this spurious return probe activation by
+		 * observing that in this case %npc will be %pc + 4 and %npc
+		 * will be inside the current function (unless the user is
+		 * doing _crazy_ instruction picking in which case there's
+		 * very little we can do). The second check is important
+		 * in case the last instructions of a function make a tail-
+		 * call to the function located immediately subsequent.
+		 */
+		if (rp->r_npc == rp->r_pc + 4 &&
+		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
+			continue;
+
+		/*
+		 * The first argument is the offset of the return tracepoint
+		 * in the function; the remaining arguments are the return
+		 * values.
+		 *
+		 * If fake_restore is set, we need to pull the return values
+		 * out of the %i's rather than the %o's -- a little trickier.
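+		 *
+		 * (After a restore the register windows rotate so that the
+		 * callee's %i registers become the caller's %o registers,
+		 * which is why R_I0 is simply R_O0 + 16 in the register
+		 * numbering used by fasttrap_getreg().)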
+
+int
+fasttrap_pid_probe(struct regs *rp)
+{
+	proc_t *p = curproc;
+	fasttrap_tracepoint_t *tp, tp_local;
+	fasttrap_id_t *id;
+	pid_t pid;
+	uintptr_t pc = rp->r_pc;
+	uintptr_t npc = rp->r_npc;
+	uintptr_t orig_pc = pc;
+	fasttrap_bucket_t *bucket;
+	kmutex_t *pid_mtx;
+	uint_t fake_restore = 0, is_enabled = 0;
+	dtrace_icookie_t cookie;
+
+	/*
+	 * It's possible that a user (in a veritable orgy of bad planning)
+	 * could redirect this thread's flow of control before it reached the
+	 * return probe fasttrap. In this case we need to kill the process
+	 * since it's in an unrecoverable state.
+	 */
+	if (curthread->t_dtrace_step) {
+		ASSERT(curthread->t_dtrace_on);
+		fasttrap_sigtrap(p, curthread, pc);
+		return (0);
+	}
+
+	/*
+	 * Clear all user tracing flags.
+	 */
+	curthread->t_dtrace_ft = 0;
+	curthread->t_dtrace_pc = 0;
+	curthread->t_dtrace_npc = 0;
+	curthread->t_dtrace_scrpc = 0;
+	curthread->t_dtrace_astpc = 0;
+
+	/*
+	 * Treat a child created by a call to vfork(2) as if it were its
+	 * parent. We know that there's only one thread of control in such a
+	 * process: this one.
+	 */
+	while (p->p_flag & SVFORK) {
+		p = p->p_parent;
+	}
+
+	pid = p->p_pid;
+	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
+	mutex_enter(pid_mtx);
+	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
+
+	/*
+	 * Look up the tracepoint that the process just hit.
+	 */
+	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
+		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
+		    tp->ftt_proc->ftpc_acount != 0)
+			break;
+	}
+
+	/*
+	 * If we couldn't find a matching tracepoint, either a tracepoint has
+	 * been inserted without using the pid ioctl interface (see
+	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
+	 */
+	if (tp == NULL) {
+		mutex_exit(pid_mtx);
+		return (-1);
+	}
+
+	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
+		fasttrap_probe_t *probe = id->fti_probe;
+		int isentry = (id->fti_ptype == DTFTP_ENTRY);
+
+		if (id->fti_ptype == DTFTP_IS_ENABLED) {
+			is_enabled = 1;
+			continue;
+		}
+
+		/*
+		 * We note that this was an entry probe to help ustack() find
+		 * the first caller.
+		 */
+		if (isentry) {
+			cookie = dtrace_interrupt_disable();
+			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
+		}
+		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
+		    rp->r_o3, rp->r_o4);
+		if (isentry) {
+			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
+			dtrace_interrupt_enable(cookie);
+		}
+	}
+
+	/*
+	 * We're about to do a bunch of work so we cache a local copy of
+	 * the tracepoint to emulate the instruction, and then find the
+	 * tracepoint again later if we need to light up any return probes.
+	 */
+	tp_local = *tp;
+	mutex_exit(pid_mtx);
+	tp = &tp_local;
+
+	/*
+	 * If there's an is-enabled probe connected to this tracepoint it
+	 * means that there was a 'mov %g0, %o0' instruction that was placed
+	 * there by DTrace when the binary was linked. As this probe is, in
+	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
+	 * bypass all the instruction emulation logic since we know the
+	 * inevitable result. It's possible that a user could construct a
+	 * scenario where the 'is-enabled' probe was on some other
+	 * instruction, but that would be a rather exotic way to shoot oneself
+	 * in the foot.
+	 */
+	if (is_enabled) {
+		rp->r_o0 = 1;
+		pc = rp->r_npc;
+		npc = pc + 4;
+		goto done;
+	}
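+
+	/*
+	 * [Editor's note -- not part of the OpenSolaris import.] Several
+	 * cases below recover the signed 13-bit immediate (simm13, bits
+	 * 12..0) with the shift pair "imm = instr << 19; imm >>= 19;".
+	 * The left shift parks bit 12 of the field in the sign bit, and
+	 * the arithmetic right shift drags it back down, sign-extending
+	 * along the way; e.g. a field value of 0x1fff comes out as -1.
+	 */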
+
+	/*
+	 * We emulate certain types of instructions to ensure correctness
+	 * (in the case of position dependent instructions) or optimize
+	 * common cases. The rest we have the thread execute back in user-
+	 * land.
+	 */
+	switch (tp->ftt_type) {
+	case FASTTRAP_T_SAVE:
+	{
+		int32_t imm;
+
+		/*
+		 * This is an optimization to let us handle function entry
+		 * probes more efficiently. Many functions begin with a save
+		 * instruction that follows the pattern:
+		 *	save	%sp, <imm>, %sp
+		 *
+		 * Meanwhile, we've stashed the instruction:
+		 *	save	%g1, %g0, %sp
+		 *
+		 * off of %g7, so all we have to do is stick the right value
+		 * into %g1 and reset %pc to point to the instruction we've
+		 * cleverly hidden (%npc should not be touched).
+		 */
+
+		imm = tp->ftt_instr << 19;
+		imm >>= 19;
+		rp->r_g1 = rp->r_sp + imm;
+		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
+		break;
+	}
+
+	case FASTTRAP_T_RESTORE:
+	{
+		ulong_t value;
+		uint_t rd;
+
+		/*
+		 * This is an optimization to let us handle function
+		 * return probes more efficiently. Most non-leaf functions
+		 * end with the sequence:
+		 *	ret
+		 *	restore	<rs1>, <rs2 | imm>, %oX
+		 *
+		 * We've stashed the instruction:
+		 *	restore	%g0, %g0, %g0
+		 *
+		 * off of %g7 so we just need to place the correct value
+		 * in the right %i register (since after our fake-o
+		 * restore, the %i's will become the %o's) and set the %pc
+		 * to point to our hidden restore. We also set fake_restore to
+		 * let fasttrap_return_common() know that it will find the
+		 * return values in the %i's rather than the %o's.
+		 */
+
+		if (I(tp->ftt_instr)) {
+			int32_t imm;
+
+			imm = tp->ftt_instr << 19;
+			imm >>= 19;
+			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
+		} else {
+			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
+			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
+		}
+
+		/*
+		 * Convert %o's to %i's; leave %g's as they are.
+		 */
+		rd = RD(tp->ftt_instr);
+		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);
+
+		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
+		fake_restore = 1;
+		break;
+	}
+
+	case FASTTRAP_T_RETURN:
+	{
+		uintptr_t target;
+
+		/*
+		 * A return instruction is like a jmpl (without the link
+		 * part) that executes an implicit restore. We've stashed
+		 * the instruction:
+		 *	return	%o0
+		 *
+		 * off of %g7 so we just need to place the target in %o0
+		 * and set the %pc to point to the stashed return instruction.
+		 * We use %o0 since that register disappears after the return
+		 * executes, erasing any evidence of this tampering.
+ */ + if (I(tp->ftt_instr)) { + int32_t imm; + + imm = tp->ftt_instr << 19; + imm >>= 19; + target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm; + } else { + target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + + fasttrap_getreg(rp, RS2(tp->ftt_instr)); + } + + fasttrap_putreg(rp, R_O0, target); + + pc = rp->r_g7 + FASTTRAP_OFF_RETURN; + fake_restore = 1; + break; + } + + case FASTTRAP_T_OR: + { + ulong_t value; + + if (I(tp->ftt_instr)) { + int32_t imm; + + imm = tp->ftt_instr << 19; + imm >>= 19; + value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm; + } else { + value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | + fasttrap_getreg(rp, RS2(tp->ftt_instr)); + } + + fasttrap_putreg(rp, RD(tp->ftt_instr), value); + pc = rp->r_npc; + npc = pc + 4; + break; + } + + case FASTTRAP_T_SETHI: + if (RD(tp->ftt_instr) != R_G0) { + uint32_t imm32 = tp->ftt_instr << 10; + fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32); + } + pc = rp->r_npc; + npc = pc + 4; + break; + + case FASTTRAP_T_CCR: + { + uint_t c, v, z, n, taken; + uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT; + + if (tp->ftt_cc != 0) + ccr >>= 4; + + c = (ccr >> 0) & 1; + v = (ccr >> 1) & 1; + z = (ccr >> 2) & 1; + n = (ccr >> 3) & 1; + + switch (tp->ftt_code) { + case 0x0: /* BN */ + taken = 0; break; + case 0x1: /* BE */ + taken = z; break; + case 0x2: /* BLE */ + taken = z | (n ^ v); break; + case 0x3: /* BL */ + taken = n ^ v; break; + case 0x4: /* BLEU */ + taken = c | z; break; + case 0x5: /* BCS (BLU) */ + taken = c; break; + case 0x6: /* BNEG */ + taken = n; break; + case 0x7: /* BVS */ + taken = v; break; + case 0x8: /* BA */ + /* + * We handle the BA case differently since the annul + * bit means something slightly different. + */ + panic("fasttrap: mishandled a branch"); + taken = 1; break; + case 0x9: /* BNE */ + taken = ~z; break; + case 0xa: /* BG */ + taken = ~(z | (n ^ v)); break; + case 0xb: /* BGE */ + taken = ~(n ^ v); break; + case 0xc: /* BGU */ + taken = ~(c | z); break; + case 0xd: /* BCC (BGEU) */ + taken = ~c; break; + case 0xe: /* BPOS */ + taken = ~n; break; + case 0xf: /* BVC */ + taken = ~v; break; + } + + if (taken & 1) { + pc = rp->r_npc; + npc = tp->ftt_dest; + } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) { + /* + * Untaken annulled branches don't execute the + * instruction in the delay slot. + */ + pc = rp->r_npc + 4; + npc = pc + 4; + } else { + pc = rp->r_npc; + npc = pc + 4; + } + break; + } + + case FASTTRAP_T_FCC: + { + uint_t fcc; + uint_t taken; + uint64_t fsr; + + dtrace_getfsr(&fsr); + + if (tp->ftt_cc == 0) { + fcc = (fsr >> 10) & 0x3; + } else { + uint_t shift; + ASSERT(tp->ftt_cc <= 3); + shift = 30 + tp->ftt_cc * 2; + fcc = (fsr >> shift) & 0x3; + } + + switch (tp->ftt_code) { + case 0x0: /* FBN */ + taken = (1 << fcc) & (0|0|0|0); break; + case 0x1: /* FBNE */ + taken = (1 << fcc) & (8|4|2|0); break; + case 0x2: /* FBLG */ + taken = (1 << fcc) & (0|4|2|0); break; + case 0x3: /* FBUL */ + taken = (1 << fcc) & (8|0|2|0); break; + case 0x4: /* FBL */ + taken = (1 << fcc) & (0|0|2|0); break; + case 0x5: /* FBUG */ + taken = (1 << fcc) & (8|4|0|0); break; + case 0x6: /* FBG */ + taken = (1 << fcc) & (0|4|0|0); break; + case 0x7: /* FBU */ + taken = (1 << fcc) & (8|0|0|0); break; + case 0x8: /* FBA */ + /* + * We handle the FBA case differently since the annul + * bit means something slightly different. 
+ */ + panic("fasttrap: mishandled a branch"); + taken = (1 << fcc) & (8|4|2|1); break; + case 0x9: /* FBE */ + taken = (1 << fcc) & (0|0|0|1); break; + case 0xa: /* FBUE */ + taken = (1 << fcc) & (8|0|0|1); break; + case 0xb: /* FBGE */ + taken = (1 << fcc) & (0|4|0|1); break; + case 0xc: /* FBUGE */ + taken = (1 << fcc) & (8|4|0|1); break; + case 0xd: /* FBLE */ + taken = (1 << fcc) & (0|0|2|1); break; + case 0xe: /* FBULE */ + taken = (1 << fcc) & (8|0|2|1); break; + case 0xf: /* FBO */ + taken = (1 << fcc) & (0|4|2|1); break; + } + + if (taken) { + pc = rp->r_npc; + npc = tp->ftt_dest; + } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) { + /* + * Untaken annulled branches don't execute the + * instruction in the delay slot. + */ + pc = rp->r_npc + 4; + npc = pc + 4; + } else { + pc = rp->r_npc; + npc = pc + 4; + } + break; + } + + case FASTTRAP_T_REG: + { + int64_t value; + uint_t taken; + uint_t reg = RS1(tp->ftt_instr); + + /* + * An ILP32 process shouldn't be using a branch predicated on + * an %i or an %l since it would violate the ABI. It's a + * violation of the ABI because we can't ensure deterministic + * behavior. We should have identified this case when we + * enabled the probe. + */ + ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16); + + value = (int64_t)fasttrap_getreg(rp, reg); + + switch (tp->ftt_code) { + case 0x1: /* BRZ */ + taken = (value == 0); break; + case 0x2: /* BRLEZ */ + taken = (value <= 0); break; + case 0x3: /* BRLZ */ + taken = (value < 0); break; + case 0x5: /* BRNZ */ + taken = (value != 0); break; + case 0x6: /* BRGZ */ + taken = (value > 0); break; + case 0x7: /* BRGEZ */ + taken = (value >= 0); break; + default: + case 0x0: + case 0x4: + panic("fasttrap: mishandled a branch"); + } + + if (taken) { + pc = rp->r_npc; + npc = tp->ftt_dest; + } else if (tp->ftt_flags & FASTTRAP_F_ANNUL) { + /* + * Untaken annulled branches don't execute the + * instruction in the delay slot. + */ + pc = rp->r_npc + 4; + npc = pc + 4; + } else { + pc = rp->r_npc; + npc = pc + 4; + } + break; + } + + case FASTTRAP_T_ALWAYS: + /* + * BAs, BA,As... + */ + + if (tp->ftt_flags & FASTTRAP_F_ANNUL) { + /* + * Annulled branch always instructions never execute + * the instruction in the delay slot. + */ + pc = tp->ftt_dest; + npc = tp->ftt_dest + 4; + } else { + pc = rp->r_npc; + npc = tp->ftt_dest; + } + break; + + case FASTTRAP_T_RDPC: + fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc); + pc = rp->r_npc; + npc = pc + 4; + break; + + case FASTTRAP_T_CALL: + /* + * It's a call _and_ link remember... + */ + rp->r_o7 = rp->r_pc; + pc = rp->r_npc; + npc = tp->ftt_dest; + break; + + case FASTTRAP_T_JMPL: + pc = rp->r_npc; + + if (I(tp->ftt_instr)) { + uint_t rs1 = RS1(tp->ftt_instr); + int32_t imm; + + imm = tp->ftt_instr << 19; + imm >>= 19; + npc = fasttrap_getreg(rp, rs1) + imm; + } else { + uint_t rs1 = RS1(tp->ftt_instr); + uint_t rs2 = RS2(tp->ftt_instr); + + npc = fasttrap_getreg(rp, rs1) + + fasttrap_getreg(rp, rs2); + } + + /* + * Do the link part of the jump-and-link instruction. + */ + fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc); + + break; + + case FASTTRAP_T_COMMON: + { + curthread->t_dtrace_scrpc = rp->r_g7; + curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET; + + /* + * Copy the instruction to a reserved location in the + * user-land thread structure, then set the PC to that + * location and leave the NPC alone. 
+		 * We take pains to ensure
+		 * consistency in the instruction stream (See SPARC
+		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
+		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
+		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
+		 * instruction into the user's address space without
+		 * bypassing the I$. There's no AS_USER version of this ASI
+		 * (as exist for other ASIs) so we use the lofault
+		 * mechanism to catch faults.
+		 */
+		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
+			/*
+			 * If the copyout fails, then the process's state
+			 * is not consistent (the effects of the traced
+			 * instruction will never be seen). This process
+			 * cannot be allowed to continue execution.
+			 */
+			fasttrap_sigtrap(curproc, curthread, pc);
+			return (0);
+		}
+
+		curthread->t_dtrace_pc = pc;
+		curthread->t_dtrace_npc = npc;
+		curthread->t_dtrace_on = 1;
+
+		pc = curthread->t_dtrace_scrpc;
+
+		if (tp->ftt_retids != NULL) {
+			curthread->t_dtrace_step = 1;
+			curthread->t_dtrace_ret = 1;
+			npc = curthread->t_dtrace_astpc;
+		}
+		break;
+	}
+
+	default:
+		panic("fasttrap: mishandled an instruction");
+	}
+
+	/*
+	 * This bit me in the ass a couple of times, so let's toss this
+	 * in as a cursory sanity check.
+	 */
+	ASSERT(pc != rp->r_g7 + 4);
+	ASSERT(pc != rp->r_g7 + 8);
+
+done:
+	/*
+	 * If there were no return probes when we first found the tracepoint,
+	 * we should feel no obligation to honor any return probes that were
+	 * subsequently enabled -- they'll just have to wait until the next
+	 * time around.
+	 */
+	if (tp->ftt_retids != NULL) {
+		/*
+		 * We need to wait until the results of the instruction are
+		 * apparent before invoking any return probes. If this
+		 * instruction was emulated we can just call
+		 * fasttrap_return_common(); if it needs to be executed, we
+		 * need to wait until we return to the kernel.
+		 */
+		if (tp->ftt_type != FASTTRAP_T_COMMON) {
+			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
+		} else {
+			ASSERT(curthread->t_dtrace_ret != 0);
+			ASSERT(curthread->t_dtrace_pc == orig_pc);
+			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
+			ASSERT(npc == curthread->t_dtrace_astpc);
+		}
+	}
+
+	ASSERT(pc != 0);
+	rp->r_pc = pc;
+	rp->r_npc = npc;
+
+	return (0);
+}
+
+int
+fasttrap_return_probe(struct regs *rp)
+{
+	proc_t *p = ttoproc(curthread);
+	pid_t pid;
+	uintptr_t pc = curthread->t_dtrace_pc;
+	uintptr_t npc = curthread->t_dtrace_npc;
+
+	curthread->t_dtrace_pc = 0;
+	curthread->t_dtrace_npc = 0;
+	curthread->t_dtrace_scrpc = 0;
+	curthread->t_dtrace_astpc = 0;
+
+	/*
+	 * Treat a child created by a call to vfork(2) as if it were its
+	 * parent. We know there's only one thread of control in such a
+	 * process: this one.
+	 */
+	while (p->p_flag & SVFORK) {
+		p = p->p_parent;
+	}
+
+	/*
+	 * We set the %pc and %npc to their values when the traced
+	 * instruction was initially executed so that it appears to
+	 * dtrace_probe() that we're on the original instruction, and so that
+	 * the user can't easily detect our complex web of lies.
+	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
+	 * after we return.
+	 */
+	rp->r_pc = pc;
+	rp->r_npc = npc;
+
+	pid = p->p_pid;
+	fasttrap_return_common(rp, pc, pid, 0);
+
+	return (0);
+}
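+
+/*
+ * [Editor's illustration -- not part of the OpenSolaris import.] The
+ * per-thread scratch region addressed by %g7, as implied by the
+ * FASTTRAP_OFF_* constants above and the comments in
+ * fasttrap_pid_probe():
+ *
+ *	%g7 + 0:			 copied-out traced instruction
+ *					 (t_dtrace_scrpc points here)
+ *	%g7 + FASTTRAP_OFF_SAVE (64):	 save %g1, %g0, %sp
+ *	%g7 + FASTTRAP_OFF_RESTORE (68): restore %g0, %g0, %g0
+ *	%g7 + FASTTRAP_OFF_FTRET (72):	 trap back into the kernel
+ *					 (t_dtrace_astpc points here)
+ *	%g7 + FASTTRAP_OFF_RETURN (76):	 return %o0
+ */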
+
+int
+fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
+{
+	fasttrap_instr_t instr = FASTTRAP_INSTR;
+
+	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
+		return (-1);
+
+	return (0);
+}
+
+int
+fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
+{
+	fasttrap_instr_t instr;
+
+	/*
+	 * Distinguish between read or write failures and a changed
+	 * instruction.
+	 */
+	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
+		return (0);
+	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
+		return (0);
+	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
+		return (-1);
+
+	return (0);
+}
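+
+/*
+ * [Editor's illustration -- not part of the OpenSolaris import.] The
+ * branch-displacement decoding used by fasttrap_tracepoint_init() below:
+ * each DISPn field holds a word offset that must be sign-extended and
+ * scaled by 4, and the paired shifts do both at once. For DISP19,
+ * (disp << 13) >> 11 moves bit 18 of the field into the sign bit and
+ * then shifts back by only 11, leaving the value sign-extended and
+ * multiplied by 4.
+ */
+#if 0	/* sketch only */
+static uintptr_t
+fasttrap_branch_dest19(uintptr_t pc, uint32_t instr)
+{
+	int32_t disp = DISP19(instr);
+
+	disp <<= 13;	/* bit 18 of the field -> bit 31 */
+	disp >>= 11;	/* sign-extend and scale by 4 */
+	return (pc + (intptr_t)disp);
+}
+#endif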
+
+int
+fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
+    fasttrap_probe_type_t type)
+{
+	uint32_t instr;
+	int32_t disp;
+
+	/*
+	 * Read the instruction at the given address out of the process's
+	 * address space. We don't have to worry about a debugger
+	 * changing this instruction before we overwrite it with our trap
+	 * instruction since P_PR_LOCK is set.
+	 */
+	if (uread(p, &instr, 4, pc) != 0)
+		return (-1);
+
+	/*
+	 * Decode the instruction to fill in the probe flags. We can have
+	 * the process execute most instructions on its own using a pc/npc
+	 * trick, but pc-relative control transfers present a problem since
+	 * we're relocating the instruction. We emulate these instructions
+	 * in the kernel. We assume a default type and overwrite that as
+	 * needed.
+	 *
+	 * pc-relative instructions must be emulated for correctness;
+	 * other instructions (which represent a large set of commonly traced
+	 * instructions) are emulated or otherwise optimized for performance.
+	 */
+	tp->ftt_type = FASTTRAP_T_COMMON;
+	if (OP(instr) == 1) {
+		/*
+		 * Call instructions.
+		 */
+		tp->ftt_type = FASTTRAP_T_CALL;
+		disp = DISP30(instr) << 2;
+		tp->ftt_dest = pc + (intptr_t)disp;
+
+	} else if (OP(instr) == 0) {
+		/*
+		 * Branch instructions.
+		 *
+		 * Unconditional branches need careful attention when they're
+		 * annulled: annulled unconditional branches never execute
+		 * the instruction in the delay slot.
+		 */
+		switch (OP2(instr)) {
+		case OP2_ILLTRAP:
+		case 0x7:
+			/*
+			 * The compiler may place an illtrap after a call to
+			 * a function that returns a structure. In the case of
+			 * a returned structure, the compiler places an illtrap
+			 * whose const22 field is the size of the returned
+			 * structure immediately following the delay slot of
+			 * the call. To stay out of the way, we refuse to
+			 * place tracepoints on top of illtrap instructions.
+			 *
+			 * This is one of the dumbest architectural decisions
+			 * I've ever had to work around.
+			 *
+			 * We also identify the only illegal op2 value (See
+			 * SPARC Architecture Manual Version 9, E.2 table 31).
+			 */
+			return (-1);
+
+		case OP2_BPcc:
+			if (COND(instr) == 8) {
+				tp->ftt_type = FASTTRAP_T_ALWAYS;
+			} else {
+				/*
+				 * Check for an illegal instruction.
+				 */
+				if (CC(instr) & 1)
+					return (-1);
+				tp->ftt_type = FASTTRAP_T_CCR;
+				tp->ftt_cc = CC(instr);
+				tp->ftt_code = COND(instr);
+			}
+
+			if (A(instr) != 0)
+				tp->ftt_flags |= FASTTRAP_F_ANNUL;
+
+			disp = DISP19(instr);
+			disp <<= 13;
+			disp >>= 11;
+			tp->ftt_dest = pc + (intptr_t)disp;
+			break;
+
+		case OP2_Bicc:
+			if (COND(instr) == 8) {
+				tp->ftt_type = FASTTRAP_T_ALWAYS;
+			} else {
+				tp->ftt_type = FASTTRAP_T_CCR;
+				tp->ftt_cc = 0;
+				tp->ftt_code = COND(instr);
+			}
+
+			if (A(instr) != 0)
+				tp->ftt_flags |= FASTTRAP_F_ANNUL;
+
+			disp = DISP22(instr);
+			disp <<= 10;
+			disp >>= 8;
+			tp->ftt_dest = pc + (intptr_t)disp;
+			break;
+
+		case OP2_BPr:
+			/*
+			 * Check for an illegal instruction.
+			 */
+			if ((RCOND(instr) & 3) == 0)
+				return (-1);
+
+			/*
+			 * It's a violation of the v8plus ABI to use a
+			 * register-predicated branch in a 32-bit app if
+			 * the register used is an %l or an %i (%gs and %os
+			 * are legit because they're not saved to the stack
+			 * in 32-bit words when we take a trap).
+			 */
+			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
+				return (-1);
+
+			tp->ftt_type = FASTTRAP_T_REG;
+			if (A(instr) != 0)
+				tp->ftt_flags |= FASTTRAP_F_ANNUL;
+			disp = DISP16(instr);
+			disp <<= 16;
+			disp >>= 14;
+			tp->ftt_dest = pc + (intptr_t)disp;
+			tp->ftt_code = RCOND(instr);
+			break;
+
+		case OP2_SETHI:
+			tp->ftt_type = FASTTRAP_T_SETHI;
+			break;
+
+		case OP2_FBPfcc:
+			if (COND(instr) == 8) {
+				tp->ftt_type = FASTTRAP_T_ALWAYS;
+			} else {
+				tp->ftt_type = FASTTRAP_T_FCC;
+				tp->ftt_cc = CC(instr);
+				tp->ftt_code = COND(instr);
+			}
+
+			if (A(instr) != 0)
+				tp->ftt_flags |= FASTTRAP_F_ANNUL;
+
+			disp = DISP19(instr);
+			disp <<= 13;
+			disp >>= 11;
+			tp->ftt_dest = pc + (intptr_t)disp;
+			break;
+
+		case OP2_FBfcc:
+			if (COND(instr) == 8) {
+				tp->ftt_type = FASTTRAP_T_ALWAYS;
+			} else {
+				tp->ftt_type = FASTTRAP_T_FCC;
+				tp->ftt_cc = 0;
+				tp->ftt_code = COND(instr);
+			}
+
+			if (A(instr) != 0)
+				tp->ftt_flags |= FASTTRAP_F_ANNUL;
+
+			disp = DISP22(instr);
+			disp <<= 10;
+			disp >>= 8;
+			tp->ftt_dest = pc + (intptr_t)disp;
+			break;
+		}
+
+	} else if (OP(instr) == 2) {
+		switch (OP3(instr)) {
+		case OP3_RETURN:
+			tp->ftt_type = FASTTRAP_T_RETURN;
+			break;
+
+		case OP3_JMPL:
+			tp->ftt_type = FASTTRAP_T_JMPL;
+			break;
+
+		case OP3_RD:
+			if (RS1(instr) == 5)
+				tp->ftt_type = FASTTRAP_T_RDPC;
+			break;
+
+		case OP3_SAVE:
+			/*
+			 * We optimize for save instructions at function
+			 * entry; see the comment in fasttrap_pid_probe()
+			 * (near FASTTRAP_T_SAVE) for details.
+			 */
+			if (fasttrap_optimize_save != 0 &&
+			    type == DTFTP_ENTRY &&
+			    I(instr) == 1 && RD(instr) == R_SP)
+				tp->ftt_type = FASTTRAP_T_SAVE;
+			break;
+
+		case OP3_RESTORE:
+			/*
+			 * We optimize restore instructions at function
+			 * return; see the comment in fasttrap_pid_probe()
+			 * (near FASTTRAP_T_RESTORE) for details.
+			 *
+			 * rd must be an %o or %g register.
+			 */
+			if ((RD(instr) & 0x10) == 0)
+				tp->ftt_type = FASTTRAP_T_RESTORE;
+			break;
+
+		case OP3_OR:
+			/*
+			 * A large proportion of instructions in the delay
+			 * slot of retl instructions are or's so we emulate
+			 * these downstairs as an optimization.
+			 */
+			tp->ftt_type = FASTTRAP_T_OR;
+			break;
+
+		case OP3_TCC:
+			/*
+			 * Breakpoint instructions are effectively position-
+			 * dependent since the debugger uses the %pc value
+			 * to look up which breakpoint was executed. As a
+			 * result, we can't actually instrument breakpoints.
+			 */
+			if (SW_TRAP(instr) == ST_BREAKPOINT)
+				return (-1);
+			break;
+
+		case 0x19:
+		case 0x1d:
+		case 0x29:
+		case 0x33:
+		case 0x3f:
+			/*
+			 * Identify illegal instructions (See SPARC
+			 * Architecture Manual Version 9, E.2 table 32).
+			 */
+			return (-1);
+		}
+	} else if (OP(instr) == 3) {
+		uint32_t op3 = OP3(instr);
+
+		/*
+		 * Identify illegal instructions (See SPARC Architecture
+		 * Manual Version 9, E.2 table 33).
+		 */
+		if ((op3 & 0x28) == 0x28) {
+			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
+			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
+				return (-1);
+		} else {
+			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
+				return (-1);
+		}
+	}
+
+	tp->ftt_instr = instr;
+
+	/*
+	 * We don't know how this tracepoint is going to be used, but in case
+	 * it's used as part of a function return probe, we need to indicate
+	 * whether it's always a return site or only potentially a return
+	 * site. If it's part of a return probe, it's always going to be a
+	 * return from that function if it's a restore instruction or if
+	 * the previous instruction was a return. If we could reliably
+	 * distinguish jump tables from return sites, this wouldn't be
+	 * necessary.
+	 */
+	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
+	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
+	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
+		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;
+
+	return (0);
+}
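+
+/*
+ * [Editor's note -- not part of the OpenSolaris import.] The OP(),
+ * OP2(), OP3(), and COND() macros used by the decoder above come from
+ * the fasttrap headers; on the standard SPARC V9 encoding they reduce
+ * to field extractions along these lines (hypothetical equivalents):
+ *
+ *	op	(instr >> 30) & 0x3	instruction class
+ *	op2	(instr >> 22) & 0x7	branch/sethi subclass (class 0)
+ *	op3	(instr >> 19) & 0x3f	opcode within classes 2 and 3
+ *	cond	(instr >> 25) & 0xf	branch condition field
+ */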
+
+/*ARGSUSED*/
+uint64_t
+fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
+    int aframes)
+{
+	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
+}
+
+/*ARGSUSED*/
+uint64_t
+fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
+    int aframes)
+{
+	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
+}
+
+static uint64_t fasttrap_getreg_fast_cnt;
+static uint64_t fasttrap_getreg_mpcb_cnt;
+static uint64_t fasttrap_getreg_slow_cnt;
+
+static ulong_t
+fasttrap_getreg(struct regs *rp, uint_t reg)
+{
+	ulong_t value;
+	dtrace_icookie_t cookie;
+	struct machpcb *mpcb;
+	extern ulong_t dtrace_getreg_win(uint_t, uint_t);
+
+	/*
+	 * We have the %os and %gs in our struct regs, but if we need to
+	 * snag a %l or %i we need to go scrounging around in the process's
+	 * address space.
+	 */
+	if (reg == 0)
+		return (0);
+
+	if (reg < 16)
+		return ((&rp->r_g1)[reg - 1]);
+
+	/*
+	 * Before we look at the user's stack, we'll check the register
+	 * windows to see if the information we want is in there.
+	 */
+	cookie = dtrace_interrupt_disable();
+	if (dtrace_getotherwin() > 0) {
+		value = dtrace_getreg_win(reg, 1);
+		dtrace_interrupt_enable(cookie);
+
+		atomic_add_64(&fasttrap_getreg_fast_cnt, 1);
+
+		return (value);
+	}
+	dtrace_interrupt_enable(cookie);
+
+	/*
+	 * First check the machpcb structure to see if we've already read
+	 * in the register window we're looking for; if we haven't (and
+	 * we probably haven't), try to copy in the value of the register.
+	 */
+	/* LINTED - alignment */
+	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
+
+	if (get_udatamodel() == DATAMODEL_NATIVE) {
+		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
+
+		if (mpcb->mpcb_wbcnt > 0) {
+			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
+			int i = mpcb->mpcb_wbcnt;
+			do {
+				i--;
+				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
+					continue;
+
+				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
+				return (rwin[i].rw_local[reg - 16]);
+			} while (i > 0);
+		}
+
+		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
+			goto err;
+	} else {
+		struct frame32 *fr =
+		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
+		uint32_t *v32 = (uint32_t *)&value;
+
+		if (mpcb->mpcb_wbcnt > 0) {
+			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
+			int i = mpcb->mpcb_wbcnt;
+			do {
+				i--;
+				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
+					continue;
+
+				atomic_add_64(&fasttrap_getreg_mpcb_cnt, 1);
+				return (rwin[i].rw_local[reg - 16]);
+			} while (i > 0);
+		}
+
+		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
+			goto err;
+
+		v32[0] = 0;
+	}
+
+	atomic_add_64(&fasttrap_getreg_slow_cnt, 1);
+	return (value);
+
+err:
+	/*
+	 * If the copy in failed, the process will be in an irrecoverable
+	 * state, and we have no choice but to kill it.
+	 */
+	psignal(ttoproc(curthread), SIGILL);
+	return (0);
+}
+
+static uint64_t fasttrap_putreg_fast_cnt;
+static uint64_t fasttrap_putreg_mpcb_cnt;
+static uint64_t fasttrap_putreg_slow_cnt;
+
+static void
+fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
+{
+	dtrace_icookie_t cookie;
+	struct machpcb *mpcb;
+	extern void dtrace_putreg_win(uint_t, ulong_t);
+
+	if (reg == 0)
+		return;
+
+	if (reg < 16) {
+		(&rp->r_g1)[reg - 1] = value;
+		return;
+	}
+
+	/*
+	 * If the user process is still using some register windows, we
+	 * can just place the value in the correct window.
+	 */
+	cookie = dtrace_interrupt_disable();
+	if (dtrace_getotherwin() > 0) {
+		dtrace_putreg_win(reg, value);
+		dtrace_interrupt_enable(cookie);
+		atomic_add_64(&fasttrap_putreg_fast_cnt, 1);
+		return;
+	}
+	dtrace_interrupt_enable(cookie);
+
+	/*
+	 * First see if there's a copy of the register window in the
+	 * machpcb structure that we can modify; if there isn't, try to
+	 * copy out the value. If that fails, we try to create a new
+	 * register window in the machpcb structure. While this isn't
+	 * _precisely_ the intended use of the machpcb structure, it
+	 * can't cause any problems since we know at this point in the
+	 * code that all of the user's data have been flushed out of the
+	 * register file (since %otherwin is 0).
+ */ + /* LINTED - alignment */ + mpcb = (struct machpcb *)((caddr_t)rp - REGOFF); + + if (get_udatamodel() == DATAMODEL_NATIVE) { + struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS); + /* LINTED - alignment */ + struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf; + + if (mpcb->mpcb_wbcnt > 0) { + int i = mpcb->mpcb_wbcnt; + do { + i--; + if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp) + continue; + + rwin[i].rw_local[reg - 16] = value; + atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1); + return; + } while (i > 0); + } + + if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) { + if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr, + &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0) + goto err; + + rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value; + mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp; + mpcb->mpcb_wbcnt++; + atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1); + return; + } + } else { + struct frame32 *fr = + (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp; + /* LINTED - alignment */ + struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf; + uint32_t v32 = (uint32_t)value; + + if (mpcb->mpcb_wbcnt > 0) { + int i = mpcb->mpcb_wbcnt; + do { + i--; + if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp) + continue; + + rwin[i].rw_local[reg - 16] = v32; + atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1); + return; + } while (i > 0); + } + + if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) { + if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr, + &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0) + goto err; + + rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32; + mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp; + mpcb->mpcb_wbcnt++; + atomic_add_64(&fasttrap_putreg_mpcb_cnt, 1); + return; + } + } + + atomic_add_64(&fasttrap_putreg_slow_cnt, 1); + return; + +err: + /* + * If we couldn't record this register's value, the process is in an + * irrecoverable state and we have no choice but to euthanize it. + */ + psignal(ttoproc(curthread), SIGILL); +}
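+
+/*
+ * [Editor's summary -- not part of the OpenSolaris import.] The lookup
+ * order shared by fasttrap_getreg() and fasttrap_putreg() above:
+ *
+ *	1. %g and %o registers live in struct regs and are accessed
+ *	   directly;
+ *	2. %l and %i registers are first sought in the live register
+ *	   windows (%otherwin, via dtrace_getreg_win()/dtrace_putreg_win());
+ *	3. failing that, in the register windows buffered in the machpcb
+ *	   (mpcb_wbuf/mpcb_spbuf);
+ *	4. and finally in the user's stack frame, via the fasttrap
+ *	   copy-in/copy-out routines.
+ */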