diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile
index 885a313812..7a9e00cf30 100644
--- a/lib/librte_bpf/Makefile
+++ b/lib/librte_bpf/Makefile
@@ -28,6 +28,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c
 ifeq ($(CONFIG_RTE_LIBRTE_BPF_ELF),y)
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load_elf.c
 endif
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_jit_x86.c
+endif
 
 # install header files
 SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += bpf_def.h
diff --git a/lib/librte_bpf/bpf.c b/lib/librte_bpf/bpf.c
index d7f68c017b..dc6d109913 100644
--- a/lib/librte_bpf/bpf.c
+++ b/lib/librte_bpf/bpf.c
@@ -41,7 +41,12 @@ bpf_jit(struct rte_bpf *bpf)
 {
 	int32_t rc;
 
+#ifdef RTE_ARCH_X86_64
+	rc = bpf_jit_x86(bpf);
+#else
 	rc = -ENOTSUP;
+#endif
+
 	if (rc != 0)
 		RTE_BPF_LOG(WARNING, "%s(%p) failed, error code: %d;\n",
 			__func__, bpf, rc);
diff --git a/lib/librte_bpf/bpf_jit_x86.c b/lib/librte_bpf/bpf_jit_x86.c
new file mode 100644
index 0000000000..111e028d21
--- /dev/null
+++ b/lib/librte_bpf/bpf_jit_x86.c
@@ -0,0 +1,1369 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "bpf_impl.h"
+
+#define GET_BPF_OP(op)	(BPF_OP(op) >> 4) /* index for per-operation tables */
+
+/* x86_64 general purpose register numbers. */
+enum {
+	RAX = 0,  /* scratch, return value */
+	RCX = 1,  /* scratch, 4th arg */
+	RDX = 2,  /* scratch, 3rd arg */
+	RBX = 3,  /* callee saved */
+	RSP = 4,  /* stack pointer */
+	RBP = 5,  /* frame pointer, callee saved */
+	RSI = 6,  /* scratch, 2nd arg */
+	RDI = 7,  /* scratch, 1st arg */
+	R8  = 8,  /* scratch, 5th arg */
+	R9  = 9,  /* scratch, 6th arg */
+	R10 = 10, /* scratch */
+	R11 = 11, /* scratch */
+	R12 = 12, /* callee saved */
+	R13 = 13, /* callee saved */
+	R14 = 14, /* callee saved */
+	R15 = 15, /* callee saved */
+};
+
+#define IS_EXT_REG(r)	((r) >= R8) /* register needs a REX extension bit */
+
+/* bits of the REX prefix byte */
+enum {
+	REX_PREFIX = 0x40, /* fixed value 0100 */
+	REX_W = 0x8,       /* 64bit operand size */
+	REX_R = 0x4,       /* extension of the ModRM.reg field */
+	REX_X = 0x2,       /* extension of the SIB.index field */
+	REX_B = 0x1,       /* extension of the ModRM.rm field */
+};
+
+/* values for the 'mod' field of the ModRM byte */
+enum {
+	MOD_INDIRECT = 0,
+	MOD_IDISP8 = 1,
+	MOD_IDISP32 = 2,
+	MOD_DIRECT = 3,
+};
+
+/* values for the 'scale' field of the SIB byte */
+enum {
+	SIB_SCALE_1 = 0,
+	SIB_SCALE_2 = 1,
+	SIB_SCALE_4 = 2,
+	SIB_SCALE_8 = 3,
+};
+
+/*
+ * eBPF to x86_64 register mappings.
+ * Note that neither RSP nor R12 is a mapping target.
+ */
+static const uint32_t ebpf2x86[] = {
+	[EBPF_REG_0] = RAX,
+	[EBPF_REG_1] = RDI,
+	[EBPF_REG_2] = RSI,
+	[EBPF_REG_3] = RDX,
+	[EBPF_REG_4] = RCX,
+	[EBPF_REG_5] = R8,
+	[EBPF_REG_6] = RBX,
+	[EBPF_REG_7] = R13,
+	[EBPF_REG_8] = R14,
+	[EBPF_REG_9] = R15,
+	[EBPF_REG_10] = RBP,
+};
+
+/*
+ * r9, r10 and r11 are used as scratch temporary registers:
+ * r9 holds an immediate divisor, r10/r11 keep saved copies of rdx/rax.
+ */
+enum {
+	REG_DIV_IMM = R9,
+	REG_TMP0 = R11,
+	REG_TMP1 = R10,
+};
+
+/*
+ * callee saved registers list.
+ * keep RBP as the last one.
+ */
+static const uint32_t save_regs[] = {RBX, R12, R13, R14, R15, RBP};
+
+struct bpf_jit_state {
+	uint32_t idx;    /* index of the BPF instruction being translated */
+	size_t sz;       /* number of native code bytes emitted so far */
+	struct {
+		uint32_t num; /* number of exit instructions seen in this pass */
+		int32_t off;  /* native code offset of the epilog block */
+	} exit;
+	uint32_t reguse; /* bitmask of x86 registers passed to emit_rex() */
+	int32_t *off;    /* native code offset for each BPF instruction */
+	uint8_t *ins;    /* output buffer, NULL for size-only (dry) runs */
+};
+
+#define INUSE(v, r)	(((v) >> (r)) & 1) /* is bit 'r' set in mask 'v' */
+#define USED(v, r)	((v) |= 1 << (r)) /* set bit 'r' in mask 'v' */
+
+/* lets a 32-bit immediate be emitted byte by byte */
+union bpf_jit_imm {
+	uint32_t u32;
+	uint8_t u8[4];
+};
+
+/*
+ * convert BPF operand size code (BPF_B/BPF_H/BPF_W/EBPF_DW) into
+ * number of bytes; returns 0 for an unknown code.
+ */
+static size_t
+bpf_size(uint32_t bpf_op_sz)
+{
+	if (bpf_op_sz == BPF_B)
+		return sizeof(uint8_t);
+	else if (bpf_op_sz == BPF_H)
+		return sizeof(uint16_t);
+	else if (bpf_op_sz == BPF_W)
+		return sizeof(uint32_t);
+	else if (bpf_op_sz == EBPF_DW)
+		return sizeof(uint64_t);
+	return 0;
+}
+
+/*
+ * In many cases for imm8 we can produce shorter code.
+ */
+static size_t
+imm_size(int32_t v)
+{
+	if (v == (int8_t)v)
+		return sizeof(int8_t);
+	return sizeof(int32_t);
+}
+
+/*
+ * append 'sz' bytes from 'ins' to the generated code.
+ * When st->ins is NULL (dry run) nothing is stored, only the
+ * total size is accumulated.
+ */
+static void
+emit_bytes(struct bpf_jit_state *st, const uint8_t ins[], uint32_t sz)
+{
+	uint32_t i;
+
+	if (st->ins != NULL) {
+		for (i = 0; i != sz; i++)
+			st->ins[st->sz + i] = ins[i];
+	}
+	st->sz += sz;
+}
+
+/*
+ * emit the first 'sz' (1..4) bytes of the immediate value
+ */
+static void
+emit_imm(struct bpf_jit_state *st, const uint32_t imm, uint32_t sz)
+{
+	union bpf_jit_imm v;
+
+	v.u32 = imm;
+	emit_bytes(st, v.u8, sz);
+}
+
+/*
+ * emit REX byte
+ * 'reg' and 'rm' are the registers that go into ModRM.reg and ModRM.rm,
+ * both are recorded in st->reguse.
+ * The prefix is emitted only when at least one of its bits is needed.
+ */
+static void
+emit_rex(struct bpf_jit_state *st, uint32_t op, uint32_t reg, uint32_t rm)
+{
+	uint8_t rex;
+
+	/* mark operand registers as used*/
+	USED(st->reguse, reg);
+	USED(st->reguse, rm);
+
+	rex = 0;
+	if (BPF_CLASS(op) == EBPF_ALU64 ||
+			op == (BPF_ST | BPF_MEM | EBPF_DW) ||
+			op == (BPF_STX | BPF_MEM | EBPF_DW) ||
+			op == (BPF_STX | EBPF_XADD | EBPF_DW) ||
+			op == (BPF_LD | BPF_IMM | EBPF_DW) ||
+			(BPF_CLASS(op) == BPF_LDX &&
+			BPF_MODE(op) == BPF_MEM &&
+			BPF_SIZE(op) != BPF_W))
+		rex |= REX_W;
+
+	if (IS_EXT_REG(reg))
+		rex |= REX_R;
+
+	if (IS_EXT_REG(rm))
+		rex |= REX_B;
+
+	/*
+	 * store using SIL, DIL, BPL:
+	 * without a REX prefix register numbers 5, 6, 7 in a byte sized
+	 * operation select CH, DH, BH, so force the prefix for them.
+	 */
+	if (op == (BPF_STX | BPF_MEM | BPF_B) &&
+			(reg == RDI || reg == RSI || reg == RBP))
+		rex |= REX_PREFIX;
+
+	if (rex != 0) {
+		rex |= REX_PREFIX;
+		emit_bytes(st, &rex, sizeof(rex));
+	}
+}
+
+/*
+ * emit MODRegRM byte
+ */
+static void
+emit_modregrm(struct bpf_jit_state *st, uint32_t mod, uint32_t reg, uint32_t rm)
+{
+	uint8_t v;
+
+	v = mod << 6 | (reg & 7) << 3 | (rm & 7);
+	emit_bytes(st, &v, sizeof(v));
+}
+
+/*
+ * emit SIB byte
+ */
+static void
+emit_sib(struct bpf_jit_state *st, uint32_t scale, uint32_t idx, uint32_t base)
+{
+	uint8_t v;
+
+	v = scale << 6 | (idx & 7) << 3 | (base & 7);
+	emit_bytes(st, &v, sizeof(v));
+}
+
+/*
+ * emit xchg %<sreg>, %<dreg>
+ * always done as a 64-bit operation.
+ */
+static void
+emit_xchg_reg(struct bpf_jit_state *st, uint32_t sreg, uint32_t dreg)
+{
+	const uint8_t ops = 0x87;
+
+	emit_rex(st, EBPF_ALU64, sreg, dreg);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+/*
+ * emit neg %<dreg>
+ */
+static void
+emit_neg(struct bpf_jit_state *st, uint32_t op, uint32_t dreg)
+{
+	const uint8_t ops = 0xF7;
+	const uint8_t mods = 3;
+
+	emit_rex(st, op, 0, dreg);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, mods, dreg);
+}
+
+/*
+ * emit mov %<sreg>, %<dreg>
+ * a 64-bit move of a register to itself is skipped.
+ */
+static void
+emit_mov_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+	uint32_t dreg)
+{
+	const uint8_t ops = 0x89;
+
+	/* if operands are 32-bit, then it can be used to clear upper 32-bit */
+	if (sreg != dreg || BPF_CLASS(op) == BPF_ALU) {
+		emit_rex(st, op, sreg, dreg);
+		emit_bytes(st, &ops, sizeof(ops));
+		emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+	}
+}
+
+/*
+ * emit movzwl %<sreg>, %<dreg>
+ * NOTE(review): 'sreg' is placed into ModRM.reg and 'dreg' into ModRM.rm;
+ * all callers in this file pass the same register for both.
+ */
+static void
+emit_movzwl(struct bpf_jit_state *st, uint32_t sreg, uint32_t dreg)
+{
+	static const uint8_t ops[] = {0x0F, 0xB7};
+
+	emit_rex(st, BPF_ALU, sreg, dreg);
+	emit_bytes(st, ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+/*
+ * emit ror <imm8>, %<dreg>
+ * the 0x66 prefix makes it operate on the low 16 bits of the register.
+ */
+static void
+emit_ror_imm(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+	const uint8_t prfx = 0x66;
+	const uint8_t ops = 0xC1;
+	const uint8_t mods = 1;
+
+	emit_bytes(st, &prfx, sizeof(prfx));
+	emit_rex(st, BPF_ALU, 0, dreg);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, mods, dreg);
+	emit_imm(st, imm, imm_size(imm));
+}
+
+/*
+ * emit bswap %<dreg>
+ * 'imm' is the operand width in bits: 64 selects the 64-bit form,
+ * anything else the 32-bit one.
+ */
+static void
+emit_be2le_48(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+	uint32_t rop;
+
+	const uint8_t ops = 0x0F;
+	const uint8_t mods = 1;
+
+	rop = (imm == 64) ? EBPF_ALU64 : BPF_ALU;
+	emit_rex(st, rop, 0, dreg);
+	emit_bytes(st, &ops, sizeof(ops));
+	/* MOD_DIRECT with reg == 1 yields second opcode byte 0xC8 + reg */
+	emit_modregrm(st, MOD_DIRECT, mods, dreg);
+}
+
+/*
+ * convert 16/32/64 bit value in %<dreg> from BE to LE (swap the bytes).
+ * the 16-bit case is done as a rotate by 8 plus zero extension.
+ */
+static void
+emit_be2le(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+	if (imm == 16) {
+		emit_ror_imm(st, dreg, 8);
+		emit_movzwl(st, dreg, dreg);
+	} else
+		emit_be2le_48(st, dreg, imm);
+}
+
+/*
+ * In general it is NOP for x86.
+ * Just clear the upper bits.
+ */
+static void
+emit_le2be(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+	if (imm == 16)
+		emit_movzwl(st, dreg, dreg);
+	else if (imm == 32)
+		emit_mov_reg(st, BPF_ALU | EBPF_MOV | BPF_X, dreg, dreg);
+}
+
+/*
+ * emit one of:
+ *   add <imm>, %<dreg>
+ *   and <imm>, %<dreg>
+ *   or  <imm>, %<dreg>
+ *   sub <imm>, %<dreg>
+ *   xor <imm>, %<dreg>
+ * the short (imm8) form of the instruction is used when 'imm' fits
+ * into a signed byte.
+ */
+static void
+emit_alu_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+	uint8_t mod, opcode;
+	uint32_t bop, imsz;
+
+	const uint8_t op8 = 0x83;
+	const uint8_t op32 = 0x81;
+	static const uint8_t mods[] = {
+		[GET_BPF_OP(BPF_ADD)] = 0,
+		[GET_BPF_OP(BPF_AND)] = 4,
+		[GET_BPF_OP(BPF_OR)] =  1,
+		[GET_BPF_OP(BPF_SUB)] = 5,
+		[GET_BPF_OP(BPF_XOR)] = 6,
+	};
+
+	bop = GET_BPF_OP(op);
+	mod = mods[bop];
+
+	imsz = imm_size(imm);
+	opcode = (imsz == 1) ? op8 : op32;
+
+	emit_rex(st, op, 0, dreg);
+	emit_bytes(st, &opcode, sizeof(opcode));
+	emit_modregrm(st, MOD_DIRECT, mod, dreg);
+	emit_imm(st, imm, imsz);
+}
+
+/*
+ * emit one of:
+ *   add %<sreg>, %<dreg>
+ *   and %<sreg>, %<dreg>
+ *   or  %<sreg>, %<dreg>
+ *   sub %<sreg>, %<dreg>
+ *   xor %<sreg>, %<dreg>
+ */
+static void
+emit_alu_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+	uint32_t dreg)
+{
+	uint32_t bop;
+
+	static const uint8_t ops[] = {
+		[GET_BPF_OP(BPF_ADD)] = 0x01,
+		[GET_BPF_OP(BPF_AND)] = 0x21,
+		[GET_BPF_OP(BPF_OR)] =  0x09,
+		[GET_BPF_OP(BPF_SUB)] = 0x29,
+		[GET_BPF_OP(BPF_XOR)] = 0x31,
+	};
+
+	bop = GET_BPF_OP(op);
+
+	emit_rex(st, op, sreg, dreg);
+	emit_bytes(st, &ops[bop], sizeof(ops[bop]));
+	emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+/*
+ * emit opcode and ModRM bytes of a shift instruction.
+ * For BPF_K source the caller has to append the imm8 shift count,
+ * for BPF_X source the count is taken from %cl.
+ */
+static void
+emit_shift(struct bpf_jit_state *st, uint32_t op, uint32_t dreg)
+{
+	uint8_t mod;
+	uint32_t bop, opx;
+
+	static const uint8_t ops[] = {0xC1, 0xD3};
+	static const uint8_t mods[] = {
+		[GET_BPF_OP(BPF_LSH)] = 4,
+		[GET_BPF_OP(BPF_RSH)] = 5,
+		[GET_BPF_OP(EBPF_ARSH)] = 7,
+	};
+
+	bop = GET_BPF_OP(op);
+	mod = mods[bop];
+	opx = (BPF_SRC(op) == BPF_X);
+
+	emit_rex(st, op, 0, dreg);
+	emit_bytes(st, &ops[opx], sizeof(ops[opx]));
+	emit_modregrm(st, MOD_DIRECT, mod, dreg);
+}
+
+/*
+ * emit one of:
+ *   shl <imm>, %<dreg>
+ *   shr <imm>, %<dreg>
+ *   sar <imm>, %<dreg>
+ */
+static void
+emit_shift_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg,
+	uint32_t imm)
+{
+	emit_shift(st, op, dreg);
+	emit_imm(st, imm, imm_size(imm));
+}
+
+/*
+ * emit one of:
+ *   shl %<dreg>
+ *   shr %<dreg>
+ *   sar %<dreg>
+ * note that rcx is implicitly used as a source register, so few extra
+ * instructions for register spillage might be necessary.
+ */
+static void
+emit_shift_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+	uint32_t dreg)
+{
+	uint32_t reg;
+
+	/* bring the shift count into rcx */
+	if (sreg != RCX)
+		emit_xchg_reg(st, RCX, sreg);
+
+	/*
+	 * find where the value to shift lives after the exchange:
+	 * - dreg == rcx: old rcx contents are now in sreg
+	 *   (or still in rcx when sreg == rcx too);
+	 * - dreg == sreg: the value was moved into rcx by the exchange;
+	 * - otherwise dreg was not touched.
+	 */
+	if (dreg == RCX)
+		reg = sreg;
+	else if (dreg == sreg)
+		reg = RCX;
+	else
+		reg = dreg;
+
+	emit_shift(st, op, reg);
+
+	/* put everything back into its place */
+	if (sreg != RCX)
+		emit_xchg_reg(st, RCX, sreg);
+}
+
+/*
+ * emit mov <imm>, %<dreg>
+ * for 64-bit operations the 32-bit immediate is sign extended.
+ */
+static void
+emit_mov_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+	const uint8_t ops = 0xC7;
+
+	if (imm == 0) {
+		/* replace 'mov 0, %<dst>' with 'xor %<dst>, %<dst>' */
+		op = BPF_CLASS(op) | BPF_XOR | BPF_X;
+		emit_alu_reg(st, op, dreg, dreg);
+		return;
+	}
+
+	emit_rex(st, op, 0, dreg);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, 0, dreg);
+	emit_imm(st, imm, sizeof(imm));
+}
+
+/*
+ * emit mov <imm64>, %<dreg>
+ * 'imm0' is the low and 'imm1' the high 32 bits of the value.
+ * The instruction is encoded as (0xB8 + reg) followed by the immediate,
+ * there is no ModRM byte.
+ * When the high half is zero the 32-bit form is used: it is shorter and
+ * clears the upper 32 bits of the destination, while a 64-bit
+ * 'mov imm32' would sign extend values with bit 31 set.
+ */
+static void
+emit_ld_imm64(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm0,
+	uint32_t imm1)
+{
+	uint32_t op;
+	uint8_t opcode;
+
+	const uint8_t ops = 0xB8;
+
+	op = (imm1 == 0) ? BPF_ALU : EBPF_ALU64;
+
+	emit_rex(st, op, 0, dreg);
+
+	/* low 3 bits of the register number are part of the opcode byte */
+	opcode = ops | (dreg & 7);
+	emit_bytes(st, &opcode, sizeof(opcode));
+
+	emit_imm(st, imm0, sizeof(imm0));
+	if (imm1 != 0)
+		emit_imm(st, imm1, sizeof(imm1));
+}
+
+/*
+ * note that rax:rdx are implicitly used as source/destination registers,
+ * so some reg spillage is necessary.
+ * emit:
+ *   mov %rax, %r11
+ *   mov %rdx, %r10
+ *   mov %<dreg>, %rax
+ * either:
+ *   mov %<sreg>, %rdx
+ * OR
+ *   mov <imm>, %rdx
+ *   mul %rdx
+ *   mov %r10, %rdx
+ *   mov %rax, %<dreg>
+ *   mov %r11, %rax
+ */
+static void
+emit_mul(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+	uint32_t imm)
+{
+	const uint8_t ops = 0xF7;
+	const uint8_t mods = 4;
+
+	/* save rax & rdx */
+	emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, REG_TMP0);
+	emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, REG_TMP1);
+
+	/* rax = dreg */
+	emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, dreg, RAX);
+
+	if (BPF_SRC(op) == BPF_X)
+		/* rdx = sreg, rax was overwritten, so use its saved copy */
+		emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X,
+			sreg == RAX ? REG_TMP0 : sreg, RDX);
+	else
+		/* rdx = imm */
+		emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, RDX, imm);
+
+	emit_rex(st, op, RAX, RDX);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, mods, RDX);
+
+	if (dreg != RDX)
+		/* restore rdx */
+		emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP1, RDX);
+
+	if (dreg != RAX) {
+		/* dreg = rax */
+		emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, dreg);
+		/* restore rax */
+		emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP0, RAX);
+	}
+}
+
+/*
+ * emit mov <ofs>(%<sreg>), %<dreg>
+ * note that for non 64-bit ops, higher bits have to be cleared.
+ */
+static void
+emit_ld_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+	int32_t ofs)
+{
+	uint32_t mods, opsz;
+	const uint8_t op32 = 0x8B;
+	const uint8_t op16[] = {0x0F, 0xB7};
+	const uint8_t op8[] = {0x0F, 0xB6};
+
+	emit_rex(st, op, dreg, sreg);
+
+	opsz = BPF_SIZE(op);
+	if (opsz == BPF_B)
+		emit_bytes(st, op8, sizeof(op8));
+	else if (opsz == BPF_H)
+		emit_bytes(st, op16, sizeof(op16));
+	else
+		emit_bytes(st, &op32, sizeof(op32));
+
+	mods = (imm_size(ofs) == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+	emit_modregrm(st, mods, dreg, sreg);
+	/* rsp/r12 as a base register require a SIB byte */
+	if (sreg == RSP || sreg == R12)
+		emit_sib(st, SIB_SCALE_1, sreg, sreg);
+	emit_imm(st, ofs, imm_size(ofs));
+}
+
+/*
+ * emit one of:
+ *   mov %<sreg>, <ofs>(%<dreg>)
+ *   mov <imm>, <ofs>(%<dreg>)
+ * the form is selected by the class of 'op': BPF_STX stores a register,
+ * anything else stores the immediate.
+ */
+static void
+emit_st_common(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+	uint32_t dreg, uint32_t imm, int32_t ofs)
+{
+	uint32_t mods, imsz, opsz, opx;
+	const uint8_t prfx16 = 0x66;
+
+	/* 8 bit instruction opcodes */
+	static const uint8_t op8[] = {0xC6, 0x88};
+
+	/* 16/32/64 bit instruction opcodes */
+	static const uint8_t ops[] = {0xC7, 0x89};
+
+	/* is the instruction has immediate value or src reg? */
+	opx = (BPF_CLASS(op) == BPF_STX);
+
+	opsz = BPF_SIZE(op);
+	if (opsz == BPF_H)
+		emit_bytes(st, &prfx16, sizeof(prfx16));
+
+	emit_rex(st, op, sreg, dreg);
+
+	if (opsz == BPF_B)
+		emit_bytes(st, &op8[opx], sizeof(op8[opx]));
+	else
+		emit_bytes(st, &ops[opx], sizeof(ops[opx]));
+
+	imsz = imm_size(ofs);
+	mods = (imsz == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+	emit_modregrm(st, mods, sreg, dreg);
+
+	/* rsp/r12 as a base register require a SIB byte */
+	if (dreg == RSP || dreg == R12)
+		emit_sib(st, SIB_SCALE_1, dreg, dreg);
+
+	emit_imm(st, ofs, imsz);
+
+	if (opx == 0) {
+		/* immediate is at most 4 bytes, even for 64-bit stores */
+		imsz = RTE_MIN(bpf_size(opsz), sizeof(imm));
+		emit_imm(st, imm, imsz);
+	}
+}
+
+/*
+ * emit mov <imm>, <ofs>(%<dreg>)
+ */
+static void
+emit_st_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm,
+	int32_t ofs)
+{
+	emit_st_common(st, op, 0, dreg, imm, ofs);
+}
+
+/*
+ * emit mov %<sreg>, <ofs>(%<dreg>)
+ */
+static void
+emit_st_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+	int32_t ofs)
+{
+	emit_st_common(st, op, sreg, dreg, 0, ofs);
+}
+
+/*
+ * emit lock add %<sreg>, <ofs>(%<dreg>)
+ */
+static void
+emit_st_xadd(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+	uint32_t dreg, int32_t ofs)
+{
+	uint32_t imsz, mods;
+
+	const uint8_t lck = 0xF0; /* lock prefix */
+	const uint8_t ops = 0x01; /* add opcode */
+
+	imsz = imm_size(ofs);
+	mods = (imsz == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+	emit_bytes(st, &lck, sizeof(lck));
+	emit_rex(st, op, sreg, dreg);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, mods, sreg, dreg);
+
+	/* same as for other memory operands: rsp/r12 base needs a SIB byte */
+	if (dreg == RSP || dreg == R12)
+		emit_sib(st, SIB_SCALE_1, dreg, dreg);
+
+	emit_imm(st, ofs, imsz);
+}
+
+/*
+ * emit:
+ *    mov <imm64>, %rax
+ *    call *%rax
+ */
+static void
+emit_call(struct bpf_jit_state *st, uintptr_t trg)
+{
+	const uint8_t ops = 0xFF;
+	const uint8_t mods = 2;
+
+	emit_ld_imm64(st, RAX, trg, trg >> 32);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, mods, RAX);
+}
+
+/*
+ * emit jmp <ofs>
+ * where 'ofs' is the target offset for the native code.
+ * The 2-byte (rel8) form is used only when the displacement is known
+ * to fit into a signed byte, otherwise the 5-byte (rel32) form is used.
+ */
+static void
+emit_abs_jmp(struct bpf_jit_state *st, int32_t ofs)
+{
+	int32_t joff;
+	uint32_t imsz;
+
+	const uint8_t op8 = 0xEB;
+	const uint8_t op32 = 0xE9;
+
+	const int32_t sz8 = sizeof(op8) + sizeof(uint8_t);
+	const int32_t sz32 = sizeof(op32) + sizeof(uint32_t);
+
+	/* max possible jmp instruction size */
+	const int32_t iszm = RTE_MAX(sz8, sz32);
+
+	/* distance from the start of this instruction to the target */
+	joff = ofs - st->sz;
+
+	/*
+	 * The displacement is relative to the end of the instruction,
+	 * so for the short form it is (joff - sz8) and that value has to
+	 * fit into int8_t. For forward jumps keep 'iszm' bytes of slack.
+	 * Use 64-bit math: 'ofs' can still be the INT32_MAX placeholder.
+	 */
+	if ((int64_t)joff - sz8 >= INT8_MIN &&
+			(int64_t)joff + iszm <= INT8_MAX)
+		imsz = sizeof(int8_t);
+	else
+		imsz = sizeof(int32_t);
+
+	if (imsz == 1) {
+		emit_bytes(st, &op8, sizeof(op8));
+		joff -= sz8;
+	} else {
+		emit_bytes(st, &op32, sizeof(op32));
+		joff -= sz32;
+	}
+
+	emit_imm(st, joff, imsz);
+}
+
+/*
+ * emit jmp <ofs>
+ * where 'ofs' is the target offset for the BPF bytecode.
+ * The native offset of the target is looked up in st->off[],
+ * indexed by (index of current BPF instruction + 'ofs').
+ */
+static void
+emit_jmp(struct bpf_jit_state *st, int32_t ofs)
+{
+	emit_abs_jmp(st, st->off[st->idx + ofs]);
+}
+
+/*
+ * emit one of:
+ *    cmovz %<sreg>, <%dreg>
+ *    cmovne %<sreg>, <%dreg>
+ *    cmova %<sreg>, <%dreg>
+ *    cmovb %<sreg>, <%dreg>
+ *    cmovae %<sreg>, <%dreg>
+ *    cmovbe %<sreg>, <%dreg>
+ *    cmovg %<sreg>, <%dreg>
+ *    cmovl %<sreg>, <%dreg>
+ *    cmovge %<sreg>, <%dreg>
+ *    cmovle %<sreg>, <%dreg>
+ * the condition is selected by the BPF jump operation encoded in 'op',
+ * operand size by the class of 'op' (see emit_rex()).
+ */
+static void
+emit_movcc_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+	uint32_t dreg)
+{
+	uint32_t bop;
+
+	static const uint8_t ops[][2] = {
+		[GET_BPF_OP(BPF_JEQ)] = {0x0F, 0x44},  /* CMOVZ */
+		[GET_BPF_OP(EBPF_JNE)] = {0x0F, 0x45},  /* CMOVNE */
+		[GET_BPF_OP(BPF_JGT)] = {0x0F, 0x47},  /* CMOVA */
+		[GET_BPF_OP(EBPF_JLT)] = {0x0F, 0x42},  /* CMOVB */
+		[GET_BPF_OP(BPF_JGE)] = {0x0F, 0x43},  /* CMOVAE */
+		[GET_BPF_OP(EBPF_JLE)] = {0x0F, 0x46},  /* CMOVBE */
+		[GET_BPF_OP(EBPF_JSGT)] = {0x0F, 0x4F}, /* CMOVG */
+		[GET_BPF_OP(EBPF_JSLT)] = {0x0F, 0x4C}, /* CMOVL */
+		[GET_BPF_OP(EBPF_JSGE)] = {0x0F, 0x4D}, /* CMOVGE */
+		[GET_BPF_OP(EBPF_JSLE)] = {0x0F, 0x4E}, /* CMOVLE */
+		[GET_BPF_OP(BPF_JSET)] = {0x0F, 0x45}, /* CMOVNE */
+	};
+
+	bop = GET_BPF_OP(op);
+
+	/* destination goes into ModRM.reg, source into ModRM.rm */
+	emit_rex(st, op, dreg, sreg);
+	emit_bytes(st, ops[bop], sizeof(ops[bop]));
+	emit_modregrm(st, MOD_DIRECT, dreg, sreg);
+}
+
+/*
+ * emit one of:
+ * je <ofs>
+ * jne <ofs>
+ * ja <ofs>
+ * jb <ofs>
+ * jae <ofs>
+ * jbe <ofs>
+ * jg <ofs>
+ * jl <ofs>
+ * jge <ofs>
+ * jle <ofs>
+ * where 'ofs' is the target offset for the native code.
+ * The 2-byte (rel8) form is used only when the displacement is known
+ * to fit into a signed byte, otherwise the 6-byte (rel32) form is used.
+ */
+static void
+emit_abs_jcc(struct bpf_jit_state *st, uint32_t op, int32_t ofs)
+{
+	uint32_t bop, imsz;
+	int32_t joff;
+
+	static const uint8_t op8[] = {
+		[GET_BPF_OP(BPF_JEQ)] = 0x74,  /* JE */
+		[GET_BPF_OP(EBPF_JNE)] = 0x75,  /* JNE */
+		[GET_BPF_OP(BPF_JGT)] = 0x77,  /* JA */
+		[GET_BPF_OP(EBPF_JLT)] = 0x72,  /* JB */
+		[GET_BPF_OP(BPF_JGE)] = 0x73,  /* JAE */
+		[GET_BPF_OP(EBPF_JLE)] = 0x76,  /* JBE */
+		[GET_BPF_OP(EBPF_JSGT)] = 0x7F, /* JG */
+		[GET_BPF_OP(EBPF_JSLT)] = 0x7C, /* JL */
+		[GET_BPF_OP(EBPF_JSGE)] = 0x7D, /* JGE */
+		[GET_BPF_OP(EBPF_JSLE)] = 0x7E, /* JLE */
+		[GET_BPF_OP(BPF_JSET)] = 0x75, /* JNE */
+	};
+
+	static const uint8_t op32[][2] = {
+		[GET_BPF_OP(BPF_JEQ)] = {0x0F, 0x84},  /* JE */
+		[GET_BPF_OP(EBPF_JNE)] = {0x0F, 0x85},  /* JNE */
+		[GET_BPF_OP(BPF_JGT)] = {0x0F, 0x87},  /* JA */
+		[GET_BPF_OP(EBPF_JLT)] = {0x0F, 0x82},  /* JB */
+		[GET_BPF_OP(BPF_JGE)] = {0x0F, 0x83},  /* JAE */
+		[GET_BPF_OP(EBPF_JLE)] = {0x0F, 0x86},  /* JBE */
+		[GET_BPF_OP(EBPF_JSGT)] = {0x0F, 0x8F}, /* JG */
+		[GET_BPF_OP(EBPF_JSLT)] = {0x0F, 0x8C}, /* JL */
+		[GET_BPF_OP(EBPF_JSGE)] = {0x0F, 0x8D}, /* JGE */
+		[GET_BPF_OP(EBPF_JSLE)] = {0x0F, 0x8E}, /* JLE */
+		[GET_BPF_OP(BPF_JSET)] = {0x0F, 0x85}, /* JNE */
+	};
+
+	const int32_t sz8 = sizeof(op8[0]) + sizeof(uint8_t);
+	const int32_t sz32 = sizeof(op32[0]) + sizeof(uint32_t);
+
+	/* max possible jcc instruction size */
+	const int32_t iszm = RTE_MAX(sz8, sz32);
+
+	/* distance from the start of this instruction to the target */
+	joff = ofs - st->sz;
+
+	/*
+	 * The displacement is relative to the end of the instruction,
+	 * so for the short form it is (joff - sz8) and that value has to
+	 * fit into int8_t. For forward jumps keep 'iszm' bytes of slack.
+	 * Use 64-bit math: 'ofs' can still be the INT32_MAX placeholder.
+	 */
+	if ((int64_t)joff - sz8 >= INT8_MIN &&
+			(int64_t)joff + iszm <= INT8_MAX)
+		imsz = sizeof(int8_t);
+	else
+		imsz = sizeof(int32_t);
+
+	bop = GET_BPF_OP(op);
+
+	if (imsz == 1) {
+		emit_bytes(st, &op8[bop], sizeof(op8[bop]));
+		joff -= sz8;
+	} else {
+		emit_bytes(st, op32[bop], sizeof(op32[bop]));
+		joff -= sz32;
+	}
+
+	emit_imm(st, joff, imsz);
+}
+
+/*
+ * emit one of:
+ * je <ofs>
+ * jne <ofs>
+ * ja <ofs>
+ * jb <ofs>
+ * jae <ofs>
+ * jbe <ofs>
+ * jg <ofs>
+ * jl <ofs>
+ * jge <ofs>
+ * jle <ofs>
+ * where 'ofs' is the target offset for the BPF bytecode.
+ */
+static void
+emit_jcc(struct bpf_jit_state *st, uint32_t op, int32_t ofs)
+{
+	emit_abs_jcc(st, op, st->off[st->idx + ofs]);
+}
+
+
+/*
+ * emit cmp <imm>, %<dreg>
+ * the short (imm8) form is used when 'imm' fits into a signed byte.
+ */
+static void
+emit_cmp_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+	uint8_t ops;
+	uint32_t imsz;
+
+	const uint8_t op8 = 0x83;
+	const uint8_t op32 = 0x81;
+	const uint8_t mods = 7;
+
+	imsz = imm_size(imm);
+	ops = (imsz == 1) ? op8 : op32;
+
+	emit_rex(st, op, 0, dreg);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, mods, dreg);
+	emit_imm(st, imm, imsz);
+}
+
+/*
+ * emit test <imm>, %<dreg>
+ * opcode 0xF7 /0 has no short form: the immediate is always 4 bytes.
+ */
+static void
+emit_tst_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+	const uint8_t ops = 0xF7;
+	const uint8_t mods = 0;
+
+	emit_rex(st, op, 0, dreg);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, mods, dreg);
+	emit_imm(st, imm, sizeof(imm));
+}
+
+/*
+ * emit 64-bit compare (or test for BPF_JSET) of %<dreg> with <imm>,
+ * followed by a conditional jump to BPF instruction at relative
+ * offset 'ofs'.
+ */
+static void
+emit_jcc_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg,
+	uint32_t imm, int32_t ofs)
+{
+	if (BPF_OP(op) == BPF_JSET)
+		emit_tst_imm(st, EBPF_ALU64, dreg, imm);
+	else
+		emit_cmp_imm(st, EBPF_ALU64, dreg, imm);
+
+	emit_jcc(st, op, ofs);
+}
+
+/*
+ * emit test %<sreg>, %<dreg>
+ */
+static void
+emit_tst_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+	uint32_t dreg)
+{
+	const uint8_t ops = 0x85;
+
+	emit_rex(st, op, sreg, dreg);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+/*
+ * emit cmp %<sreg>, %<dreg>
+ */
+static void
+emit_cmp_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+	uint32_t dreg)
+{
+	const uint8_t ops = 0x39;
+
+	emit_rex(st, op, sreg, dreg);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+
+}
+
+/*
+ * emit 64-bit compare (or test for BPF_JSET) of %<dreg> with %<sreg>,
+ * followed by a conditional jump to BPF instruction at relative
+ * offset 'ofs'.
+ */
+static void
+emit_jcc_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+	uint32_t dreg, int32_t ofs)
+{
+	if (BPF_OP(op) == BPF_JSET)
+		emit_tst_reg(st, EBPF_ALU64, sreg, dreg);
+	else
+		emit_cmp_reg(st, EBPF_ALU64, sreg, dreg);
+
+	emit_jcc(st, op, ofs);
+}
+
+/*
+ * note that rax:rdx are implicitly used as source/destination registers,
+ * so some reg spillage is necessary.
+ * emit:
+ *    mov %rax, %r11
+ *    mov %rdx, %r10
+ *    mov %<dreg>, %rax
+ *    xor %rdx, %rdx
+ * for divisor as immediate value:
+ *    mov <imm>, %r9
+ *    div %<divisor_reg>
+ * either:
+ *    mov %rax, %<dreg>
+ * OR
+ *    mov %rdx, %<dreg>
+ *    mov %r11, %rax
+ *    mov %r10, %rdx
+ * For a register divisor a zero check is emitted first: on zero the
+ * generated code jumps to the epilog with rax set to zero.
+ */
+static void
+emit_div(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+	uint32_t imm)
+{
+	uint32_t sr;
+
+	const uint8_t ops = 0xF7;
+	const uint8_t mods = 6;
+
+	if (BPF_SRC(op) == BPF_X) {
+
+		/* check that src divisor is not zero */
+		emit_tst_reg(st, BPF_CLASS(op), sreg, sreg);
+
+		/* exit with return value zero */
+		emit_movcc_reg(st, BPF_CLASS(op) | BPF_JEQ | BPF_X, sreg, RAX);
+		emit_abs_jcc(st, BPF_JMP | BPF_JEQ | BPF_K, st->exit.off);
+	}
+
+	/*
+	 * save rax & rdx.
+	 * Done unconditionally: the saved copies are also the source of
+	 * the divisor when sreg is rax or rdx, even if sreg == dreg.
+	 */
+	emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, REG_TMP0);
+	emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, REG_TMP1);
+
+	/* fill rax & rdx */
+	emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, dreg, RAX);
+	emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, RDX, 0);
+
+	if (BPF_SRC(op) == BPF_X) {
+		/* rax and rdx were overwritten above, use the saved copies */
+		sr = sreg;
+		if (sr == RAX)
+			sr = REG_TMP0;
+		else if (sr == RDX)
+			sr = REG_TMP1;
+	} else {
+		sr = REG_DIV_IMM;
+		emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, sr, imm);
+	}
+
+	emit_rex(st, op, 0, sr);
+	emit_bytes(st, &ops, sizeof(ops));
+	emit_modregrm(st, MOD_DIRECT, mods, sr);
+
+	/* quotient is in rax, remainder is in rdx */
+	if (BPF_OP(op) == BPF_DIV)
+		emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, dreg);
+	else
+		emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, dreg);
+
+	/* restore rax & rdx, unless one of them holds the result */
+	if (dreg != RAX)
+		emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP0, RAX);
+	if (dreg != RDX)
+		emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP1, RDX);
+}
+
+/*
+ * emit function prolog:
+ * reserve stack space for and save each callee saved register that is
+ * marked as used in st->reguse; if rbp is among them, point it to the
+ * saved registers area and reserve 'stack_size' bytes below it.
+ * Nothing is emitted if no callee saved register is in use.
+ */
+static void
+emit_prolog(struct bpf_jit_state *st, int32_t stack_size)
+{
+	uint32_t i;
+	int32_t spil, ofs;
+
+	spil = 0;
+	for (i = 0; i != RTE_DIM(save_regs); i++)
+		spil += INUSE(st->reguse, save_regs[i]);
+
+	/* we can avoid touching the stack at all */
+	if (spil == 0)
+		return;
+
+
+	emit_alu_imm(st, EBPF_ALU64 | BPF_SUB | BPF_K, RSP,
+		spil * sizeof(uint64_t));
+
+	ofs = 0;
+	for (i = 0; i != RTE_DIM(save_regs); i++) {
+		if (INUSE(st->reguse, save_regs[i]) != 0) {
+			emit_st_reg(st, BPF_STX | BPF_MEM | EBPF_DW,
+				save_regs[i], RSP, ofs);
+			ofs += sizeof(uint64_t);
+		}
+	}
+
+	if (INUSE(st->reguse, RBP) != 0) {
+		emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RSP, RBP);
+		emit_alu_imm(st, EBPF_ALU64 | BPF_SUB | BPF_K, RSP, stack_size);
+	}
+}
+
+/*
+ * emit ret
+ */
+static void
+emit_ret(struct bpf_jit_state *st)
+{
+	const uint8_t ops = 0xC3;
+
+	emit_bytes(st, &ops, sizeof(ops));
+}
+
+/*
+ * emit function epilog (mirror of the prolog) followed by 'ret'.
+ * Only the first call within a pass generates the epilog block and
+ * records its offset in st->exit.off, subsequent calls emit a jump to it.
+ */
+static void
+emit_epilog(struct bpf_jit_state *st)
+{
+	uint32_t i;
+	int32_t spil, ofs;
+
+	/* if we already have an epilog generate a jump to it */
+	if (st->exit.num++ != 0) {
+		emit_abs_jmp(st, st->exit.off);
+		return;
+	}
+
+	/* store offset of epilog block */
+	st->exit.off = st->sz;
+
+	spil = 0;
+	for (i = 0; i != RTE_DIM(save_regs); i++)
+		spil += INUSE(st->reguse, save_regs[i]);
+
+	if (spil != 0) {
+
+		/* drop the BPF stack: rsp back to the saved registers area */
+		if (INUSE(st->reguse, RBP) != 0)
+			emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X,
+				RBP, RSP);
+
+		ofs = 0;
+		for (i = 0; i != RTE_DIM(save_regs); i++) {
+			if (INUSE(st->reguse, save_regs[i]) != 0) {
+				emit_ld_reg(st, BPF_LDX | BPF_MEM | EBPF_DW,
+					RSP, save_regs[i], ofs);
+				ofs += sizeof(uint64_t);
+			}
+		}
+
+		emit_alu_imm(st, EBPF_ALU64 | BPF_ADD | BPF_K, RSP,
+			spil * sizeof(uint64_t));
+	}
+
+	emit_ret(st);
+}
+
+/*
+ * walk through bpf code and translate them x86_64 one.
+ * One pass over all BPF instructions: records the native code offset of
+ * each instruction in st->off[] and emits (or, when st->ins is NULL,
+ * just sizes) the native code for it.
+ * st->reguse and st->exit.off are deliberately kept from the previous
+ * pass, they are used by the prolog and by jumps to the epilog.
+ * Returns 0 on success, -EINVAL for an unsupported opcode.
+ */
+static int
+emit(struct bpf_jit_state *st, const struct rte_bpf *bpf)
+{
+	uint32_t i, dr, op, sr;
+	const struct ebpf_insn *ins;
+
+	/* reset state fields */
+	st->sz = 0;
+	st->exit.num = 0;
+
+	emit_prolog(st, bpf->stack_sz);
+
+	for (i = 0; i != bpf->prm.nb_ins; i++) {
+
+		st->idx = i;
+		st->off[i] = st->sz;
+
+		ins = bpf->prm.ins + i;
+
+		/* translate eBPF register numbers into x86_64 ones */
+		dr = ebpf2x86[ins->dst_reg];
+		sr = ebpf2x86[ins->src_reg];
+		op = ins->code;
+
+		switch (op) {
+		/* 32 bit ALU IMM operations */
+		case (BPF_ALU | BPF_ADD | BPF_K):
+		case (BPF_ALU | BPF_SUB | BPF_K):
+		case (BPF_ALU | BPF_AND | BPF_K):
+		case (BPF_ALU | BPF_OR | BPF_K):
+		case (BPF_ALU | BPF_XOR | BPF_K):
+			emit_alu_imm(st, op, dr, ins->imm);
+			break;
+		case (BPF_ALU | BPF_LSH | BPF_K):
+		case (BPF_ALU | BPF_RSH | BPF_K):
+			emit_shift_imm(st, op, dr, ins->imm);
+			break;
+		case (BPF_ALU | EBPF_MOV | BPF_K):
+			emit_mov_imm(st, op, dr, ins->imm);
+			break;
+		/* 32 bit ALU REG operations */
+		case (BPF_ALU | BPF_ADD | BPF_X):
+		case (BPF_ALU | BPF_SUB | BPF_X):
+		case (BPF_ALU | BPF_AND | BPF_X):
+		case (BPF_ALU | BPF_OR | BPF_X):
+		case (BPF_ALU | BPF_XOR | BPF_X):
+			emit_alu_reg(st, op, sr, dr);
+			break;
+		case (BPF_ALU | BPF_LSH | BPF_X):
+		case (BPF_ALU | BPF_RSH | BPF_X):
+			emit_shift_reg(st, op, sr, dr);
+			break;
+		case (BPF_ALU | EBPF_MOV | BPF_X):
+			emit_mov_reg(st, op, sr, dr);
+			break;
+		case (BPF_ALU | BPF_NEG):
+			emit_neg(st, op, dr);
+			break;
+		/* byte order conversions, imm holds the width in bits */
+		case (BPF_ALU | EBPF_END | EBPF_TO_BE):
+			emit_be2le(st, dr, ins->imm);
+			break;
+		case (BPF_ALU | EBPF_END | EBPF_TO_LE):
+			emit_le2be(st, dr, ins->imm);
+			break;
+		/* 64 bit ALU IMM operations */
+		case (EBPF_ALU64 | BPF_ADD | BPF_K):
+		case (EBPF_ALU64 | BPF_SUB | BPF_K):
+		case (EBPF_ALU64 | BPF_AND | BPF_K):
+		case (EBPF_ALU64 | BPF_OR | BPF_K):
+		case (EBPF_ALU64 | BPF_XOR | BPF_K):
+			emit_alu_imm(st, op, dr, ins->imm);
+			break;
+		case (EBPF_ALU64 | BPF_LSH | BPF_K):
+		case (EBPF_ALU64 | BPF_RSH | BPF_K):
+		case (EBPF_ALU64 | EBPF_ARSH | BPF_K):
+			emit_shift_imm(st, op, dr, ins->imm);
+			break;
+		case (EBPF_ALU64 | EBPF_MOV | BPF_K):
+			emit_mov_imm(st, op, dr, ins->imm);
+			break;
+		/* 64 bit ALU REG operations */
+		case (EBPF_ALU64 | BPF_ADD | BPF_X):
+		case (EBPF_ALU64 | BPF_SUB | BPF_X):
+		case (EBPF_ALU64 | BPF_AND | BPF_X):
+		case (EBPF_ALU64 | BPF_OR | BPF_X):
+		case (EBPF_ALU64 | BPF_XOR | BPF_X):
+			emit_alu_reg(st, op, sr, dr);
+			break;
+		case (EBPF_ALU64 | BPF_LSH | BPF_X):
+		case (EBPF_ALU64 | BPF_RSH | BPF_X):
+		case (EBPF_ALU64 | EBPF_ARSH | BPF_X):
+			emit_shift_reg(st, op, sr, dr);
+			break;
+		case (EBPF_ALU64 | EBPF_MOV | BPF_X):
+			emit_mov_reg(st, op, sr, dr);
+			break;
+		case (EBPF_ALU64 | BPF_NEG):
+			emit_neg(st, op, dr);
+			break;
+		/* multiply instructions */
+		case (BPF_ALU | BPF_MUL | BPF_K):
+		case (BPF_ALU | BPF_MUL | BPF_X):
+		case (EBPF_ALU64 | BPF_MUL | BPF_K):
+		case (EBPF_ALU64 | BPF_MUL | BPF_X):
+			emit_mul(st, op, sr, dr, ins->imm);
+			break;
+		/* divide instructions */
+		case (BPF_ALU | BPF_DIV | BPF_K):
+		case (BPF_ALU | BPF_MOD | BPF_K):
+		case (BPF_ALU | BPF_DIV | BPF_X):
+		case (BPF_ALU | BPF_MOD | BPF_X):
+		case (EBPF_ALU64 | BPF_DIV | BPF_K):
+		case (EBPF_ALU64 | BPF_MOD | BPF_K):
+		case (EBPF_ALU64 | BPF_DIV | BPF_X):
+		case (EBPF_ALU64 | BPF_MOD | BPF_X):
+			emit_div(st, op, sr, dr, ins->imm);
+			break;
+		/* load instructions */
+		case (BPF_LDX | BPF_MEM | BPF_B):
+		case (BPF_LDX | BPF_MEM | BPF_H):
+		case (BPF_LDX | BPF_MEM | BPF_W):
+		case (BPF_LDX | BPF_MEM | EBPF_DW):
+			emit_ld_reg(st, op, sr, dr, ins->off);
+			break;
+		/*
+		 * load 64 bit immediate value:
+		 * it occupies two instruction slots, the second one carries
+		 * the upper 32 bits, so skip it.
+		 */
+		case (BPF_LD | BPF_IMM | EBPF_DW):
+			emit_ld_imm64(st, dr, ins[0].imm, ins[1].imm);
+			i++;
+			break;
+		/* store instructions */
+		case (BPF_STX | BPF_MEM | BPF_B):
+		case (BPF_STX | BPF_MEM | BPF_H):
+		case (BPF_STX | BPF_MEM | BPF_W):
+		case (BPF_STX | BPF_MEM | EBPF_DW):
+			emit_st_reg(st, op, sr, dr, ins->off);
+			break;
+		case (BPF_ST | BPF_MEM | BPF_B):
+		case (BPF_ST | BPF_MEM | BPF_H):
+		case (BPF_ST | BPF_MEM | BPF_W):
+		case (BPF_ST | BPF_MEM | EBPF_DW):
+			emit_st_imm(st, op, dr, ins->imm, ins->off);
+			break;
+		/* atomic add instructions */
+		case (BPF_STX | EBPF_XADD | BPF_W):
+		case (BPF_STX | EBPF_XADD | EBPF_DW):
+			emit_st_xadd(st, op, sr, dr, ins->off);
+			break;
+		/*
+		 * jump instructions:
+		 * the target index is (current index + off + 1).
+		 */
+		case (BPF_JMP | BPF_JA):
+			emit_jmp(st, ins->off + 1);
+			break;
+		/* jump IMM instructions */
+		case (BPF_JMP | BPF_JEQ | BPF_K):
+		case (BPF_JMP | EBPF_JNE | BPF_K):
+		case (BPF_JMP | BPF_JGT | BPF_K):
+		case (BPF_JMP | EBPF_JLT | BPF_K):
+		case (BPF_JMP | BPF_JGE | BPF_K):
+		case (BPF_JMP | EBPF_JLE | BPF_K):
+		case (BPF_JMP | EBPF_JSGT | BPF_K):
+		case (BPF_JMP | EBPF_JSLT | BPF_K):
+		case (BPF_JMP | EBPF_JSGE | BPF_K):
+		case (BPF_JMP | EBPF_JSLE | BPF_K):
+		case (BPF_JMP | BPF_JSET | BPF_K):
+			emit_jcc_imm(st, op, dr, ins->imm, ins->off + 1);
+			break;
+		/* jump REG instructions */
+		case (BPF_JMP | BPF_JEQ | BPF_X):
+		case (BPF_JMP | EBPF_JNE | BPF_X):
+		case (BPF_JMP | BPF_JGT | BPF_X):
+		case (BPF_JMP | EBPF_JLT | BPF_X):
+		case (BPF_JMP | BPF_JGE | BPF_X):
+		case (BPF_JMP | EBPF_JLE | BPF_X):
+		case (BPF_JMP | EBPF_JSGT | BPF_X):
+		case (BPF_JMP | EBPF_JSLT | BPF_X):
+		case (BPF_JMP | EBPF_JSGE | BPF_X):
+		case (BPF_JMP | EBPF_JSLE | BPF_X):
+		case (BPF_JMP | BPF_JSET | BPF_X):
+			emit_jcc_reg(st, op, sr, dr, ins->off + 1);
+			break;
+		/* call instructions, imm is an index into the xsym table */
+		case (BPF_JMP | EBPF_CALL):
+			emit_call(st, (uintptr_t)bpf->prm.xsym[ins->imm].func);
+			break;
+		/* return instruction */
+		case (BPF_JMP | EBPF_EXIT):
+			emit_epilog(st);
+			break;
+		default:
+			RTE_BPF_LOG(ERR,
+				"%s(%p): invalid opcode %#x at pc: %u;\n",
+				__func__, bpf, ins->code, i);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * produce a native ISA version of the given BPF code.
+ * On success bpf->jit.func points to the generated, read+exec only,
+ * code and bpf->jit.sz holds its size.
+ * Returns 0 on success, negative errno value otherwise:
+ *   -ENOMEM - memory allocation, mapping or protection change failed;
+ *   -EINVAL - BPF code contains an opcode that can't be translated.
+ */
+int
+bpf_jit_x86(struct rte_bpf *bpf)
+{
+	int32_t rc;
+	uint32_t i;
+	size_t sz;
+	void *mem;
+	struct bpf_jit_state st;
+
+	/* init state */
+	memset(&st, 0, sizeof(st));
+	st.off = malloc(bpf->prm.nb_ins * sizeof(st.off[0]));
+	if (st.off == NULL)
+		return -ENOMEM;
+
+	/* fill with fake offsets */
+	st.exit.off = INT32_MAX;
+	for (i = 0; i != bpf->prm.nb_ins; i++)
+		st.off[i] = INT32_MAX;
+
+	/*
+	 * dry runs, used to calculate total code size and valid jump offsets.
+	 * stop when we get minimal possible size
+	 */
+	do {
+		sz = st.sz;
+		rc = emit(&st, bpf);
+	} while (rc == 0 && sz != st.sz);
+
+	/* nothing is mapped yet */
+	mem = MAP_FAILED;
+
+	if (rc == 0) {
+
+		/* allocate memory needed, at that point sz == st.sz */
+		mem = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (mem == MAP_FAILED)
+			rc = -ENOMEM;
+		else {
+			/* generate code */
+			st.ins = mem;
+			rc = emit(&st, bpf);
+		}
+	}
+
+	if (rc == 0 && mprotect(mem, sz, PROT_READ | PROT_EXEC) != 0)
+		rc = -ENOMEM;
+
+	if (rc != 0) {
+		/*
+		 * release the mapping only if it was really created,
+		 * and use the length it was created with.
+		 */
+		if (mem != MAP_FAILED)
+			munmap(mem, sz);
+	} else {
+		bpf->jit.func = (void *)st.ins;
+		bpf->jit.sz = st.sz;
+	}
+
+	free(st.off);
+	return rc;
+}
diff --git a/lib/librte_bpf/meson.build b/lib/librte_bpf/meson.build
index a6a9229bdf..668c89184e 100644
--- a/lib/librte_bpf/meson.build
+++ b/lib/librte_bpf/meson.build
@@ -7,6 +7,10 @@ sources = files('bpf.c',
 		'bpf_load.c',
 		'bpf_validate.c')
 
+if arch_subdir == 'x86' and dpdk_conf.get('RTE_ARCH_64')
+	sources += files('bpf_jit_x86.c')
+endif
+
 install_headers = files('bpf_def.h',
 			'rte_bpf.h')