path: root/lib/librte_bpf
author     Luca Boccassi <luca.boccassi@gmail.com>  2018-08-14 18:52:30 +0100
committer  Luca Boccassi <luca.boccassi@gmail.com>  2018-08-14 18:53:17 +0100
commit     b63264c8342e6a1b6971c79550d2af2024b6a4de
tree       83114aac64286fe616506c0b3dfaec2ab86ef835 /lib/librte_bpf
parent     ca33590b6af032bff57d9cc70455660466a654b2

New upstream version 18.08 (upstream/18.08)

Change-Id: I32fdf5e5016556d9c0a6d88ddaf1fc468961790a
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib/librte_bpf')
-rw-r--r--  lib/librte_bpf/Makefile             |   41
-rw-r--r--  lib/librte_bpf/bpf.c                |   61
-rw-r--r--  lib/librte_bpf/bpf_def.h            |  143
-rw-r--r--  lib/librte_bpf/bpf_exec.c           |  453
-rw-r--r--  lib/librte_bpf/bpf_impl.h           |   55
-rw-r--r--  lib/librte_bpf/bpf_jit_x86.c        | 1356
-rw-r--r--  lib/librte_bpf/bpf_load.c           |  148
-rw-r--r--  lib/librte_bpf/bpf_load_elf.c       |  322
-rw-r--r--  lib/librte_bpf/bpf_pkt.c            |  605
-rw-r--r--  lib/librte_bpf/bpf_validate.c       | 2248
-rw-r--r--  lib/librte_bpf/meson.build          |   25
-rw-r--r--  lib/librte_bpf/rte_bpf.h            |  203
-rw-r--r--  lib/librte_bpf/rte_bpf_ethdev.h     |  117
-rw-r--r--  lib/librte_bpf/rte_bpf_version.map  |   16
14 files changed, 5793 insertions(+), 0 deletions(-)
diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile
new file mode 100644
index 00000000..c0e8aaa6
--- /dev/null
+++ b/lib/librte_bpf/Makefile
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_bpf.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+LDLIBS += -lrte_net -lrte_eal
+LDLIBS += -lrte_mempool -lrte_ring
+LDLIBS += -lrte_mbuf -lrte_ethdev
+ifeq ($(CONFIG_RTE_LIBRTE_BPF_ELF),y)
+LDLIBS += -lelf
+endif
+
+EXPORT_MAP := rte_bpf_version.map
+
+LIBABIVER := 1
+
+# all sources are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_pkt.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c
+ifeq ($(CONFIG_RTE_LIBRTE_BPF_ELF),y)
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load_elf.c
+endif
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_jit_x86.c
+endif
+
+# install header files
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += bpf_def.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf_ethdev.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
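
The ELF loader is optional: -lelf and bpf_load_elf.c are only built when
CONFIG_RTE_LIBRTE_BPF_ELF=y. A minimal sketch (not part of this commit) of how
an application can cope with either build, relying on the weak
rte_bpf_elf_load() stub in bpf_load.c further below, which sets rte_errno to
ENOTSUP when ELF support is compiled out:

#include <stdio.h>
#include <rte_bpf.h>
#include <rte_errno.h>

/* Sketch: fall back gracefully when librte_bpf was built without libelf. */
static struct rte_bpf *
load_filter(const struct rte_bpf_prm *prm, const char *fname,
	const char *sname)
{
	struct rte_bpf *bpf;

	bpf = rte_bpf_elf_load(prm, fname, sname);
	if (bpf == NULL && rte_errno == ENOTSUP)
		fprintf(stderr, "librte_bpf was built without libelf support\n");
	return bpf;
}
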
diff --git a/lib/librte_bpf/bpf.c b/lib/librte_bpf/bpf.c
new file mode 100644
index 00000000..f590c8c3
--- /dev/null
+++ b/lib/librte_bpf/bpf.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+
+#include "bpf_impl.h"
+
+int rte_bpf_logtype;
+
+__rte_experimental void
+rte_bpf_destroy(struct rte_bpf *bpf)
+{
+ if (bpf != NULL) {
+ if (bpf->jit.func != NULL)
+ munmap(bpf->jit.func, bpf->jit.sz);
+ munmap(bpf, bpf->sz);
+ }
+}
+
+__rte_experimental int
+rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit)
+{
+ if (bpf == NULL || jit == NULL)
+ return -EINVAL;
+
+ jit[0] = bpf->jit;
+ return 0;
+}
+
+int
+bpf_jit(struct rte_bpf *bpf)
+{
+ int32_t rc;
+
+#ifdef RTE_ARCH_X86_64
+ rc = bpf_jit_x86(bpf);
+#else
+ rc = -ENOTSUP;
+#endif
+
+ if (rc != 0)
+ RTE_BPF_LOG(WARNING, "%s(%p) failed, error code: %d;\n",
+ __func__, bpf, rc);
+ return rc;
+}
+
+RTE_INIT(rte_bpf_init_log)
+{
+ rte_bpf_logtype = rte_log_register("lib.bpf");
+ if (rte_bpf_logtype >= 0)
+ rte_log_set_level(rte_bpf_logtype, RTE_LOG_INFO);
+}
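
Since bpf_jit() may legitimately fail (e.g. -ENOTSUP on non-x86_64 targets),
callers are expected to check whether a JIT image exists before using it. A
minimal usage sketch, assuming the struct rte_bpf_jit layout (func, sz)
declared in rte_bpf.h from this commit:

#include <rte_bpf.h>

/* Prefer the JIT-ed entry point when one was produced, otherwise fall back
 * to the interpreter. */
static uint64_t
run_bpf(const struct rte_bpf *bpf, void *ctx)
{
	struct rte_bpf_jit jit;

	/* jit.func stays NULL when no JIT back-end exists for this CPU */
	if (rte_bpf_get_jit(bpf, &jit) == 0 && jit.func != NULL)
		return jit.func(ctx);

	return rte_bpf_exec(bpf, ctx);
}
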
diff --git a/lib/librte_bpf/bpf_def.h b/lib/librte_bpf/bpf_def.h
new file mode 100644
index 00000000..c10f3aec
--- /dev/null
+++ b/lib/librte_bpf/bpf_def.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 1982, 1986, 1990, 1993
+ * The Regents of the University of California.
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#ifndef _RTE_BPF_DEF_H_
+#define _RTE_BPF_DEF_H_
+
+/**
+ * @file
+ *
+ * classic BPF (cBPF) and extended BPF (eBPF) related defines.
+ * For more information regarding cBPF and eBPF ISA and their differences,
+ * please refer to:
+ * https://www.kernel.org/doc/Documentation/networking/filter.txt.
+ * As a rule of thumb for this file:
+ * all definitions used by both cBPF and eBPF start with the bpf(BPF)_ prefix,
+ * while eBPF-only ones start with the ebpf(EBPF)_ prefix.
+ */
+
+#include <stdint.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * The instruction encodings.
+ */
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+#define EBPF_ALU64 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00
+#define BPF_H 0x08
+#define BPF_B 0x10
+#define EBPF_DW 0x18
+
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+#define EBPF_XADD 0xc0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+
+#define EBPF_MOV 0xb0
+#define EBPF_ARSH 0xc0
+#define EBPF_END 0xd0
+
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+
+#define EBPF_JNE 0x50
+#define EBPF_JSGT 0x60
+#define EBPF_JSGE 0x70
+#define EBPF_CALL 0x80
+#define EBPF_EXIT 0x90
+#define EBPF_JLT 0xa0
+#define EBPF_JLE 0xb0
+#define EBPF_JSLT 0xc0
+#define EBPF_JSLE 0xd0
+
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+/* if BPF_OP(code) == EBPF_END */
+#define EBPF_TO_LE 0x00 /* convert to little-endian */
+#define EBPF_TO_BE 0x08 /* convert to big-endian */
+
+/*
+ * eBPF registers
+ */
+enum {
+	EBPF_REG_0, /* return value from internal function/for eBPF program */
+	EBPF_REG_1, /* 1st argument to internal function */
+	EBPF_REG_2, /* 2nd argument to internal function */
+	EBPF_REG_3, /* 3rd argument to internal function */
+	EBPF_REG_4, /* 4th argument to internal function */
+	EBPF_REG_5, /* 5th argument to internal function */
+ EBPF_REG_6, /* callee saved register */
+ EBPF_REG_7, /* callee saved register */
+ EBPF_REG_8, /* callee saved register */
+ EBPF_REG_9, /* callee saved register */
+ EBPF_REG_10, /* stack pointer (read-only) */
+ EBPF_REG_NUM,
+};
+
+/*
+ * eBPF instruction format
+ */
+struct ebpf_insn {
+ uint8_t code;
+ uint8_t dst_reg:4;
+ uint8_t src_reg:4;
+ int16_t off;
+ int32_t imm;
+};
+
+/*
+ * eBPF allows functions with R1-R5 as arguments.
+ */
+#define EBPF_FUNC_MAX_ARGS (EBPF_REG_6 - EBPF_REG_1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_DEF_H_ */
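
A tiny illustration of how these defines and struct ebpf_insn fit together: a
hand-assembled two-instruction program that copies its input (r1) into the
return register (r0) and exits. A sketch only; both opcodes are handled by the
interpreter in bpf_exec.c below.

#include <bpf_def.h>

/* r0 = r1; exit; */
static const struct ebpf_insn ret_ctx_prog[] = {
	{
		.code = (EBPF_ALU64 | EBPF_MOV | BPF_X),
		.dst_reg = EBPF_REG_0,
		.src_reg = EBPF_REG_1,
	},
	{
		.code = (BPF_JMP | EBPF_EXIT),
	},
};
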
diff --git a/lib/librte_bpf/bpf_exec.c b/lib/librte_bpf/bpf_exec.c
new file mode 100644
index 00000000..6a79139c
--- /dev/null
+++ b/lib/librte_bpf/bpf_exec.c
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+
+#include "bpf_impl.h"
+
+#define BPF_JMP_UNC(ins) ((ins) += (ins)->off)
+
+#define BPF_JMP_CND_REG(reg, ins, op, type) \
+ ((ins) += \
+ ((type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg]) ? \
+ (ins)->off : 0)
+
+#define BPF_JMP_CND_IMM(reg, ins, op, type) \
+ ((ins) += \
+ ((type)(reg)[(ins)->dst_reg] op (type)(ins)->imm) ? \
+ (ins)->off : 0)
+
+#define BPF_NEG_ALU(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = (type)(-(reg)[(ins)->dst_reg]))
+
+#define EBPF_MOV_ALU_REG(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = (type)(reg)[(ins)->src_reg])
+
+#define BPF_OP_ALU_REG(reg, ins, op, type) \
+ ((reg)[(ins)->dst_reg] = \
+ (type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg])
+
+#define EBPF_MOV_ALU_IMM(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = (type)(ins)->imm)
+
+#define BPF_OP_ALU_IMM(reg, ins, op, type) \
+ ((reg)[(ins)->dst_reg] = \
+ (type)(reg)[(ins)->dst_reg] op (type)(ins)->imm)
+
+#define BPF_DIV_ZERO_CHECK(bpf, reg, ins, type) do { \
+ if ((type)(reg)[(ins)->src_reg] == 0) { \
+ RTE_BPF_LOG(ERR, \
+ "%s(%p): division by 0 at pc: %#zx;\n", \
+ __func__, bpf, \
+ (uintptr_t)(ins) - (uintptr_t)(bpf)->prm.ins); \
+ return 0; \
+ } \
+} while (0)
+
+#define BPF_LD_REG(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = \
+ *(type *)(uintptr_t)((reg)[(ins)->src_reg] + (ins)->off))
+
+#define BPF_ST_IMM(reg, ins, type) \
+ (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \
+ (type)(ins)->imm)
+
+#define BPF_ST_REG(reg, ins, type) \
+ (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \
+ (type)(reg)[(ins)->src_reg])
+
+#define BPF_ST_XADD_REG(reg, ins, tp) \
+ (rte_atomic##tp##_add((rte_atomic##tp##_t *) \
+ (uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off), \
+ reg[ins->src_reg]))
+
+static inline void
+bpf_alu_be(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins)
+{
+ uint64_t *v;
+
+ v = reg + ins->dst_reg;
+ switch (ins->imm) {
+ case 16:
+ *v = rte_cpu_to_be_16(*v);
+ break;
+ case 32:
+ *v = rte_cpu_to_be_32(*v);
+ break;
+ case 64:
+ *v = rte_cpu_to_be_64(*v);
+ break;
+ }
+}
+
+static inline void
+bpf_alu_le(uint64_t reg[EBPF_REG_NUM], const struct ebpf_insn *ins)
+{
+ uint64_t *v;
+
+ v = reg + ins->dst_reg;
+ switch (ins->imm) {
+ case 16:
+ *v = rte_cpu_to_le_16(*v);
+ break;
+ case 32:
+ *v = rte_cpu_to_le_32(*v);
+ break;
+ case 64:
+ *v = rte_cpu_to_le_64(*v);
+ break;
+ }
+}
+
+static inline uint64_t
+bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM])
+{
+ const struct ebpf_insn *ins;
+
+ for (ins = bpf->prm.ins; ; ins++) {
+ switch (ins->code) {
+ /* 32 bit ALU IMM operations */
+ case (BPF_ALU | BPF_ADD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, +, uint32_t);
+ break;
+ case (BPF_ALU | BPF_SUB | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, -, uint32_t);
+ break;
+ case (BPF_ALU | BPF_AND | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, &, uint32_t);
+ break;
+ case (BPF_ALU | BPF_OR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, |, uint32_t);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, <<, uint32_t);
+ break;
+ case (BPF_ALU | BPF_RSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, >>, uint32_t);
+ break;
+ case (BPF_ALU | BPF_XOR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, ^, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MUL | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, *, uint32_t);
+ break;
+ case (BPF_ALU | BPF_DIV | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, /, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MOD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, %, uint32_t);
+ break;
+ case (BPF_ALU | EBPF_MOV | BPF_K):
+ EBPF_MOV_ALU_IMM(reg, ins, uint32_t);
+ break;
+ /* 32 bit ALU REG operations */
+ case (BPF_ALU | BPF_ADD | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, +, uint32_t);
+ break;
+ case (BPF_ALU | BPF_SUB | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, -, uint32_t);
+ break;
+ case (BPF_ALU | BPF_AND | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, &, uint32_t);
+ break;
+ case (BPF_ALU | BPF_OR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, |, uint32_t);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, <<, uint32_t);
+ break;
+ case (BPF_ALU | BPF_RSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, >>, uint32_t);
+ break;
+ case (BPF_ALU | BPF_XOR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, ^, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MUL | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, *, uint32_t);
+ break;
+ case (BPF_ALU | BPF_DIV | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t);
+ BPF_OP_ALU_REG(reg, ins, /, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MOD | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t);
+ BPF_OP_ALU_REG(reg, ins, %, uint32_t);
+ break;
+ case (BPF_ALU | EBPF_MOV | BPF_X):
+ EBPF_MOV_ALU_REG(reg, ins, uint32_t);
+ break;
+ case (BPF_ALU | BPF_NEG):
+ BPF_NEG_ALU(reg, ins, uint32_t);
+ break;
+ case (BPF_ALU | EBPF_END | EBPF_TO_BE):
+ bpf_alu_be(reg, ins);
+ break;
+ case (BPF_ALU | EBPF_END | EBPF_TO_LE):
+ bpf_alu_le(reg, ins);
+ break;
+ /* 64 bit ALU IMM operations */
+ case (EBPF_ALU64 | BPF_ADD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, +, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_SUB | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, -, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_AND | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, &, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_OR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, |, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_LSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, <<, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_RSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, >>, uint64_t);
+ break;
+ case (EBPF_ALU64 | EBPF_ARSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, >>, int64_t);
+ break;
+ case (EBPF_ALU64 | BPF_XOR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, ^, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_MUL | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, *, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_DIV | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, /, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_MOD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, %, uint64_t);
+ break;
+ case (EBPF_ALU64 | EBPF_MOV | BPF_K):
+ EBPF_MOV_ALU_IMM(reg, ins, uint64_t);
+ break;
+ /* 64 bit ALU REG operations */
+ case (EBPF_ALU64 | BPF_ADD | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, +, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_SUB | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, -, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_AND | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, &, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_OR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, |, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_LSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, <<, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_RSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, >>, uint64_t);
+ break;
+ case (EBPF_ALU64 | EBPF_ARSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, >>, int64_t);
+ break;
+ case (EBPF_ALU64 | BPF_XOR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, ^, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_MUL | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, *, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_DIV | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t);
+ BPF_OP_ALU_REG(reg, ins, /, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_MOD | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t);
+ BPF_OP_ALU_REG(reg, ins, %, uint64_t);
+ break;
+ case (EBPF_ALU64 | EBPF_MOV | BPF_X):
+ EBPF_MOV_ALU_REG(reg, ins, uint64_t);
+ break;
+ case (EBPF_ALU64 | BPF_NEG):
+ BPF_NEG_ALU(reg, ins, uint64_t);
+ break;
+ /* load instructions */
+ case (BPF_LDX | BPF_MEM | BPF_B):
+ BPF_LD_REG(reg, ins, uint8_t);
+ break;
+ case (BPF_LDX | BPF_MEM | BPF_H):
+ BPF_LD_REG(reg, ins, uint16_t);
+ break;
+ case (BPF_LDX | BPF_MEM | BPF_W):
+ BPF_LD_REG(reg, ins, uint32_t);
+ break;
+ case (BPF_LDX | BPF_MEM | EBPF_DW):
+ BPF_LD_REG(reg, ins, uint64_t);
+ break;
+ /* load 64 bit immediate value */
+ case (BPF_LD | BPF_IMM | EBPF_DW):
+ reg[ins->dst_reg] = (uint32_t)ins[0].imm |
+ (uint64_t)(uint32_t)ins[1].imm << 32;
+ ins++;
+ break;
+ /* store instructions */
+ case (BPF_STX | BPF_MEM | BPF_B):
+ BPF_ST_REG(reg, ins, uint8_t);
+ break;
+ case (BPF_STX | BPF_MEM | BPF_H):
+ BPF_ST_REG(reg, ins, uint16_t);
+ break;
+ case (BPF_STX | BPF_MEM | BPF_W):
+ BPF_ST_REG(reg, ins, uint32_t);
+ break;
+ case (BPF_STX | BPF_MEM | EBPF_DW):
+ BPF_ST_REG(reg, ins, uint64_t);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_B):
+ BPF_ST_IMM(reg, ins, uint8_t);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_H):
+ BPF_ST_IMM(reg, ins, uint16_t);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_W):
+ BPF_ST_IMM(reg, ins, uint32_t);
+ break;
+ case (BPF_ST | BPF_MEM | EBPF_DW):
+ BPF_ST_IMM(reg, ins, uint64_t);
+ break;
+ /* atomic add instructions */
+ case (BPF_STX | EBPF_XADD | BPF_W):
+ BPF_ST_XADD_REG(reg, ins, 32);
+ break;
+ case (BPF_STX | EBPF_XADD | EBPF_DW):
+ BPF_ST_XADD_REG(reg, ins, 64);
+ break;
+ /* jump instructions */
+ case (BPF_JMP | BPF_JA):
+ BPF_JMP_UNC(ins);
+ break;
+ /* jump IMM instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, ==, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JNE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, !=, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JLT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >=, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JLE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <=, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JSGT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSLT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSGE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >=, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSLE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <=, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSET | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, &, uint64_t);
+ break;
+ /* jump REG instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, ==, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JNE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, !=, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JLT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >=, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JLE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <=, uint64_t);
+ break;
+ case (BPF_JMP | EBPF_JSGT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSLT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSGE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >=, int64_t);
+ break;
+ case (BPF_JMP | EBPF_JSLE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <=, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSET | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, &, uint64_t);
+ break;
+ /* call instructions */
+ case (BPF_JMP | EBPF_CALL):
+ reg[EBPF_REG_0] = bpf->prm.xsym[ins->imm].func.val(
+ reg[EBPF_REG_1], reg[EBPF_REG_2],
+ reg[EBPF_REG_3], reg[EBPF_REG_4],
+ reg[EBPF_REG_5]);
+ break;
+ /* return instruction */
+ case (BPF_JMP | EBPF_EXIT):
+ return reg[EBPF_REG_0];
+ default:
+ RTE_BPF_LOG(ERR,
+ "%s(%p): invalid opcode %#x at pc: %#zx;\n",
+ __func__, bpf, ins->code,
+ (uintptr_t)ins - (uintptr_t)bpf->prm.ins);
+ return 0;
+ }
+ }
+
+ /* should never be reached */
+ RTE_VERIFY(0);
+ return 0;
+}
+
+__rte_experimental uint32_t
+rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], uint64_t rc[],
+ uint32_t num)
+{
+ uint32_t i;
+ uint64_t reg[EBPF_REG_NUM];
+ uint64_t stack[MAX_BPF_STACK_SIZE / sizeof(uint64_t)];
+
+ for (i = 0; i != num; i++) {
+
+ reg[EBPF_REG_1] = (uintptr_t)ctx[i];
+ reg[EBPF_REG_10] = (uintptr_t)(stack + RTE_DIM(stack));
+
+ rc[i] = bpf_exec(bpf, reg);
+ }
+
+ return i;
+}
+
+__rte_experimental uint64_t
+rte_bpf_exec(const struct rte_bpf *bpf, void *ctx)
+{
+ uint64_t rc;
+
+ rte_bpf_exec_burst(bpf, &ctx, &rc, 1);
+ return rc;
+}
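
A short usage sketch for the interpreter entry points above; FILTER_BURST and
the "non-zero return value means match" convention are assumptions of the
example, not of the library.

#include <rte_common.h>
#include <rte_bpf.h>

#define FILTER_BURST 32

/* Run a filter over a burst of opaque contexts and count the matches. */
static uint32_t
count_matches(const struct rte_bpf *bpf, void *ctx[], uint32_t num)
{
	uint32_t i, n;
	uint64_t rc[FILTER_BURST];

	num = RTE_MIN(num, (uint32_t)FILTER_BURST);
	rte_bpf_exec_burst(bpf, ctx, rc, num);

	n = 0;
	for (i = 0; i != num; i++)
		n += (rc[i] != 0);
	return n;
}
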
diff --git a/lib/librte_bpf/bpf_impl.h b/lib/librte_bpf/bpf_impl.h
new file mode 100644
index 00000000..b577e2cb
--- /dev/null
+++ b/lib/librte_bpf/bpf_impl.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _BPF_H_
+#define _BPF_H_
+
+#include <rte_bpf.h>
+#include <sys/mman.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_BPF_STACK_SIZE 0x200
+
+struct rte_bpf {
+ struct rte_bpf_prm prm;
+ struct rte_bpf_jit jit;
+ size_t sz;
+ uint32_t stack_sz;
+};
+
+extern int bpf_validate(struct rte_bpf *bpf);
+
+extern int bpf_jit(struct rte_bpf *bpf);
+
+#ifdef RTE_ARCH_X86_64
+extern int bpf_jit_x86(struct rte_bpf *);
+#endif
+
+extern int rte_bpf_logtype;
+
+#define RTE_BPF_LOG(lvl, fmt, args...) \
+ rte_log(RTE_LOG_## lvl, rte_bpf_logtype, fmt, ##args)
+
+static inline size_t
+bpf_size(uint32_t bpf_op_sz)
+{
+ if (bpf_op_sz == BPF_B)
+ return sizeof(uint8_t);
+ else if (bpf_op_sz == BPF_H)
+ return sizeof(uint16_t);
+ else if (bpf_op_sz == BPF_W)
+ return sizeof(uint32_t);
+ else if (bpf_op_sz == EBPF_DW)
+ return sizeof(uint64_t);
+ return 0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BPF_H_ */
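
A small illustration of how bpf_size() is meant to be used by the rest of the
library: it maps the BPF_SIZE() field of a load/store opcode to a byte width,
the building block for bounds checks. The helper below is hypothetical, not
part of this header.

#include "bpf_impl.h"

/* Does a memory access of opcode 'op' at offset 'off' fit into a region? */
static int
access_fits(uint32_t op, uint64_t off, uint64_t region_sz)
{
	size_t w = bpf_size(BPF_SIZE(op));	/* 1, 2, 4, 8 or 0 on error */

	return w != 0 && off + w <= region_sz;
}
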
diff --git a/lib/librte_bpf/bpf_jit_x86.c b/lib/librte_bpf/bpf_jit_x86.c
new file mode 100644
index 00000000..68ea389f
--- /dev/null
+++ b/lib/librte_bpf/bpf_jit_x86.c
@@ -0,0 +1,1356 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+
+#include "bpf_impl.h"
+
+#define GET_BPF_OP(op) (BPF_OP(op) >> 4)
+
+enum {
+ RAX = 0, /* scratch, return value */
+ RCX = 1, /* scratch, 4th arg */
+ RDX = 2, /* scratch, 3rd arg */
+ RBX = 3, /* callee saved */
+ RSP = 4, /* stack pointer */
+ RBP = 5, /* frame pointer, callee saved */
+ RSI = 6, /* scratch, 2nd arg */
+ RDI = 7, /* scratch, 1st arg */
+ R8 = 8, /* scratch, 5th arg */
+ R9 = 9, /* scratch, 6th arg */
+ R10 = 10, /* scratch */
+ R11 = 11, /* scratch */
+ R12 = 12, /* callee saved */
+ R13 = 13, /* callee saved */
+ R14 = 14, /* callee saved */
+ R15 = 15, /* callee saved */
+};
+
+#define IS_EXT_REG(r) ((r) >= R8)
+
+enum {
+ REX_PREFIX = 0x40, /* fixed value 0100 */
+ REX_W = 0x8, /* 64bit operand size */
+ REX_R = 0x4, /* extension of the ModRM.reg field */
+ REX_X = 0x2, /* extension of the SIB.index field */
+ REX_B = 0x1, /* extension of the ModRM.rm field */
+};
+
+enum {
+ MOD_INDIRECT = 0,
+ MOD_IDISP8 = 1,
+ MOD_IDISP32 = 2,
+ MOD_DIRECT = 3,
+};
+
+enum {
+ SIB_SCALE_1 = 0,
+ SIB_SCALE_2 = 1,
+ SIB_SCALE_4 = 2,
+ SIB_SCALE_8 = 3,
+};
+
+/*
+ * eBPF to x86_64 register mappings.
+ */
+static const uint32_t ebpf2x86[] = {
+ [EBPF_REG_0] = RAX,
+ [EBPF_REG_1] = RDI,
+ [EBPF_REG_2] = RSI,
+ [EBPF_REG_3] = RDX,
+ [EBPF_REG_4] = RCX,
+ [EBPF_REG_5] = R8,
+ [EBPF_REG_6] = RBX,
+ [EBPF_REG_7] = R13,
+ [EBPF_REG_8] = R14,
+ [EBPF_REG_9] = R15,
+ [EBPF_REG_10] = RBP,
+};
+
+/*
+ * r10 and r11 are used as scratch temporary registers.
+ */
+enum {
+ REG_DIV_IMM = R9,
+ REG_TMP0 = R11,
+ REG_TMP1 = R10,
+};
+
+/*
+ * callee saved registers list.
+ * keep RBP as the last one.
+ */
+static const uint32_t save_regs[] = {RBX, R12, R13, R14, R15, RBP};
+
+struct bpf_jit_state {
+ uint32_t idx;
+ size_t sz;
+ struct {
+ uint32_t num;
+ int32_t off;
+ } exit;
+ uint32_t reguse;
+ int32_t *off;
+ uint8_t *ins;
+};
+
+#define INUSE(v, r) (((v) >> (r)) & 1)
+#define USED(v, r) ((v) |= 1 << (r))
+
+union bpf_jit_imm {
+ uint32_t u32;
+ uint8_t u8[4];
+};
+
+/*
+ * In many cases for imm8 we can produce shorter code.
+ */
+static size_t
+imm_size(int32_t v)
+{
+ if (v == (int8_t)v)
+ return sizeof(int8_t);
+ return sizeof(int32_t);
+}
+
+static void
+emit_bytes(struct bpf_jit_state *st, const uint8_t ins[], uint32_t sz)
+{
+ uint32_t i;
+
+ if (st->ins != NULL) {
+ for (i = 0; i != sz; i++)
+ st->ins[st->sz + i] = ins[i];
+ }
+ st->sz += sz;
+}
+
+static void
+emit_imm(struct bpf_jit_state *st, const uint32_t imm, uint32_t sz)
+{
+ union bpf_jit_imm v;
+
+ v.u32 = imm;
+ emit_bytes(st, v.u8, sz);
+}
+
+/*
+ * emit REX byte
+ */
+static void
+emit_rex(struct bpf_jit_state *st, uint32_t op, uint32_t reg, uint32_t rm)
+{
+ uint8_t rex;
+
+ /* mark operand registers as used*/
+ USED(st->reguse, reg);
+ USED(st->reguse, rm);
+
+ rex = 0;
+ if (BPF_CLASS(op) == EBPF_ALU64 ||
+ op == (BPF_ST | BPF_MEM | EBPF_DW) ||
+ op == (BPF_STX | BPF_MEM | EBPF_DW) ||
+ op == (BPF_STX | EBPF_XADD | EBPF_DW) ||
+ op == (BPF_LD | BPF_IMM | EBPF_DW) ||
+ (BPF_CLASS(op) == BPF_LDX &&
+ BPF_MODE(op) == BPF_MEM &&
+ BPF_SIZE(op) != BPF_W))
+ rex |= REX_W;
+
+ if (IS_EXT_REG(reg))
+ rex |= REX_R;
+
+ if (IS_EXT_REG(rm))
+ rex |= REX_B;
+
+ /* store using SIL, DIL */
+ if (op == (BPF_STX | BPF_MEM | BPF_B) && (reg == RDI || reg == RSI))
+ rex |= REX_PREFIX;
+
+ if (rex != 0) {
+ rex |= REX_PREFIX;
+ emit_bytes(st, &rex, sizeof(rex));
+ }
+}
+
+/*
+ * emit MODRegRM byte
+ */
+static void
+emit_modregrm(struct bpf_jit_state *st, uint32_t mod, uint32_t reg, uint32_t rm)
+{
+ uint8_t v;
+
+ v = mod << 6 | (reg & 7) << 3 | (rm & 7);
+ emit_bytes(st, &v, sizeof(v));
+}
+
+/*
+ * emit SIB byte
+ */
+static void
+emit_sib(struct bpf_jit_state *st, uint32_t scale, uint32_t idx, uint32_t base)
+{
+ uint8_t v;
+
+ v = scale << 6 | (idx & 7) << 3 | (base & 7);
+ emit_bytes(st, &v, sizeof(v));
+}
+
+/*
+ * emit xchg %<sreg>, %<dreg>
+ */
+static void
+emit_xchg_reg(struct bpf_jit_state *st, uint32_t sreg, uint32_t dreg)
+{
+ const uint8_t ops = 0x87;
+
+ emit_rex(st, EBPF_ALU64, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+/*
+ * emit neg %<dreg>
+ */
+static void
+emit_neg(struct bpf_jit_state *st, uint32_t op, uint32_t dreg)
+{
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 3;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+}
+
+/*
+ * emit mov %<sreg>, %<dreg>
+ */
+static void
+emit_mov_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ const uint8_t ops = 0x89;
+
+ /* if operands are 32-bit, then it can be used to clear upper 32-bit */
+ if (sreg != dreg || BPF_CLASS(op) == BPF_ALU) {
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+ }
+}
+
+/*
+ * emit movzwl %<sreg>, %<dreg>
+ */
+static void
+emit_movzwl(struct bpf_jit_state *st, uint32_t sreg, uint32_t dreg)
+{
+ static const uint8_t ops[] = {0x0F, 0xB7};
+
+ emit_rex(st, BPF_ALU, sreg, dreg);
+ emit_bytes(st, ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+/*
+ * emit ror <imm8>, %<dreg>
+ */
+static void
+emit_ror_imm(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ const uint8_t prfx = 0x66;
+ const uint8_t ops = 0xC1;
+ const uint8_t mods = 1;
+
+ emit_bytes(st, &prfx, sizeof(prfx));
+ emit_rex(st, BPF_ALU, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+ emit_imm(st, imm, imm_size(imm));
+}
+
+/*
+ * emit bswap %<dreg>
+ */
+static void
+emit_be2le_48(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ uint32_t rop;
+
+ const uint8_t ops = 0x0F;
+ const uint8_t mods = 1;
+
+ rop = (imm == 64) ? EBPF_ALU64 : BPF_ALU;
+ emit_rex(st, rop, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+}
+
+static void
+emit_be2le(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ if (imm == 16) {
+ emit_ror_imm(st, dreg, 8);
+ emit_movzwl(st, dreg, dreg);
+ } else
+ emit_be2le_48(st, dreg, imm);
+}
+
+/*
+ * In general this is a NOP on (little-endian) x86.
+ * Just clear the upper bits.
+ */
+static void
+emit_le2be(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ if (imm == 16)
+ emit_movzwl(st, dreg, dreg);
+ else if (imm == 32)
+ emit_mov_reg(st, BPF_ALU | EBPF_MOV | BPF_X, dreg, dreg);
+}
+
+/*
+ * emit one of:
+ * add <imm>, %<dreg>
+ * and <imm>, %<dreg>
+ * or <imm>, %<dreg>
+ * sub <imm>, %<dreg>
+ * xor <imm>, %<dreg>
+ */
+static void
+emit_alu_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ uint8_t mod, opcode;
+ uint32_t bop, imsz;
+
+ const uint8_t op8 = 0x83;
+ const uint8_t op32 = 0x81;
+ static const uint8_t mods[] = {
+ [GET_BPF_OP(BPF_ADD)] = 0,
+ [GET_BPF_OP(BPF_AND)] = 4,
+ [GET_BPF_OP(BPF_OR)] = 1,
+ [GET_BPF_OP(BPF_SUB)] = 5,
+ [GET_BPF_OP(BPF_XOR)] = 6,
+ };
+
+ bop = GET_BPF_OP(op);
+ mod = mods[bop];
+
+ imsz = imm_size(imm);
+ opcode = (imsz == 1) ? op8 : op32;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &opcode, sizeof(opcode));
+ emit_modregrm(st, MOD_DIRECT, mod, dreg);
+ emit_imm(st, imm, imsz);
+}
+
+/*
+ * emit one of:
+ * add %<sreg>, %<dreg>
+ * and %<sreg>, %<dreg>
+ * or %<sreg>, %<dreg>
+ * sub %<sreg>, %<dreg>
+ * xor %<sreg>, %<dreg>
+ */
+static void
+emit_alu_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ uint32_t bop;
+
+ static const uint8_t ops[] = {
+ [GET_BPF_OP(BPF_ADD)] = 0x01,
+ [GET_BPF_OP(BPF_AND)] = 0x21,
+ [GET_BPF_OP(BPF_OR)] = 0x09,
+ [GET_BPF_OP(BPF_SUB)] = 0x29,
+ [GET_BPF_OP(BPF_XOR)] = 0x31,
+ };
+
+ bop = GET_BPF_OP(op);
+
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops[bop], sizeof(ops[bop]));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+static void
+emit_shift(struct bpf_jit_state *st, uint32_t op, uint32_t dreg)
+{
+ uint8_t mod;
+ uint32_t bop, opx;
+
+ static const uint8_t ops[] = {0xC1, 0xD3};
+ static const uint8_t mods[] = {
+ [GET_BPF_OP(BPF_LSH)] = 4,
+ [GET_BPF_OP(BPF_RSH)] = 5,
+ [GET_BPF_OP(EBPF_ARSH)] = 7,
+ };
+
+ bop = GET_BPF_OP(op);
+ mod = mods[bop];
+ opx = (BPF_SRC(op) == BPF_X);
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops[opx], sizeof(ops[opx]));
+ emit_modregrm(st, MOD_DIRECT, mod, dreg);
+}
+
+/*
+ * emit one of:
+ * shl <imm>, %<dreg>
+ * shr <imm>, %<dreg>
+ * sar <imm>, %<dreg>
+ */
+static void
+emit_shift_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg,
+ uint32_t imm)
+{
+ emit_shift(st, op, dreg);
+ emit_imm(st, imm, imm_size(imm));
+}
+
+/*
+ * emit one of:
+ * shl %<dreg>
+ * shr %<dreg>
+ * sar %<dreg>
+ * note that rcx is implicitly used as the shift-count register,
+ * so a few extra instructions for register spilling might be necessary.
+ */
+static void
+emit_shift_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ if (sreg != RCX)
+ emit_xchg_reg(st, RCX, sreg);
+
+ emit_shift(st, op, (dreg == RCX) ? sreg : dreg);
+
+ if (sreg != RCX)
+ emit_xchg_reg(st, RCX, sreg);
+}
+
+/*
+ * emit mov <imm>, %<dreg>
+ */
+static void
+emit_mov_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ const uint8_t ops = 0xC7;
+
+ if (imm == 0) {
+ /* replace 'mov 0, %<dst>' with 'xor %<dst>, %<dst>' */
+ op = BPF_CLASS(op) | BPF_XOR | BPF_X;
+ emit_alu_reg(st, op, dreg, dreg);
+ return;
+ }
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, 0, dreg);
+ emit_imm(st, imm, sizeof(imm));
+}
+
+/*
+ * emit mov <imm64>, %<dreg>
+ */
+static void
+emit_ld_imm64(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm0,
+ uint32_t imm1)
+{
+ const uint8_t ops = 0xB8;
+
+ if (imm1 == 0) {
+ emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, dreg, imm0);
+ return;
+ }
+
+ emit_rex(st, EBPF_ALU64, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, 0, dreg);
+
+ emit_imm(st, imm0, sizeof(imm0));
+ emit_imm(st, imm1, sizeof(imm1));
+}
+
+/*
+ * note that rax:rdx are implicitly used as source/destination registers,
+ * so some register spilling is necessary.
+ * emit:
+ * mov %rax, %r11
+ * mov %rdx, %r10
+ * mov %<dreg>, %rax
+ * either:
+ * mov %<sreg>, %rdx
+ * OR
+ * mov <imm>, %rdx
+ * mul %rdx
+ * mov %r10, %rdx
+ * mov %rax, %<dreg>
+ * mov %r11, %rax
+ */
+static void
+emit_mul(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ uint32_t imm)
+{
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 4;
+
+ /* save rax & rdx */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, REG_TMP0);
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, REG_TMP1);
+
+ /* rax = dreg */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, dreg, RAX);
+
+ if (BPF_SRC(op) == BPF_X)
+ /* rdx = sreg */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X,
+ sreg == RAX ? REG_TMP0 : sreg, RDX);
+ else
+ /* rdx = imm */
+ emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, RDX, imm);
+
+ emit_rex(st, op, RAX, RDX);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, RDX);
+
+ if (dreg != RDX)
+ /* restore rdx */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP1, RDX);
+
+ if (dreg != RAX) {
+ /* dreg = rax */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, dreg);
+ /* restore rax */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP0, RAX);
+ }
+}
+
+/*
+ * emit mov <ofs>(%<sreg>), %<dreg>
+ * note that for non 64-bit ops, higher bits have to be cleared.
+ */
+static void
+emit_ld_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ int32_t ofs)
+{
+ uint32_t mods, opsz;
+ const uint8_t op32 = 0x8B;
+ const uint8_t op16[] = {0x0F, 0xB7};
+ const uint8_t op8[] = {0x0F, 0xB6};
+
+ emit_rex(st, op, dreg, sreg);
+
+ opsz = BPF_SIZE(op);
+ if (opsz == BPF_B)
+ emit_bytes(st, op8, sizeof(op8));
+ else if (opsz == BPF_H)
+ emit_bytes(st, op16, sizeof(op16));
+ else
+ emit_bytes(st, &op32, sizeof(op32));
+
+ mods = (imm_size(ofs) == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+ emit_modregrm(st, mods, dreg, sreg);
+ if (sreg == RSP || sreg == R12)
+ emit_sib(st, SIB_SCALE_1, sreg, sreg);
+ emit_imm(st, ofs, imm_size(ofs));
+}
+
+/*
+ * emit one of:
+ * mov %<sreg>, <ofs>(%<dreg>)
+ * mov <imm>, <ofs>(%<dreg>)
+ */
+static void
+emit_st_common(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg, uint32_t imm, int32_t ofs)
+{
+ uint32_t mods, imsz, opsz, opx;
+ const uint8_t prfx16 = 0x66;
+
+ /* 8 bit instruction opcodes */
+ static const uint8_t op8[] = {0xC6, 0x88};
+
+ /* 16/32/64 bit instruction opcodes */
+ static const uint8_t ops[] = {0xC7, 0x89};
+
+	/* does the instruction use an immediate value or a src reg? */
+ opx = (BPF_CLASS(op) == BPF_STX);
+
+ opsz = BPF_SIZE(op);
+ if (opsz == BPF_H)
+ emit_bytes(st, &prfx16, sizeof(prfx16));
+
+ emit_rex(st, op, sreg, dreg);
+
+ if (opsz == BPF_B)
+ emit_bytes(st, &op8[opx], sizeof(op8[opx]));
+ else
+ emit_bytes(st, &ops[opx], sizeof(ops[opx]));
+
+ imsz = imm_size(ofs);
+ mods = (imsz == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+ emit_modregrm(st, mods, sreg, dreg);
+
+ if (dreg == RSP || dreg == R12)
+ emit_sib(st, SIB_SCALE_1, dreg, dreg);
+
+ emit_imm(st, ofs, imsz);
+
+ if (opx == 0) {
+ imsz = RTE_MIN(bpf_size(opsz), sizeof(imm));
+ emit_imm(st, imm, imsz);
+ }
+}
+
+static void
+emit_st_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm,
+ int32_t ofs)
+{
+ emit_st_common(st, op, 0, dreg, imm, ofs);
+}
+
+static void
+emit_st_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ int32_t ofs)
+{
+ emit_st_common(st, op, sreg, dreg, 0, ofs);
+}
+
+/*
+ * emit lock add %<sreg>, <ofs>(%<dreg>)
+ */
+static void
+emit_st_xadd(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg, int32_t ofs)
+{
+ uint32_t imsz, mods;
+
+ const uint8_t lck = 0xF0; /* lock prefix */
+ const uint8_t ops = 0x01; /* add opcode */
+
+ imsz = imm_size(ofs);
+ mods = (imsz == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+ emit_bytes(st, &lck, sizeof(lck));
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, mods, sreg, dreg);
+ emit_imm(st, ofs, imsz);
+}
+
+/*
+ * emit:
+ * mov <imm64>, %rax
+ * call *%rax
+ */
+static void
+emit_call(struct bpf_jit_state *st, uintptr_t trg)
+{
+ const uint8_t ops = 0xFF;
+ const uint8_t mods = 2;
+
+ emit_ld_imm64(st, RAX, trg, trg >> 32);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, RAX);
+}
+
+/*
+ * emit jmp <ofs>
+ * where 'ofs' is the target offset for the native code.
+ */
+static void
+emit_abs_jmp(struct bpf_jit_state *st, int32_t ofs)
+{
+ int32_t joff;
+ uint32_t imsz;
+
+ const uint8_t op8 = 0xEB;
+ const uint8_t op32 = 0xE9;
+
+ const int32_t sz8 = sizeof(op8) + sizeof(uint8_t);
+ const int32_t sz32 = sizeof(op32) + sizeof(uint32_t);
+
+ /* max possible jmp instruction size */
+ const int32_t iszm = RTE_MAX(sz8, sz32);
+
+ joff = ofs - st->sz;
+ imsz = RTE_MAX(imm_size(joff), imm_size(joff + iszm));
+
+ if (imsz == 1) {
+ emit_bytes(st, &op8, sizeof(op8));
+ joff -= sz8;
+ } else {
+ emit_bytes(st, &op32, sizeof(op32));
+ joff -= sz32;
+ }
+
+ emit_imm(st, joff, imsz);
+}
+
+/*
+ * emit jmp <ofs>
+ * where 'ofs' is the target offset for the BPF bytecode.
+ */
+static void
+emit_jmp(struct bpf_jit_state *st, int32_t ofs)
+{
+ emit_abs_jmp(st, st->off[st->idx + ofs]);
+}
+
+/*
+ * emit one of:
+ * cmovz %<sreg>, <%dreg>
+ * cmovne %<sreg>, <%dreg>
+ * cmova %<sreg>, <%dreg>
+ * cmovb %<sreg>, <%dreg>
+ * cmovae %<sreg>, <%dreg>
+ * cmovbe %<sreg>, <%dreg>
+ * cmovg %<sreg>, <%dreg>
+ * cmovl %<sreg>, <%dreg>
+ * cmovge %<sreg>, <%dreg>
+ * cmovle %<sreg>, <%dreg>
+ */
+static void
+emit_movcc_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ uint32_t bop;
+
+ static const uint8_t ops[][2] = {
+ [GET_BPF_OP(BPF_JEQ)] = {0x0F, 0x44}, /* CMOVZ */
+ [GET_BPF_OP(EBPF_JNE)] = {0x0F, 0x45}, /* CMOVNE */
+ [GET_BPF_OP(BPF_JGT)] = {0x0F, 0x47}, /* CMOVA */
+ [GET_BPF_OP(EBPF_JLT)] = {0x0F, 0x42}, /* CMOVB */
+ [GET_BPF_OP(BPF_JGE)] = {0x0F, 0x43}, /* CMOVAE */
+ [GET_BPF_OP(EBPF_JLE)] = {0x0F, 0x46}, /* CMOVBE */
+ [GET_BPF_OP(EBPF_JSGT)] = {0x0F, 0x4F}, /* CMOVG */
+ [GET_BPF_OP(EBPF_JSLT)] = {0x0F, 0x4C}, /* CMOVL */
+ [GET_BPF_OP(EBPF_JSGE)] = {0x0F, 0x4D}, /* CMOVGE */
+ [GET_BPF_OP(EBPF_JSLE)] = {0x0F, 0x4E}, /* CMOVLE */
+ [GET_BPF_OP(BPF_JSET)] = {0x0F, 0x45}, /* CMOVNE */
+ };
+
+ bop = GET_BPF_OP(op);
+
+ emit_rex(st, op, dreg, sreg);
+ emit_bytes(st, ops[bop], sizeof(ops[bop]));
+ emit_modregrm(st, MOD_DIRECT, dreg, sreg);
+}
+
+/*
+ * emit one of:
+ * je <ofs>
+ * jne <ofs>
+ * ja <ofs>
+ * jb <ofs>
+ * jae <ofs>
+ * jbe <ofs>
+ * jg <ofs>
+ * jl <ofs>
+ * jge <ofs>
+ * jle <ofs>
+ * where 'ofs' is the target offset for the native code.
+ */
+static void
+emit_abs_jcc(struct bpf_jit_state *st, uint32_t op, int32_t ofs)
+{
+ uint32_t bop, imsz;
+ int32_t joff;
+
+ static const uint8_t op8[] = {
+ [GET_BPF_OP(BPF_JEQ)] = 0x74, /* JE */
+ [GET_BPF_OP(EBPF_JNE)] = 0x75, /* JNE */
+ [GET_BPF_OP(BPF_JGT)] = 0x77, /* JA */
+ [GET_BPF_OP(EBPF_JLT)] = 0x72, /* JB */
+ [GET_BPF_OP(BPF_JGE)] = 0x73, /* JAE */
+ [GET_BPF_OP(EBPF_JLE)] = 0x76, /* JBE */
+ [GET_BPF_OP(EBPF_JSGT)] = 0x7F, /* JG */
+ [GET_BPF_OP(EBPF_JSLT)] = 0x7C, /* JL */
+ [GET_BPF_OP(EBPF_JSGE)] = 0x7D, /*JGE */
+ [GET_BPF_OP(EBPF_JSLE)] = 0x7E, /* JLE */
+ [GET_BPF_OP(BPF_JSET)] = 0x75, /*JNE */
+ };
+
+ static const uint8_t op32[][2] = {
+ [GET_BPF_OP(BPF_JEQ)] = {0x0F, 0x84}, /* JE */
+ [GET_BPF_OP(EBPF_JNE)] = {0x0F, 0x85}, /* JNE */
+ [GET_BPF_OP(BPF_JGT)] = {0x0F, 0x87}, /* JA */
+ [GET_BPF_OP(EBPF_JLT)] = {0x0F, 0x82}, /* JB */
+ [GET_BPF_OP(BPF_JGE)] = {0x0F, 0x83}, /* JAE */
+ [GET_BPF_OP(EBPF_JLE)] = {0x0F, 0x86}, /* JBE */
+ [GET_BPF_OP(EBPF_JSGT)] = {0x0F, 0x8F}, /* JG */
+ [GET_BPF_OP(EBPF_JSLT)] = {0x0F, 0x8C}, /* JL */
+ [GET_BPF_OP(EBPF_JSGE)] = {0x0F, 0x8D}, /*JGE */
+ [GET_BPF_OP(EBPF_JSLE)] = {0x0F, 0x8E}, /* JLE */
+ [GET_BPF_OP(BPF_JSET)] = {0x0F, 0x85}, /*JNE */
+ };
+
+ const int32_t sz8 = sizeof(op8[0]) + sizeof(uint8_t);
+ const int32_t sz32 = sizeof(op32[0]) + sizeof(uint32_t);
+
+ /* max possible jcc instruction size */
+ const int32_t iszm = RTE_MAX(sz8, sz32);
+
+ joff = ofs - st->sz;
+ imsz = RTE_MAX(imm_size(joff), imm_size(joff + iszm));
+
+ bop = GET_BPF_OP(op);
+
+ if (imsz == 1) {
+ emit_bytes(st, &op8[bop], sizeof(op8[bop]));
+ joff -= sz8;
+ } else {
+ emit_bytes(st, op32[bop], sizeof(op32[bop]));
+ joff -= sz32;
+ }
+
+ emit_imm(st, joff, imsz);
+}
+
+/*
+ * emit one of:
+ * je <ofs>
+ * jne <ofs>
+ * ja <ofs>
+ * jb <ofs>
+ * jae <ofs>
+ * jbe <ofs>
+ * jg <ofs>
+ * jl <ofs>
+ * jge <ofs>
+ * jle <ofs>
+ * where 'ofs' is the target offset for the BPF bytecode.
+ */
+static void
+emit_jcc(struct bpf_jit_state *st, uint32_t op, int32_t ofs)
+{
+ emit_abs_jcc(st, op, st->off[st->idx + ofs]);
+}
+
+
+/*
+ * emit cmp <imm>, %<dreg>
+ */
+static void
+emit_cmp_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ uint8_t ops;
+ uint32_t imsz;
+
+ const uint8_t op8 = 0x83;
+ const uint8_t op32 = 0x81;
+ const uint8_t mods = 7;
+
+ imsz = imm_size(imm);
+ ops = (imsz == 1) ? op8 : op32;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+ emit_imm(st, imm, imsz);
+}
+
+/*
+ * emit test <imm>, %<dreg>
+ */
+static void
+emit_tst_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 0;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+ emit_imm(st, imm, imm_size(imm));
+}
+
+static void
+emit_jcc_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg,
+ uint32_t imm, int32_t ofs)
+{
+ if (BPF_OP(op) == BPF_JSET)
+ emit_tst_imm(st, EBPF_ALU64, dreg, imm);
+ else
+ emit_cmp_imm(st, EBPF_ALU64, dreg, imm);
+
+ emit_jcc(st, op, ofs);
+}
+
+/*
+ * emit test %<sreg>, %<dreg>
+ */
+static void
+emit_tst_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ const uint8_t ops = 0x85;
+
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+/*
+ * emit cmp %<sreg>, %<dreg>
+ */
+static void
+emit_cmp_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ const uint8_t ops = 0x39;
+
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+
+}
+
+static void
+emit_jcc_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg, int32_t ofs)
+{
+ if (BPF_OP(op) == BPF_JSET)
+ emit_tst_reg(st, EBPF_ALU64, sreg, dreg);
+ else
+ emit_cmp_reg(st, EBPF_ALU64, sreg, dreg);
+
+ emit_jcc(st, op, ofs);
+}
+
+/*
+ * note that rax:rdx are implicitly used as source/destination registers,
+ * so some register spilling is necessary.
+ * emit:
+ * mov %rax, %r11
+ * mov %rdx, %r10
+ * mov %<dreg>, %rax
+ * xor %rdx, %rdx
+ * for divisor as immediate value:
+ * mov <imm>, %r9
+ * div %<divisor_reg>
+ * either (for BPF_DIV):
+ * mov %rax, %<dreg>
+ * OR (for BPF_MOD):
+ * mov %rdx, %<dreg>
+ * mov %r11, %rax
+ * mov %r10, %rdx
+ */
+static void
+emit_div(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ uint32_t imm)
+{
+ uint32_t sr;
+
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 6;
+
+ if (BPF_SRC(op) == BPF_X) {
+
+ /* check that src divisor is not zero */
+ emit_tst_reg(st, BPF_CLASS(op), sreg, sreg);
+
+ /* exit with return value zero */
+ emit_movcc_reg(st, BPF_CLASS(op) | BPF_JEQ | BPF_X, sreg, RAX);
+ emit_abs_jcc(st, BPF_JMP | BPF_JEQ | BPF_K, st->exit.off);
+ }
+
+ /* save rax & rdx */
+ if (dreg != RAX)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, REG_TMP0);
+ if (dreg != RDX)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, REG_TMP1);
+
+ /* fill rax & rdx */
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, dreg, RAX);
+ emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, RDX, 0);
+
+ if (BPF_SRC(op) == BPF_X) {
+ sr = sreg;
+ if (sr == RAX)
+ sr = REG_TMP0;
+ else if (sr == RDX)
+ sr = REG_TMP1;
+ } else {
+ sr = REG_DIV_IMM;
+ emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, sr, imm);
+ }
+
+ emit_rex(st, op, 0, sr);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, sr);
+
+ if (BPF_OP(op) == BPF_DIV)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RAX, dreg);
+ else
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RDX, dreg);
+
+ if (dreg != RAX)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP0, RAX);
+ if (dreg != RDX)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, REG_TMP1, RDX);
+}
+
+static void
+emit_prolog(struct bpf_jit_state *st, int32_t stack_size)
+{
+ uint32_t i;
+ int32_t spil, ofs;
+
+ spil = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++)
+ spil += INUSE(st->reguse, save_regs[i]);
+
+ /* we can avoid touching the stack at all */
+ if (spil == 0)
+ return;
+
+
+ emit_alu_imm(st, EBPF_ALU64 | BPF_SUB | BPF_K, RSP,
+ spil * sizeof(uint64_t));
+
+ ofs = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++) {
+ if (INUSE(st->reguse, save_regs[i]) != 0) {
+ emit_st_reg(st, BPF_STX | BPF_MEM | EBPF_DW,
+ save_regs[i], RSP, ofs);
+ ofs += sizeof(uint64_t);
+ }
+ }
+
+ if (INUSE(st->reguse, RBP) != 0) {
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X, RSP, RBP);
+ emit_alu_imm(st, EBPF_ALU64 | BPF_SUB | BPF_K, RSP, stack_size);
+ }
+}
+
+/*
+ * emit ret
+ */
+static void
+emit_ret(struct bpf_jit_state *st)
+{
+ const uint8_t ops = 0xC3;
+
+ emit_bytes(st, &ops, sizeof(ops));
+}
+
+static void
+emit_epilog(struct bpf_jit_state *st)
+{
+ uint32_t i;
+ int32_t spil, ofs;
+
+	/* if we already have an epilog, generate a jump to it */
+ if (st->exit.num++ != 0) {
+ emit_abs_jmp(st, st->exit.off);
+ return;
+ }
+
+ /* store offset of epilog block */
+ st->exit.off = st->sz;
+
+ spil = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++)
+ spil += INUSE(st->reguse, save_regs[i]);
+
+ if (spil != 0) {
+
+ if (INUSE(st->reguse, RBP) != 0)
+ emit_mov_reg(st, EBPF_ALU64 | EBPF_MOV | BPF_X,
+ RBP, RSP);
+
+ ofs = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++) {
+ if (INUSE(st->reguse, save_regs[i]) != 0) {
+ emit_ld_reg(st, BPF_LDX | BPF_MEM | EBPF_DW,
+ RSP, save_regs[i], ofs);
+ ofs += sizeof(uint64_t);
+ }
+ }
+
+ emit_alu_imm(st, EBPF_ALU64 | BPF_ADD | BPF_K, RSP,
+ spil * sizeof(uint64_t));
+ }
+
+ emit_ret(st);
+}
+
+/*
+ * walk through the BPF code and translate it into x86_64 instructions.
+ */
+static int
+emit(struct bpf_jit_state *st, const struct rte_bpf *bpf)
+{
+ uint32_t i, dr, op, sr;
+ const struct ebpf_insn *ins;
+
+ /* reset state fields */
+ st->sz = 0;
+ st->exit.num = 0;
+
+ emit_prolog(st, bpf->stack_sz);
+
+ for (i = 0; i != bpf->prm.nb_ins; i++) {
+
+ st->idx = i;
+ st->off[i] = st->sz;
+
+ ins = bpf->prm.ins + i;
+
+ dr = ebpf2x86[ins->dst_reg];
+ sr = ebpf2x86[ins->src_reg];
+ op = ins->code;
+
+ switch (op) {
+ /* 32 bit ALU IMM operations */
+ case (BPF_ALU | BPF_ADD | BPF_K):
+ case (BPF_ALU | BPF_SUB | BPF_K):
+ case (BPF_ALU | BPF_AND | BPF_K):
+ case (BPF_ALU | BPF_OR | BPF_K):
+ case (BPF_ALU | BPF_XOR | BPF_K):
+ emit_alu_imm(st, op, dr, ins->imm);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_K):
+ case (BPF_ALU | BPF_RSH | BPF_K):
+ emit_shift_imm(st, op, dr, ins->imm);
+ break;
+ case (BPF_ALU | EBPF_MOV | BPF_K):
+ emit_mov_imm(st, op, dr, ins->imm);
+ break;
+ /* 32 bit ALU REG operations */
+ case (BPF_ALU | BPF_ADD | BPF_X):
+ case (BPF_ALU | BPF_SUB | BPF_X):
+ case (BPF_ALU | BPF_AND | BPF_X):
+ case (BPF_ALU | BPF_OR | BPF_X):
+ case (BPF_ALU | BPF_XOR | BPF_X):
+ emit_alu_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_X):
+ case (BPF_ALU | BPF_RSH | BPF_X):
+ emit_shift_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU | EBPF_MOV | BPF_X):
+ emit_mov_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU | BPF_NEG):
+ emit_neg(st, op, dr);
+ break;
+ case (BPF_ALU | EBPF_END | EBPF_TO_BE):
+ emit_be2le(st, dr, ins->imm);
+ break;
+ case (BPF_ALU | EBPF_END | EBPF_TO_LE):
+ emit_le2be(st, dr, ins->imm);
+ break;
+ /* 64 bit ALU IMM operations */
+ case (EBPF_ALU64 | BPF_ADD | BPF_K):
+ case (EBPF_ALU64 | BPF_SUB | BPF_K):
+ case (EBPF_ALU64 | BPF_AND | BPF_K):
+ case (EBPF_ALU64 | BPF_OR | BPF_K):
+ case (EBPF_ALU64 | BPF_XOR | BPF_K):
+ emit_alu_imm(st, op, dr, ins->imm);
+ break;
+ case (EBPF_ALU64 | BPF_LSH | BPF_K):
+ case (EBPF_ALU64 | BPF_RSH | BPF_K):
+ case (EBPF_ALU64 | EBPF_ARSH | BPF_K):
+ emit_shift_imm(st, op, dr, ins->imm);
+ break;
+ case (EBPF_ALU64 | EBPF_MOV | BPF_K):
+ emit_mov_imm(st, op, dr, ins->imm);
+ break;
+ /* 64 bit ALU REG operations */
+ case (EBPF_ALU64 | BPF_ADD | BPF_X):
+ case (EBPF_ALU64 | BPF_SUB | BPF_X):
+ case (EBPF_ALU64 | BPF_AND | BPF_X):
+ case (EBPF_ALU64 | BPF_OR | BPF_X):
+ case (EBPF_ALU64 | BPF_XOR | BPF_X):
+ emit_alu_reg(st, op, sr, dr);
+ break;
+ case (EBPF_ALU64 | BPF_LSH | BPF_X):
+ case (EBPF_ALU64 | BPF_RSH | BPF_X):
+ case (EBPF_ALU64 | EBPF_ARSH | BPF_X):
+ emit_shift_reg(st, op, sr, dr);
+ break;
+ case (EBPF_ALU64 | EBPF_MOV | BPF_X):
+ emit_mov_reg(st, op, sr, dr);
+ break;
+ case (EBPF_ALU64 | BPF_NEG):
+ emit_neg(st, op, dr);
+ break;
+ /* multiply instructions */
+ case (BPF_ALU | BPF_MUL | BPF_K):
+ case (BPF_ALU | BPF_MUL | BPF_X):
+ case (EBPF_ALU64 | BPF_MUL | BPF_K):
+ case (EBPF_ALU64 | BPF_MUL | BPF_X):
+ emit_mul(st, op, sr, dr, ins->imm);
+ break;
+ /* divide instructions */
+ case (BPF_ALU | BPF_DIV | BPF_K):
+ case (BPF_ALU | BPF_MOD | BPF_K):
+ case (BPF_ALU | BPF_DIV | BPF_X):
+ case (BPF_ALU | BPF_MOD | BPF_X):
+ case (EBPF_ALU64 | BPF_DIV | BPF_K):
+ case (EBPF_ALU64 | BPF_MOD | BPF_K):
+ case (EBPF_ALU64 | BPF_DIV | BPF_X):
+ case (EBPF_ALU64 | BPF_MOD | BPF_X):
+ emit_div(st, op, sr, dr, ins->imm);
+ break;
+ /* load instructions */
+ case (BPF_LDX | BPF_MEM | BPF_B):
+ case (BPF_LDX | BPF_MEM | BPF_H):
+ case (BPF_LDX | BPF_MEM | BPF_W):
+ case (BPF_LDX | BPF_MEM | EBPF_DW):
+ emit_ld_reg(st, op, sr, dr, ins->off);
+ break;
+ /* load 64 bit immediate value */
+ case (BPF_LD | BPF_IMM | EBPF_DW):
+ emit_ld_imm64(st, dr, ins[0].imm, ins[1].imm);
+ i++;
+ break;
+ /* store instructions */
+ case (BPF_STX | BPF_MEM | BPF_B):
+ case (BPF_STX | BPF_MEM | BPF_H):
+ case (BPF_STX | BPF_MEM | BPF_W):
+ case (BPF_STX | BPF_MEM | EBPF_DW):
+ emit_st_reg(st, op, sr, dr, ins->off);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_B):
+ case (BPF_ST | BPF_MEM | BPF_H):
+ case (BPF_ST | BPF_MEM | BPF_W):
+ case (BPF_ST | BPF_MEM | EBPF_DW):
+ emit_st_imm(st, op, dr, ins->imm, ins->off);
+ break;
+ /* atomic add instructions */
+ case (BPF_STX | EBPF_XADD | BPF_W):
+ case (BPF_STX | EBPF_XADD | EBPF_DW):
+ emit_st_xadd(st, op, sr, dr, ins->off);
+ break;
+ /* jump instructions */
+ case (BPF_JMP | BPF_JA):
+ emit_jmp(st, ins->off + 1);
+ break;
+ /* jump IMM instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_K):
+ case (BPF_JMP | EBPF_JNE | BPF_K):
+ case (BPF_JMP | BPF_JGT | BPF_K):
+ case (BPF_JMP | EBPF_JLT | BPF_K):
+ case (BPF_JMP | BPF_JGE | BPF_K):
+ case (BPF_JMP | EBPF_JLE | BPF_K):
+ case (BPF_JMP | EBPF_JSGT | BPF_K):
+ case (BPF_JMP | EBPF_JSLT | BPF_K):
+ case (BPF_JMP | EBPF_JSGE | BPF_K):
+ case (BPF_JMP | EBPF_JSLE | BPF_K):
+ case (BPF_JMP | BPF_JSET | BPF_K):
+ emit_jcc_imm(st, op, dr, ins->imm, ins->off + 1);
+ break;
+ /* jump REG instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_X):
+ case (BPF_JMP | EBPF_JNE | BPF_X):
+ case (BPF_JMP | BPF_JGT | BPF_X):
+ case (BPF_JMP | EBPF_JLT | BPF_X):
+ case (BPF_JMP | BPF_JGE | BPF_X):
+ case (BPF_JMP | EBPF_JLE | BPF_X):
+ case (BPF_JMP | EBPF_JSGT | BPF_X):
+ case (BPF_JMP | EBPF_JSLT | BPF_X):
+ case (BPF_JMP | EBPF_JSGE | BPF_X):
+ case (BPF_JMP | EBPF_JSLE | BPF_X):
+ case (BPF_JMP | BPF_JSET | BPF_X):
+ emit_jcc_reg(st, op, sr, dr, ins->off + 1);
+ break;
+ /* call instructions */
+ case (BPF_JMP | EBPF_CALL):
+ emit_call(st,
+ (uintptr_t)bpf->prm.xsym[ins->imm].func.val);
+ break;
+ /* return instruction */
+ case (BPF_JMP | EBPF_EXIT):
+ emit_epilog(st);
+ break;
+ default:
+ RTE_BPF_LOG(ERR,
+ "%s(%p): invalid opcode %#x at pc: %u;\n",
+ __func__, bpf, ins->code, i);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * produce a native ISA version of the given BPF code.
+ */
+int
+bpf_jit_x86(struct rte_bpf *bpf)
+{
+ int32_t rc;
+ uint32_t i;
+ size_t sz;
+ struct bpf_jit_state st;
+
+ /* init state */
+ memset(&st, 0, sizeof(st));
+ st.off = malloc(bpf->prm.nb_ins * sizeof(st.off[0]));
+ if (st.off == NULL)
+ return -ENOMEM;
+
+ /* fill with fake offsets */
+ st.exit.off = INT32_MAX;
+ for (i = 0; i != bpf->prm.nb_ins; i++)
+ st.off[i] = INT32_MAX;
+
+ /*
+ * dry runs, used to calculate total code size and valid jump offsets.
+	 * stop when we reach the minimal possible size.
+ */
+ do {
+ sz = st.sz;
+ rc = emit(&st, bpf);
+ } while (rc == 0 && sz != st.sz);
+
+ if (rc == 0) {
+
+ /* allocate memory needed */
+ st.ins = mmap(NULL, st.sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (st.ins == MAP_FAILED)
+ rc = -ENOMEM;
+ else
+ /* generate code */
+ rc = emit(&st, bpf);
+ }
+
+ if (rc == 0 && mprotect(st.ins, st.sz, PROT_READ | PROT_EXEC) != 0)
+ rc = -ENOMEM;
+
+ if (rc != 0)
+ munmap(st.ins, st.sz);
+ else {
+ bpf->jit.func = (void *)st.ins;
+ bpf->jit.sz = st.sz;
+ }
+
+ free(st.off);
+ return rc;
+}
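
To make the REX/ModRM helpers above concrete, here is a worked encoding of a
single instruction, written as a self-check that assumes visibility of the
file-local enums (illustration only): emit_alu_reg() for
(EBPF_ALU64 | BPF_ADD | BPF_X) with sreg = R13 and dreg = RBX emits
"add %r13, %rbx" as the byte sequence 4c 01 eb.

#include <assert.h>
#include <stdint.h>

static void
check_add_r13_rbx(void)
{
	/* REX.W (64-bit operand) + REX.R (reg field holds an extended register) */
	const uint8_t rex = REX_PREFIX | REX_W | REX_R;	/* 0x4c */
	const uint8_t opcode = 0x01;			/* add r/m64, r64 */
	/* mod = direct, reg = r13 (5), rm = rbx (3) */
	const uint8_t modrm = MOD_DIRECT << 6 | (R13 & 7) << 3 | (RBX & 7);

	assert(rex == 0x4c && opcode == 0x01 && modrm == 0xeb);
}
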
diff --git a/lib/librte_bpf/bpf_load.c b/lib/librte_bpf/bpf_load.c
new file mode 100644
index 00000000..2b84fe72
--- /dev/null
+++ b/lib/librte_bpf/bpf_load.c
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+
+#include "bpf_impl.h"
+
+static struct rte_bpf *
+bpf_load(const struct rte_bpf_prm *prm)
+{
+ uint8_t *buf;
+ struct rte_bpf *bpf;
+ size_t sz, bsz, insz, xsz;
+
+ xsz = prm->nb_xsym * sizeof(prm->xsym[0]);
+ insz = prm->nb_ins * sizeof(prm->ins[0]);
+ bsz = sizeof(bpf[0]);
+ sz = insz + xsz + bsz;
+
+ buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buf == MAP_FAILED)
+ return NULL;
+
+ bpf = (void *)buf;
+ bpf->sz = sz;
+
+ memcpy(&bpf->prm, prm, sizeof(bpf->prm));
+
+ memcpy(buf + bsz, prm->xsym, xsz);
+ memcpy(buf + bsz + xsz, prm->ins, insz);
+
+ bpf->prm.xsym = (void *)(buf + bsz);
+ bpf->prm.ins = (void *)(buf + bsz + xsz);
+
+ return bpf;
+}
+
+/*
+ * Check that the user-provided external symbol is valid.
+ */
+static int
+bpf_check_xsym(const struct rte_bpf_xsym *xsym)
+{
+ uint32_t i;
+
+ if (xsym->name == NULL)
+ return -EINVAL;
+
+ if (xsym->type == RTE_BPF_XTYPE_VAR) {
+ if (xsym->var.desc.type == RTE_BPF_ARG_UNDEF)
+ return -EINVAL;
+ } else if (xsym->type == RTE_BPF_XTYPE_FUNC) {
+
+ if (xsym->func.nb_args > EBPF_FUNC_MAX_ARGS)
+ return -EINVAL;
+
+ /* check function arguments */
+ for (i = 0; i != xsym->func.nb_args; i++) {
+ if (xsym->func.args[i].type == RTE_BPF_ARG_UNDEF)
+ return -EINVAL;
+ }
+
+ /* check return value info */
+ if (xsym->func.ret.type != RTE_BPF_ARG_UNDEF &&
+ xsym->func.ret.size == 0)
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+__rte_experimental struct rte_bpf *
+rte_bpf_load(const struct rte_bpf_prm *prm)
+{
+ struct rte_bpf *bpf;
+ int32_t rc;
+ uint32_t i;
+
+ if (prm == NULL || prm->ins == NULL ||
+ (prm->nb_xsym != 0 && prm->xsym == NULL)) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ rc = 0;
+ for (i = 0; i != prm->nb_xsym && rc == 0; i++)
+ rc = bpf_check_xsym(prm->xsym + i);
+
+ if (rc != 0) {
+ rte_errno = -rc;
+ RTE_BPF_LOG(ERR, "%s: %d-th xsym is invalid\n", __func__, i);
+ return NULL;
+ }
+
+ bpf = bpf_load(prm);
+ if (bpf == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ rc = bpf_validate(bpf);
+ if (rc == 0) {
+ bpf_jit(bpf);
+ if (mprotect(bpf, bpf->sz, PROT_READ) != 0)
+ rc = -ENOMEM;
+ }
+
+ if (rc != 0) {
+ rte_bpf_destroy(bpf);
+ rte_errno = -rc;
+ return NULL;
+ }
+
+ return bpf;
+}
+
+__rte_experimental __attribute__ ((weak)) struct rte_bpf *
+rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname,
+ const char *sname)
+{
+ if (prm == NULL || fname == NULL || sname == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ RTE_BPF_LOG(ERR, "%s() is not supported with current config\n"
+ "rebuild with libelf installed\n",
+ __func__);
+ rte_errno = ENOTSUP;
+ return NULL;
+}
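
A load-path sketch tying the pieces together: wrap a raw instruction array
(here the ret_ctx_prog example shown after bpf_def.h) in an rte_bpf_prm and
let rte_bpf_load() validate and, where supported, JIT it. Field names follow
rte_bpf.h from this commit; the prog_arg values are assumptions of the
example.

#include <rte_common.h>
#include <rte_bpf.h>
#include <rte_errno.h>
#include <rte_log.h>

static struct rte_bpf *
load_ret_ctx_prog(void)
{
	struct rte_bpf *bpf;
	const struct rte_bpf_prm prm = {
		.ins = ret_ctx_prog,
		.nb_ins = RTE_DIM(ret_ctx_prog),
		.xsym = NULL,			/* no external symbols referenced */
		.nb_xsym = 0,
		.prog_arg = {
			.type = RTE_BPF_ARG_RAW,	/* r1 treated as a raw value */
			.size = sizeof(uintptr_t),	/* assumed input size */
		},
	};

	bpf = rte_bpf_load(&prm);
	if (bpf == NULL)
		RTE_LOG(ERR, USER1, "rte_bpf_load() failed: %d\n", rte_errno);
	return bpf;
}
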
diff --git a/lib/librte_bpf/bpf_load_elf.c b/lib/librte_bpf/bpf_load_elf.c
new file mode 100644
index 00000000..96d3630f
--- /dev/null
+++ b/lib/librte_bpf/bpf_load_elf.c
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <fcntl.h>
+
+#include <libelf.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+
+#include "bpf_impl.h"
+
+/* To overcome a compatibility issue: some elf.h versions do not define EM_BPF */
+#ifndef EM_BPF
+#define EM_BPF 247
+#endif
+
+static uint32_t
+bpf_find_xsym(const char *sn, enum rte_bpf_xtype type,
+ const struct rte_bpf_xsym fp[], uint32_t fn)
+{
+ uint32_t i;
+
+ if (sn == NULL || fp == NULL)
+ return UINT32_MAX;
+
+ for (i = 0; i != fn; i++) {
+ if (fp[i].type == type && strcmp(sn, fp[i].name) == 0)
+ break;
+ }
+
+ return (i != fn) ? i : UINT32_MAX;
+}
+
+/*
+ * update BPF code at offset *ofs* with a proper address (for variables)
+ * or index (for functions) of the external symbol *sn*
+ */
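+/*
+ * Example (illustrative values): an lddw instruction referencing external
+ * variable "foo" located at address 0x1234567890 gets
+ * ins[idx].imm = 0x34567890 (low 32 bits) and ins[idx + 1].imm = 0x12
+ * (high 32 bits), as done below for RTE_BPF_XTYPE_VAR.
+ */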
+static int
+resolve_xsym(const char *sn, size_t ofs, struct ebpf_insn *ins, size_t ins_sz,
+ const struct rte_bpf_prm *prm)
+{
+ uint32_t idx, fidx;
+ enum rte_bpf_xtype type;
+
+ if (ofs % sizeof(ins[0]) != 0 || ofs >= ins_sz)
+ return -EINVAL;
+
+ idx = ofs / sizeof(ins[0]);
+ if (ins[idx].code == (BPF_JMP | EBPF_CALL))
+ type = RTE_BPF_XTYPE_FUNC;
+ else if (ins[idx].code == (BPF_LD | BPF_IMM | EBPF_DW) &&
+ ofs < ins_sz - sizeof(ins[idx]))
+ type = RTE_BPF_XTYPE_VAR;
+ else
+ return -EINVAL;
+
+ fidx = bpf_find_xsym(sn, type, prm->xsym, prm->nb_xsym);
+ if (fidx == UINT32_MAX)
+ return -ENOENT;
+
+ /* for function we just need an index in our xsym table */
+ if (type == RTE_BPF_XTYPE_FUNC)
+ ins[idx].imm = fidx;
+ /* for variable we need to store its absolute address */
+ else {
+ ins[idx].imm = (uintptr_t)prm->xsym[fidx].var.val;
+ ins[idx + 1].imm =
+ (uint64_t)(uintptr_t)prm->xsym[fidx].var.val >> 32;
+ }
+
+ return 0;
+}
+
+static int
+check_elf_header(const Elf64_Ehdr *eh)
+{
+ const char *err;
+
+ err = NULL;
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ if (eh->e_ident[EI_DATA] != ELFDATA2LSB)
+#else
+ if (eh->e_ident[EI_DATA] != ELFDATA2MSB)
+#endif
+ err = "not native byte order";
+ else if (eh->e_ident[EI_OSABI] != ELFOSABI_NONE)
+ err = "unexpected OS ABI";
+ else if (eh->e_type != ET_REL)
+ err = "unexpected ELF type";
+ else if (eh->e_machine != EM_NONE && eh->e_machine != EM_BPF)
+ err = "unexpected machine type";
+
+ if (err != NULL) {
+ RTE_BPF_LOG(ERR, "%s(): %s\n", __func__, err);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * helper function, find executable section by name.
+ */
+static int
+find_elf_code(Elf *elf, const char *section, Elf_Data **psd, size_t *pidx)
+{
+ Elf_Scn *sc;
+ const Elf64_Ehdr *eh;
+ const Elf64_Shdr *sh;
+ Elf_Data *sd;
+ const char *sn;
+ int32_t rc;
+
+ eh = elf64_getehdr(elf);
+ if (eh == NULL) {
+ rc = elf_errno();
+ RTE_BPF_LOG(ERR, "%s(%p, %s) error code: %d(%s)\n",
+ __func__, elf, section, rc, elf_errmsg(rc));
+ return -EINVAL;
+ }
+
+ if (check_elf_header(eh) != 0)
+ return -EINVAL;
+
+ /* find given section by name */
+ for (sc = elf_nextscn(elf, NULL); sc != NULL;
+ sc = elf_nextscn(elf, sc)) {
+ sh = elf64_getshdr(sc);
+ sn = elf_strptr(elf, eh->e_shstrndx, sh->sh_name);
+ if (sn != NULL && strcmp(section, sn) == 0 &&
+ sh->sh_type == SHT_PROGBITS &&
+ sh->sh_flags == (SHF_ALLOC | SHF_EXECINSTR))
+ break;
+ }
+
+ sd = elf_getdata(sc, NULL);
+ if (sd == NULL || sd->d_size == 0 ||
+ sd->d_size % sizeof(struct ebpf_insn) != 0) {
+ rc = elf_errno();
+ RTE_BPF_LOG(ERR, "%s(%p, %s) error code: %d(%s)\n",
+ __func__, elf, section, rc, elf_errmsg(rc));
+ return -EINVAL;
+ }
+
+ *psd = sd;
+ *pidx = elf_ndxscn(sc);
+ return 0;
+}
+
+/*
+ * helper function to process data from relocation table.
+ */
+static int
+process_reloc(Elf *elf, size_t sym_idx, Elf64_Rel *re, size_t re_sz,
+ struct ebpf_insn *ins, size_t ins_sz, const struct rte_bpf_prm *prm)
+{
+ int32_t rc;
+ uint32_t i, n;
+ size_t ofs, sym;
+ const char *sn;
+ const Elf64_Ehdr *eh;
+ Elf_Scn *sc;
+ const Elf_Data *sd;
+ Elf64_Sym *sm;
+
+ eh = elf64_getehdr(elf);
+
+ /* get symtable by section index */
+ sc = elf_getscn(elf, sym_idx);
+ sd = elf_getdata(sc, NULL);
+ if (sd == NULL)
+ return -EINVAL;
+ sm = sd->d_buf;
+
+ n = re_sz / sizeof(re[0]);
+ for (i = 0; i != n; i++) {
+
+ ofs = re[i].r_offset;
+
+ /* retrieve index in the symtable */
+ sym = ELF64_R_SYM(re[i].r_info);
+ if (sym * sizeof(sm[0]) >= sd->d_size)
+ return -EINVAL;
+
+ sn = elf_strptr(elf, eh->e_shstrndx, sm[sym].st_name);
+
+ rc = resolve_xsym(sn, ofs, ins, ins_sz, prm);
+ if (rc != 0) {
+ RTE_BPF_LOG(ERR,
+ "resolve_xsym(%s, %zu) error code: %d\n",
+ sn, ofs, rc);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * helper function, find relocation information (if any)
+ * and update bpf code.
+ */
+static int
+elf_reloc_code(Elf *elf, Elf_Data *ed, size_t sidx,
+ const struct rte_bpf_prm *prm)
+{
+ Elf64_Rel *re;
+ Elf_Scn *sc;
+ const Elf64_Shdr *sh;
+ const Elf_Data *sd;
+ int32_t rc;
+
+ rc = 0;
+
+ /* walk through all sections */
+ for (sc = elf_nextscn(elf, NULL); sc != NULL && rc == 0;
+ sc = elf_nextscn(elf, sc)) {
+
+ sh = elf64_getshdr(sc);
+
+ /* relocation data for our code section */
+ if (sh->sh_type == SHT_REL && sh->sh_info == sidx) {
+ sd = elf_getdata(sc, NULL);
+ if (sd == NULL || sd->d_size == 0 ||
+ sd->d_size % sizeof(re[0]) != 0)
+ return -EINVAL;
+ rc = process_reloc(elf, sh->sh_link,
+ sd->d_buf, sd->d_size, ed->d_buf, ed->d_size,
+ prm);
+ }
+ }
+
+ return rc;
+}
+
+static struct rte_bpf *
+bpf_load_elf(const struct rte_bpf_prm *prm, int32_t fd, const char *section)
+{
+ Elf *elf;
+ Elf_Data *sd;
+ size_t sidx;
+ int32_t rc;
+ struct rte_bpf *bpf;
+ struct rte_bpf_prm np;
+
+ elf_version(EV_CURRENT);
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+
+ rc = find_elf_code(elf, section, &sd, &sidx);
+ if (rc == 0)
+ rc = elf_reloc_code(elf, sd, sidx, prm);
+
+ if (rc == 0) {
+ np = prm[0];
+ np.ins = sd->d_buf;
+ np.nb_ins = sd->d_size / sizeof(struct ebpf_insn);
+ bpf = rte_bpf_load(&np);
+ } else {
+ bpf = NULL;
+ rte_errno = -rc;
+ }
+
+ elf_end(elf);
+ return bpf;
+}
+
+__rte_experimental struct rte_bpf *
+rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname,
+ const char *sname)
+{
+ int32_t fd, rc;
+ struct rte_bpf *bpf;
+
+ if (prm == NULL || fname == NULL || sname == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ fd = open(fname, O_RDONLY);
+ if (fd < 0) {
+ rc = errno;
+ RTE_BPF_LOG(ERR, "%s(%s) error code: %d(%s)\n",
+ __func__, fname, rc, strerror(rc));
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ bpf = bpf_load_elf(prm, fd, sname);
+ close(fd);
+
+ if (bpf == NULL) {
+ RTE_BPF_LOG(ERR,
+ "%s(fname=\"%s\", sname=\"%s\") failed, "
+ "error code: %d\n",
+ __func__, fname, sname, rte_errno);
+ return NULL;
+ }
+
+ RTE_BPF_LOG(INFO, "%s(fname=\"%s\", sname=\"%s\") "
+ "successfully creates %p(jit={.func=%p,.sz=%zu});\n",
+ __func__, fname, sname, bpf, bpf->jit.func, bpf->jit.sz);
+ return bpf;
+}
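For the ELF path, a corresponding sketch (editorial example; the file name, section name and input size are illustrative, and the object is assumed to reference no external symbols, otherwise prm.xsym/prm.nb_xsym must describe them):

    #include <rte_errno.h>
    #include <rte_bpf.h>

    static struct rte_bpf *
    elf_load_example(const char *obj_file)
    {
        const struct rte_bpf_prm prm = {
            /* program takes a pointer to raw data */
            .prog_arg = {
                .type = RTE_BPF_ARG_PTR,
                .size = 64, /* bytes of input the program may access */
            },
        };

        /* returns NULL and sets rte_errno on failure */
        return rte_bpf_elf_load(&prm, obj_file, ".text");
    }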
diff --git a/lib/librte_bpf/bpf_pkt.c b/lib/librte_bpf/bpf_pkt.c
new file mode 100644
index 00000000..ab9daa52
--- /dev/null
+++ b/lib/librte_bpf/bpf_pkt.c
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include <rte_bpf_ethdev.h>
+#include "bpf_impl.h"
+
+/*
+ * information about installed BPF rx/tx callback
+ */
+
+struct bpf_eth_cbi {
+ /* used by both data & control path */
+ uint32_t use; /* usage counter */
+ const struct rte_eth_rxtx_callback *cb; /* callback handle */
+ struct rte_bpf *bpf;
+ struct rte_bpf_jit jit;
+ /* used by control path only */
+ LIST_ENTRY(bpf_eth_cbi) link;
+ uint16_t port;
+ uint16_t queue;
+} __rte_cache_aligned;
+
+/*
+ * Odd number means that callback is used by datapath.
+ * Even number means that callback is not used by datapath.
+ */
+#define BPF_ETH_CBI_INUSE 1
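+
+/*
+ * Example of the counter protocol: use == 4 means the callback is idle;
+ * bpf_eth_cbi_inuse() makes it 5 (datapath is inside the callback);
+ * bpf_eth_cbi_unuse() makes it 6 (idle again). The control path in
+ * bpf_eth_cbi_wait() spins only while it observes an odd, unchanged value.
+ */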
+
+/*
+ * List to manage RX/TX installed callbacks.
+ */
+LIST_HEAD(bpf_eth_cbi_list, bpf_eth_cbi);
+
+enum {
+ BPF_ETH_RX,
+ BPF_ETH_TX,
+ BPF_ETH_NUM,
+};
+
+/*
+ * information about all installed BPF rx/tx callbacks
+ */
+struct bpf_eth_cbh {
+ rte_spinlock_t lock;
+ struct bpf_eth_cbi_list list;
+ uint32_t type;
+};
+
+static struct bpf_eth_cbh rx_cbh = {
+ .lock = RTE_SPINLOCK_INITIALIZER,
+ .list = LIST_HEAD_INITIALIZER(list),
+ .type = BPF_ETH_RX,
+};
+
+static struct bpf_eth_cbh tx_cbh = {
+ .lock = RTE_SPINLOCK_INITIALIZER,
+ .list = LIST_HEAD_INITIALIZER(list),
+ .type = BPF_ETH_TX,
+};
+
+/*
+ * Marks given callback as used by datapath.
+ */
+static __rte_always_inline void
+bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi)
+{
+ cbi->use++;
+ /* make sure no store/load reordering could happen */
+ rte_smp_mb();
+}
+
+/*
+ * Marks given callback as not used by datapath.
+ */
+static __rte_always_inline void
+bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
+{
+ /* make sure all previous loads are completed */
+ rte_smp_rmb();
+ cbi->use++;
+}
+
+/*
+ * Waits till datapath finishes using given callback.
+ */
+static void
+bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
+{
+ uint32_t nuse, puse;
+
+ /* make sure all previous loads and stores are completed */
+ rte_smp_mb();
+
+ puse = cbi->use;
+
+ /* in use, busy wait till current RX/TX iteration is finished */
+ if ((puse & BPF_ETH_CBI_INUSE) != 0) {
+ do {
+ rte_pause();
+ rte_compiler_barrier();
+ nuse = cbi->use;
+ } while (nuse == puse);
+ }
+}
+
+static void
+bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc)
+{
+ bc->bpf = NULL;
+ memset(&bc->jit, 0, sizeof(bc->jit));
+}
+
+static struct bpf_eth_cbi *
+bpf_eth_cbh_find(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbi *cbi;
+
+ LIST_FOREACH(cbi, &cbh->list, link) {
+ if (cbi->port == port && cbi->queue == queue)
+ break;
+ }
+ return cbi;
+}
+
+static struct bpf_eth_cbi *
+bpf_eth_cbh_add(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbi *cbi;
+
+ /* return an existing one */
+ cbi = bpf_eth_cbh_find(cbh, port, queue);
+ if (cbi != NULL)
+ return cbi;
+
+ cbi = rte_zmalloc(NULL, sizeof(*cbi), RTE_CACHE_LINE_SIZE);
+ if (cbi != NULL) {
+ cbi->port = port;
+ cbi->queue = queue;
+ LIST_INSERT_HEAD(&cbh->list, cbi, link);
+ }
+ return cbi;
+}
+
+/*
+ * BPF packet processing routines.
+ */
+
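+/*
+ * Example for apply_filter() below: with rc[] = {1, 0, 1} and num = 3,
+ * mb[] is compacted to {mb[0], mb[2], mb[1]} and 2 is returned;
+ * with drop != 0 the non-matching mb[1] is freed instead of being kept
+ * past the returned count.
+ */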
+static inline uint32_t
+apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num,
+ uint32_t drop)
+{
+ uint32_t i, j, k;
+ struct rte_mbuf *dr[num];
+
+ for (i = 0, j = 0, k = 0; i != num; i++) {
+
+ /* filter matches */
+ if (rc[i] != 0)
+ mb[j++] = mb[i];
+ /* no match */
+ else
+ dr[k++] = mb[i];
+ }
+
+ if (drop != 0) {
+ /* free filtered out mbufs */
+ for (i = 0; i != k; i++)
+ rte_pktmbuf_free(dr[i]);
+ } else {
+ /* copy filtered out mbufs beyond good ones */
+ for (i = 0; i != k; i++)
+ mb[j + i] = dr[i];
+ }
+
+ return j;
+}
+
+static inline uint32_t
+pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
+ uint32_t drop)
+{
+ uint32_t i;
+ void *dp[num];
+ uint64_t rc[num];
+
+ for (i = 0; i != num; i++)
+ dp[i] = rte_pktmbuf_mtod(mb[i], void *);
+
+ rte_bpf_exec_burst(bpf, dp, rc, num);
+ return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
+ uint32_t num, uint32_t drop)
+{
+ uint32_t i, n;
+ void *dp;
+ uint64_t rc[num];
+
+ n = 0;
+ for (i = 0; i != num; i++) {
+ dp = rte_pktmbuf_mtod(mb[i], void *);
+ rc[i] = jit->func(dp);
+ n += (rc[i] == 0);
+ }
+
+ if (n != 0)
+ num = apply_filter(mb, rc, num, drop);
+
+ return num;
+}
+
+static inline uint32_t
+pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
+ uint32_t drop)
+{
+ uint64_t rc[num];
+
+ rte_bpf_exec_burst(bpf, (void **)mb, rc, num);
+ return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
+ uint32_t num, uint32_t drop)
+{
+ uint32_t i, n;
+ uint64_t rc[num];
+
+ n = 0;
+ for (i = 0; i != num; i++) {
+ rc[i] = jit->func(mb[i]);
+ n += (rc[i] == 0);
+ }
+
+ if (n != 0)
+ num = apply_filter(mb, rc, num, drop);
+
+ return num;
+}
+
+/*
+ * RX/TX callbacks for raw data bpf.
+ */
+
+static uint16_t
+bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+/*
+ * RX/TX callbacks for mbuf.
+ */
+
+static uint16_t
+bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static rte_rx_callback_fn
+select_rx_callback(enum rte_bpf_arg_type type, uint32_t flags)
+{
+ if (flags & RTE_BPF_ETH_F_JIT) {
+ if (type == RTE_BPF_ARG_PTR)
+ return bpf_rx_callback_jit;
+ else if (type == RTE_BPF_ARG_PTR_MBUF)
+ return bpf_rx_callback_mb_jit;
+ } else if (type == RTE_BPF_ARG_PTR)
+ return bpf_rx_callback_vm;
+ else if (type == RTE_BPF_ARG_PTR_MBUF)
+ return bpf_rx_callback_mb_vm;
+
+ return NULL;
+}
+
+static rte_tx_callback_fn
+select_tx_callback(enum rte_bpf_arg_type type, uint32_t flags)
+{
+ if (flags & RTE_BPF_ETH_F_JIT) {
+ if (type == RTE_BPF_ARG_PTR)
+ return bpf_tx_callback_jit;
+ else if (type == RTE_BPF_ARG_PTR_MBUF)
+ return bpf_tx_callback_mb_jit;
+ } else if (type == RTE_BPF_ARG_PTR)
+ return bpf_tx_callback_vm;
+ else if (type == RTE_BPF_ARG_PTR_MBUF)
+ return bpf_tx_callback_mb_vm;
+
+ return NULL;
+}
+
+/*
+ * helper function to perform BPF unload for given port/queue.
+ * We have to introduce extra complexity (and a possible slowdown) here,
+ * as right now there is no safe generic way to remove an RX/TX callback
+ * while IO is active.
+ * Memory allocated for the callback handle itself is not freed either,
+ * as right now there is no safe way to do that without first stopping
+ * RX/TX on the given port/queue.
+ */
+static void
+bpf_eth_cbi_unload(struct bpf_eth_cbi *bc)
+{
+ /* mark this cbi as empty */
+ bc->cb = NULL;
+ rte_smp_mb();
+
+ /* make sure datapath doesn't use bpf anymore, then destroy bpf */
+ bpf_eth_cbi_wait(bc);
+ rte_bpf_destroy(bc->bpf);
+ bpf_eth_cbi_cleanup(bc);
+}
+
+static void
+bpf_eth_unload(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbi *bc;
+
+ bc = bpf_eth_cbh_find(cbh, port, queue);
+ if (bc == NULL || bc->cb == NULL)
+ return;
+
+ if (cbh->type == BPF_ETH_RX)
+ rte_eth_remove_rx_callback(port, queue, bc->cb);
+ else
+ rte_eth_remove_tx_callback(port, queue, bc->cb);
+
+ bpf_eth_cbi_unload(bc);
+}
+
+
+__rte_experimental void
+rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbh *cbh;
+
+ cbh = &rx_cbh;
+ rte_spinlock_lock(&cbh->lock);
+ bpf_eth_unload(cbh, port, queue);
+ rte_spinlock_unlock(&cbh->lock);
+}
+
+__rte_experimental void
+rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbh *cbh;
+
+ cbh = &tx_cbh;
+ rte_spinlock_lock(&cbh->lock);
+ bpf_eth_unload(cbh, port, queue);
+ rte_spinlock_unlock(&cbh->lock);
+}
+
+static int
+bpf_eth_elf_load(struct bpf_eth_cbh *cbh, uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags)
+{
+ int32_t rc;
+ struct bpf_eth_cbi *bc;
+ struct rte_bpf *bpf;
+ rte_rx_callback_fn frx;
+ rte_tx_callback_fn ftx;
+ struct rte_bpf_jit jit;
+
+ frx = NULL;
+ ftx = NULL;
+
+ if (prm == NULL || rte_eth_dev_is_valid_port(port) == 0 ||
+ queue >= RTE_MAX_QUEUES_PER_PORT)
+ return -EINVAL;
+
+ if (cbh->type == BPF_ETH_RX)
+ frx = select_rx_callback(prm->prog_arg.type, flags);
+ else
+ ftx = select_tx_callback(prm->prog_arg.type, flags);
+
+ if (frx == NULL && ftx == NULL) {
+ RTE_BPF_LOG(ERR, "%s(%u, %u): no callback selected;\n",
+ __func__, port, queue);
+ return -EINVAL;
+ }
+
+ bpf = rte_bpf_elf_load(prm, fname, sname);
+ if (bpf == NULL)
+ return -rte_errno;
+
+ rte_bpf_get_jit(bpf, &jit);
+
+ if ((flags & RTE_BPF_ETH_F_JIT) != 0 && jit.func == NULL) {
+ RTE_BPF_LOG(ERR, "%s(%u, %u): no JIT generated;\n",
+ __func__, port, queue);
+ rte_bpf_destroy(bpf);
+ return -ENOTSUP;
+ }
+
+ /* setup/update global callback info */
+ bc = bpf_eth_cbh_add(cbh, port, queue);
+ if (bc == NULL)
+ return -ENOMEM;
+
+ /* remove old one, if any */
+ if (bc->cb != NULL)
+ bpf_eth_unload(cbh, port, queue);
+
+ bc->bpf = bpf;
+ bc->jit = jit;
+
+ if (cbh->type == BPF_ETH_RX)
+ bc->cb = rte_eth_add_rx_callback(port, queue, frx, bc);
+ else
+ bc->cb = rte_eth_add_tx_callback(port, queue, ftx, bc);
+
+ if (bc->cb == NULL) {
+ rc = -rte_errno;
+ rte_bpf_destroy(bpf);
+ bpf_eth_cbi_cleanup(bc);
+ } else
+ rc = 0;
+
+ return rc;
+}
+
+__rte_experimental int
+rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags)
+{
+ int32_t rc;
+ struct bpf_eth_cbh *cbh;
+
+ cbh = &rx_cbh;
+ rte_spinlock_lock(&cbh->lock);
+ rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
+ rte_spinlock_unlock(&cbh->lock);
+
+ return rc;
+}
+
+__rte_experimental int
+rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags)
+{
+ int32_t rc;
+ struct bpf_eth_cbh *cbh;
+
+ cbh = &tx_cbh;
+ rte_spinlock_lock(&cbh->lock);
+ rc = bpf_eth_elf_load(cbh, port, queue, prm, fname, sname, flags);
+ rte_spinlock_unlock(&cbh->lock);
+
+ return rc;
+}
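Putting the ethdev API above together, a sketch of installing and removing a JIT-ed RX filter (editorial example; object file name, section name and sizes are illustrative):

    #include <stdint.h>
    #include <rte_errno.h>
    #include <rte_bpf_ethdev.h>

    static int
    rx_filter_example(uint16_t port_id, uint16_t queue_id)
    {
        int rc;
        const struct rte_bpf_prm prm = {
            .prog_arg = {
                .type = RTE_BPF_ARG_PTR, /* program sees raw packet data */
                .size = RTE_MBUF_DEFAULT_BUF_SIZE,
            },
        };

        rc = rte_bpf_eth_rx_elf_load(port_id, queue_id, &prm,
            "./filter.o", ".text", RTE_BPF_ETH_F_JIT);
        if (rc != 0)
            return rc;

        /*
         * ... receive traffic: packets for which the program returns 0
         * are dropped by the installed callback ...
         */

        rte_bpf_eth_rx_unload(port_id, queue_id);
        return 0;
    }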
diff --git a/lib/librte_bpf/bpf_validate.c b/lib/librte_bpf/bpf_validate.c
new file mode 100644
index 00000000..83983efc
--- /dev/null
+++ b/lib/librte_bpf/bpf_validate.c
@@ -0,0 +1,2248 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+
+#include "bpf_impl.h"
+
+struct bpf_reg_val {
+ struct rte_bpf_arg v;
+ uint64_t mask;
+ struct {
+ int64_t min;
+ int64_t max;
+ } s;
+ struct {
+ uint64_t min;
+ uint64_t max;
+ } u;
+};
+
+struct bpf_eval_state {
+ struct bpf_reg_val rv[EBPF_REG_NUM];
+ struct bpf_reg_val sv[MAX_BPF_STACK_SIZE / sizeof(uint64_t)];
+};
+
+/* possible instruction node colour */
+enum {
+ WHITE,
+ GREY,
+ BLACK,
+ MAX_NODE_COLOUR
+};
+
+/* possible edge types */
+enum {
+ UNKNOWN_EDGE,
+ TREE_EDGE,
+ BACK_EDGE,
+ CROSS_EDGE,
+ MAX_EDGE_TYPE
+};
+
+#define MAX_EDGES 2
+
+struct inst_node {
+ uint8_t colour;
+ uint8_t nb_edge:4;
+ uint8_t cur_edge:4;
+ uint8_t edge_type[MAX_EDGES];
+ uint32_t edge_dest[MAX_EDGES];
+ uint32_t prev_node;
+ struct bpf_eval_state *evst;
+};
+
+struct bpf_verifier {
+ const struct rte_bpf_prm *prm;
+ struct inst_node *in;
+ uint64_t stack_sz;
+ uint32_t nb_nodes;
+ uint32_t nb_jcc_nodes;
+ uint32_t node_colour[MAX_NODE_COLOUR];
+ uint32_t edge_type[MAX_EDGE_TYPE];
+ struct bpf_eval_state *evst;
+ struct inst_node *evin;
+ struct {
+ uint32_t num;
+ uint32_t cur;
+ struct bpf_eval_state *ent;
+ } evst_pool;
+};
+
+struct bpf_ins_check {
+ struct {
+ uint16_t dreg;
+ uint16_t sreg;
+ } mask;
+ struct {
+ uint16_t min;
+ uint16_t max;
+ } off;
+ struct {
+ uint32_t min;
+ uint32_t max;
+ } imm;
+ const char * (*check)(const struct ebpf_insn *);
+ const char * (*eval)(struct bpf_verifier *, const struct ebpf_insn *);
+};
+
+#define ALL_REGS RTE_LEN2MASK(EBPF_REG_NUM, uint16_t)
+#define WRT_REGS RTE_LEN2MASK(EBPF_REG_10, uint16_t)
+#define ZERO_REG RTE_LEN2MASK(EBPF_REG_1, uint16_t)
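+/*
+ * register masks used by ins_chk[] below:
+ * ALL_REGS - any of r0-r10 is accepted in the field;
+ * WRT_REGS - any writable register (r0-r9);
+ * ZERO_REG - the register field must be zero (r0).
+ */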
+
+/*
+ * check and evaluate functions for particular instruction types.
+ */
+
+static const char *
+check_alu_bele(const struct ebpf_insn *ins)
+{
+ if (ins->imm != 16 && ins->imm != 32 && ins->imm != 64)
+ return "invalid imm field";
+ return NULL;
+}
+
+static const char *
+eval_exit(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ RTE_SET_USED(ins);
+ if (bvf->evst->rv[EBPF_REG_0].v.type == RTE_BPF_ARG_UNDEF)
+ return "undefined return value";
+ return NULL;
+}
+
+/* set up the max possible bounds for the given mask */
+static void
+eval_umax_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ rv->u.max = mask;
+ rv->u.min = 0;
+}
+
+static void
+eval_smax_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ rv->s.max = mask >> 1;
+ rv->s.min = rv->s.max ^ UINT64_MAX;
+}
+
+static void
+eval_max_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_umax_bound(rv, mask);
+ eval_smax_bound(rv, mask);
+}
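+
+/*
+ * E.g. for mask == UINT8_MAX eval_max_bound() above yields
+ * u = [0, 0xff] and s = [-0x80, 0x7f].
+ */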
+
+static void
+eval_fill_max_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_max_bound(rv, mask);
+ rv->v.type = RTE_BPF_ARG_RAW;
+ rv->mask = mask;
+}
+
+static void
+eval_fill_imm64(struct bpf_reg_val *rv, uint64_t mask, uint64_t val)
+{
+ rv->mask = mask;
+ rv->s.min = val;
+ rv->s.max = val;
+ rv->u.min = val;
+ rv->u.max = val;
+}
+
+static void
+eval_fill_imm(struct bpf_reg_val *rv, uint64_t mask, int32_t imm)
+{
+ uint64_t v;
+
+ v = (uint64_t)imm & mask;
+
+ rv->v.type = RTE_BPF_ARG_RAW;
+ eval_fill_imm64(rv, mask, v);
+}
+
+static const char *
+eval_ld_imm64(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t i;
+ uint64_t val;
+ struct bpf_reg_val *rd;
+
+ val = (uint32_t)ins[0].imm | (uint64_t)(uint32_t)ins[1].imm << 32;
+
+ rd = bvf->evst->rv + ins->dst_reg;
+ rd->v.type = RTE_BPF_ARG_RAW;
+ eval_fill_imm64(rd, UINT64_MAX, val);
+
+ for (i = 0; i != bvf->prm->nb_xsym; i++) {
+
+ /* load of external variable */
+ if (bvf->prm->xsym[i].type == RTE_BPF_XTYPE_VAR &&
+ (uintptr_t)bvf->prm->xsym[i].var.val == val) {
+ rd->v = bvf->prm->xsym[i].var.desc;
+ eval_fill_imm64(rd, UINT64_MAX, 0);
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+eval_apply_mask(struct bpf_reg_val *rv, uint64_t mask)
+{
+ struct bpf_reg_val rt;
+
+ rt.u.min = rv->u.min & mask;
+ rt.u.max = rv->u.max & mask;
+ if (rt.u.min != rv->u.min || rt.u.max != rv->u.max) {
+ rv->u.max = RTE_MAX(rt.u.max, mask);
+ rv->u.min = 0;
+ }
+
+ eval_smax_bound(&rt, mask);
+ rv->s.max = RTE_MIN(rt.s.max, rv->s.max);
+ rv->s.min = RTE_MAX(rt.s.min, rv->s.min);
+
+ rv->mask = mask;
+}
+
+static void
+eval_add(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, uint64_t msk)
+{
+ struct bpf_reg_val rv;
+
+ rv.u.min = (rd->u.min + rs->u.min) & msk;
+ rv.u.max = (rd->u.min + rs->u.max) & msk;
+ rv.s.min = (rd->s.min + rs->s.min) & msk;
+ rv.s.max = (rd->s.max + rs->s.max) & msk;
+
+ /*
+ * if at least one of the operands is not constant,
+ * then check for overflow
+ */
+ if ((rd->u.min != rd->u.max || rs->u.min != rs->u.max) &&
+ (rv.u.min < rd->u.min || rv.u.max < rd->u.max))
+ eval_umax_bound(&rv, msk);
+
+ if ((rd->s.min != rd->s.max || rs->s.min != rs->s.max) &&
+ (((rs->s.min < 0 && rv.s.min > rd->s.min) ||
+ rv.s.min < rd->s.min) ||
+ ((rs->s.max < 0 && rv.s.max > rd->s.max) ||
+ rv.s.max < rd->s.max)))
+ eval_smax_bound(&rv, msk);
+
+ rd->s = rv.s;
+ rd->u = rv.u;
+}
+
+static void
+eval_sub(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, uint64_t msk)
+{
+ struct bpf_reg_val rv;
+
+ rv.u.min = (rd->u.min - rs->u.min) & msk;
+ rv.u.max = (rd->u.min - rs->u.max) & msk;
+ rv.s.min = (rd->s.min - rs->s.min) & msk;
+ rv.s.max = (rd->s.max - rs->s.max) & msk;
+
+ /*
+ * if at least one of the operands is not constant,
+ * then check for overflow
+ */
+ if ((rd->u.min != rd->u.max || rs->u.min != rs->u.max) &&
+ (rv.u.min > rd->u.min || rv.u.max > rd->u.max))
+ eval_umax_bound(&rv, msk);
+
+ if ((rd->s.min != rd->s.max || rs->s.min != rs->s.max) &&
+ (((rs->s.min < 0 && rv.s.min < rd->s.min) ||
+ rv.s.min > rd->s.min) ||
+ ((rs->s.max < 0 && rv.s.max < rd->s.max) ||
+ rv.s.max > rd->s.max)))
+ eval_smax_bound(&rv, msk);
+
+ rd->s = rv.s;
+ rd->u = rv.u;
+}
+
+static void
+eval_lsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* check if shift value is less than the number of result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ /* check for overflow */
+ if (rd->u.max > RTE_LEN2MASK(opsz - rs->u.max, uint64_t))
+ eval_umax_bound(rd, msk);
+ else {
+ rd->u.max <<= rs->u.max;
+ rd->u.min <<= rs->u.min;
+ }
+
+ /* check that dreg values are and will remain non-negative */
+ if ((uint64_t)rd->s.min >> (opsz - 1) != 0 || rd->s.max >=
+ RTE_LEN2MASK(opsz - rs->u.max - 1, int64_t))
+ eval_smax_bound(rd, msk);
+ else {
+ rd->s.max <<= rs->u.max;
+ rd->s.min <<= rs->u.min;
+ }
+}
+
+static void
+eval_rsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* check if shift value is less than the number of result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ rd->u.max >>= rs->u.min;
+ rd->u.min >>= rs->u.max;
+
+ /* check that dreg values are non-negative */
+ if ((uint64_t)rd->s.min >> (opsz - 1) != 0)
+ eval_smax_bound(rd, msk);
+ else {
+ rd->s.max >>= rs->u.min;
+ rd->s.min >>= rs->u.max;
+ }
+}
+
+static void
+eval_arsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ uint32_t shv;
+
+ /* check if shift value is less than the number of result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ rd->u.max = (int64_t)rd->u.max >> rs->u.min;
+ rd->u.min = (int64_t)rd->u.min >> rs->u.max;
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min <<= opsz;
+ rd->s.max <<= opsz;
+ shv = opsz;
+ } else
+ shv = 0;
+
+ if (rd->s.min < 0)
+ rd->s.min = (rd->s.min >> (rs->u.min + shv)) & msk;
+ else
+ rd->s.min = (rd->s.min >> (rs->u.max + shv)) & msk;
+
+ if (rd->s.max < 0)
+ rd->s.max = (rd->s.max >> (rs->u.max + shv)) & msk;
+ else
+ rd->s.max = (rd->s.max >> (rs->u.min + shv)) & msk;
+}
+
+static uint64_t
+eval_umax_bits(uint64_t v, size_t opsz)
+{
+ if (v == 0)
+ return 0;
+
+ v = __builtin_clzll(v);
+ return RTE_LEN2MASK(opsz - v, uint64_t);
+}
+
+/* estimate max possible value for (v1 & v2) */
+static uint64_t
+eval_uand_max(uint64_t v1, uint64_t v2, size_t opsz)
+{
+ v1 = eval_umax_bits(v1, opsz);
+ v2 = eval_umax_bits(v2, opsz);
+ return (v1 & v2);
+}
+
+/* estimate max possible value for (v1 | v2) */
+static uint64_t
+eval_uor_max(uint64_t v1, uint64_t v2, size_t opsz)
+{
+ v1 = eval_umax_bits(v1, opsz);
+ v2 = eval_umax_bits(v2, opsz);
+ return (v1 | v2);
+}
+
+static void
+eval_and(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min &= rs->u.min;
+ rd->u.max &= rs->u.max;
+ } else {
+ rd->u.max = eval_uand_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min &= rs->u.min;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min &= rs->s.min;
+ rd->s.max &= rs->s.max;
+ /* at least one of operand is non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uand_max(rd->s.max & (msk >> 1),
+ rs->s.max & (msk >> 1), opsz);
+ rd->s.min &= rs->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_or(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min |= rs->u.min;
+ rd->u.max |= rs->u.max;
+ } else {
+ rd->u.max = eval_uor_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min |= rs->u.min;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min |= rs->s.min;
+ rd->s.max |= rs->s.max;
+
+ /* both operands are non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uor_max(rd->s.max, rs->s.max, opsz);
+ rd->s.min |= rs->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_xor(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min ^= rs->u.min;
+ rd->u.max ^= rs->u.max;
+ } else {
+ rd->u.max = eval_uor_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min = 0;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min ^= rs->s.min;
+ rd->s.max ^= rs->s.max;
+
+ /* both operands are non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uor_max(rd->s.max, rs->s.max, opsz);
+ rd->s.min = 0;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_mul(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min = (rd->u.min * rs->u.min) & msk;
+ rd->u.max = (rd->u.max * rs->u.max) & msk;
+ /* check for overflow */
+ } else if (rd->u.max <= msk >> opsz / 2 &&
+ rs->u.max <= msk >> opsz / 2) {
+ rd->u.max *= rs->u.max;
+ rd->u.min *= rs->u.min;
+ } else
+ eval_umax_bound(rd, msk);
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min = (rd->s.min * rs->s.min) & msk;
+ rd->s.max = (rd->s.max * rs->s.max) & msk;
+ /* check that both operands are positive and no overflow */
+ } else if (rd->s.min >= 0 && rs->s.min >= 0) {
+ rd->s.max *= rs->s.max;
+ rd->s.min *= rs->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static const char *
+eval_divmod(uint32_t op, struct bpf_reg_val *rd, struct bpf_reg_val *rs,
+ size_t opsz, uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ if (rs->u.max == 0)
+ return "division by 0";
+ if (op == BPF_DIV) {
+ rd->u.min /= rs->u.min;
+ rd->u.max /= rs->u.max;
+ } else {
+ rd->u.min %= rs->u.min;
+ rd->u.max %= rs->u.max;
+ }
+ } else {
+ if (op == BPF_MOD)
+ rd->u.max = RTE_MIN(rd->u.max, rs->u.max - 1);
+ else
+ rd->u.max = rd->u.max;
+ rd->u.min = 0;
+ }
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min = (int32_t)rd->s.min;
+ rd->s.max = (int32_t)rd->s.max;
+ rs->s.min = (int32_t)rs->s.min;
+ rs->s.max = (int32_t)rs->s.max;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ if (rs->s.max == 0)
+ return "division by 0";
+ if (op == BPF_DIV) {
+ rd->s.min /= rs->s.min;
+ rd->s.max /= rs->s.max;
+ } else {
+ rd->s.min %= rs->s.min;
+ rd->s.max %= rs->s.max;
+ }
+ } else if (op == BPF_MOD) {
+ rd->s.max = RTE_MAX(rd->s.max, 0);
+ rd->s.min = RTE_MIN(rd->s.min, 0);
+ } else
+ eval_smax_bound(rd, msk);
+
+ rd->s.max &= msk;
+ rd->s.min &= msk;
+
+ return NULL;
+}
+
+static void
+eval_neg(struct bpf_reg_val *rd, size_t opsz, uint64_t msk)
+{
+ uint64_t ux, uy;
+ int64_t sx, sy;
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->u.min = (int32_t)rd->u.min;
+ rd->u.max = (int32_t)rd->u.max;
+ }
+
+ ux = -(int64_t)rd->u.min & msk;
+ uy = -(int64_t)rd->u.max & msk;
+
+ rd->u.max = RTE_MAX(ux, uy);
+ rd->u.min = RTE_MIN(ux, uy);
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min = (int32_t)rd->s.min;
+ rd->s.max = (int32_t)rd->s.max;
+ }
+
+ sx = -rd->s.min & msk;
+ sy = -rd->s.max & msk;
+
+ rd->s.max = RTE_MAX(sx, sy);
+ rd->s.min = RTE_MIN(sx, sy);
+}
+
+/*
+ * check that destination and source operands are in a defined state.
+ */
+static const char *
+eval_defined(const struct bpf_reg_val *dst, const struct bpf_reg_val *src)
+{
+ if (dst != NULL && dst->v.type == RTE_BPF_ARG_UNDEF)
+ return "dest reg value is undefined";
+ if (src != NULL && src->v.type == RTE_BPF_ARG_UNDEF)
+ return "src reg value is undefined";
+ return NULL;
+}
+
+static const char *
+eval_alu(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint64_t msk;
+ uint32_t op;
+ size_t opsz;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd, rs;
+
+ opsz = (BPF_CLASS(ins->code) == BPF_ALU) ?
+ sizeof(uint32_t) : sizeof(uint64_t);
+ opsz = opsz * CHAR_BIT;
+ msk = RTE_LEN2MASK(opsz, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+
+ if (BPF_SRC(ins->code) == BPF_X) {
+ rs = st->rv[ins->src_reg];
+ eval_apply_mask(&rs, msk);
+ } else
+ eval_fill_imm(&rs, msk, ins->imm);
+
+ eval_apply_mask(rd, msk);
+
+ op = BPF_OP(ins->code);
+
+ err = eval_defined((op != EBPF_MOV) ? rd : NULL,
+ (op != BPF_NEG) ? &rs : NULL);
+ if (err != NULL)
+ return err;
+
+ if (op == BPF_ADD)
+ eval_add(rd, &rs, msk);
+ else if (op == BPF_SUB)
+ eval_sub(rd, &rs, msk);
+ else if (op == BPF_LSH)
+ eval_lsh(rd, &rs, opsz, msk);
+ else if (op == BPF_RSH)
+ eval_rsh(rd, &rs, opsz, msk);
+ else if (op == EBPF_ARSH)
+ eval_arsh(rd, &rs, opsz, msk);
+ else if (op == BPF_AND)
+ eval_and(rd, &rs, opsz, msk);
+ else if (op == BPF_OR)
+ eval_or(rd, &rs, opsz, msk);
+ else if (op == BPF_XOR)
+ eval_xor(rd, &rs, opsz, msk);
+ else if (op == BPF_MUL)
+ eval_mul(rd, &rs, opsz, msk);
+ else if (op == BPF_DIV || op == BPF_MOD)
+ err = eval_divmod(op, rd, &rs, opsz, msk);
+ else if (op == BPF_NEG)
+ eval_neg(rd, opsz, msk);
+ else if (op == EBPF_MOV)
+ *rd = rs;
+ else
+ eval_max_bound(rd, msk);
+
+ return err;
+}
+
+static const char *
+eval_bele(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint64_t msk;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd;
+ const char *err;
+
+ msk = RTE_LEN2MASK(ins->imm, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+
+ err = eval_defined(rd, NULL);
+ if (err != NULL)
+ return err;
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ if (ins->code == (BPF_ALU | EBPF_END | EBPF_TO_BE))
+ eval_max_bound(rd, msk);
+ else
+ eval_apply_mask(rd, msk);
+#else
+ if (ins->code == (BPF_ALU | EBPF_END | EBPF_TO_LE))
+ eval_max_bound(rd, msk);
+ else
+ eval_apply_mask(rd, msk);
+#endif
+
+ return NULL;
+}
+
+static const char *
+eval_ptr(struct bpf_verifier *bvf, struct bpf_reg_val *rm, uint32_t opsz,
+ uint32_t align, int16_t off)
+{
+ struct bpf_reg_val rv;
+
+ /* calculate reg + offset */
+ eval_fill_imm(&rv, rm->mask, off);
+ eval_add(rm, &rv, rm->mask);
+
+ if (RTE_BPF_ARG_PTR_TYPE(rm->v.type) == 0)
+ return "destination is not a pointer";
+
+ if (rm->mask != UINT64_MAX)
+ return "pointer truncation";
+
+ if (rm->u.max + opsz > rm->v.size ||
+ (uint64_t)rm->s.max + opsz > rm->v.size ||
+ rm->s.min < 0)
+ return "memory boundary violation";
+
+ if (rm->u.max % align != 0)
+ return "unaligned memory access";
+
+ if (rm->v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ if (rm->u.max != rm->u.min || rm->s.max != rm->s.min ||
+ rm->u.max != (uint64_t)rm->s.max)
+ return "stack access with variable offset";
+
+ bvf->stack_sz = RTE_MAX(bvf->stack_sz, rm->v.size - rm->u.max);
+
+ /* pointer to mbuf */
+ } else if (rm->v.type == RTE_BPF_ARG_PTR_MBUF) {
+
+ if (rm->u.max != rm->u.min || rm->s.max != rm->s.min ||
+ rm->u.max != (uint64_t)rm->s.max)
+ return "mbuf access with variable offset";
+ }
+
+ return NULL;
+}
+
+static void
+eval_max_load(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_umax_bound(rv, mask);
+
+ /* full 64-bit load */
+ if (mask == UINT64_MAX)
+ eval_smax_bound(rv, mask);
+
+ /* zero-extend load */
+ rv->s.min = rv->u.min;
+ rv->s.max = rv->u.max;
+}
+
+
+static const char *
+eval_load(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t opsz;
+ uint64_t msk;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd, rs;
+ const struct bpf_reg_val *sv;
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+ rs = st->rv[ins->src_reg];
+ opsz = bpf_size(BPF_SIZE(ins->code));
+ msk = RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t);
+
+ err = eval_ptr(bvf, &rs, opsz, 1, ins->off);
+ if (err != NULL)
+ return err;
+
+ if (rs.v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ sv = st->sv + rs.u.max / sizeof(uint64_t);
+ if (sv->v.type == RTE_BPF_ARG_UNDEF || sv->mask < msk)
+ return "undefined value on the stack";
+
+ *rd = *sv;
+
+ /* pointer to mbuf */
+ } else if (rs.v.type == RTE_BPF_ARG_PTR_MBUF) {
+
+ if (rs.u.max == offsetof(struct rte_mbuf, next)) {
+ eval_fill_imm(rd, msk, 0);
+ rd->v = rs.v;
+ } else if (rs.u.max == offsetof(struct rte_mbuf, buf_addr)) {
+ eval_fill_imm(rd, msk, 0);
+ rd->v.type = RTE_BPF_ARG_PTR;
+ rd->v.size = rs.v.buf_size;
+ } else if (rs.u.max == offsetof(struct rte_mbuf, data_off)) {
+ eval_fill_imm(rd, msk, RTE_PKTMBUF_HEADROOM);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ } else {
+ eval_max_load(rd, msk);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ }
+
+ /* pointer to raw data */
+ } else {
+ eval_max_load(rd, msk);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ }
+
+ return NULL;
+}
+
+static const char *
+eval_mbuf_store(const struct bpf_reg_val *rv, uint32_t opsz)
+{
+ uint32_t i;
+
+ static const struct {
+ size_t off;
+ size_t sz;
+ } mbuf_ro_fields[] = {
+ { .off = offsetof(struct rte_mbuf, buf_addr),
+ .sz = sizeof(((struct rte_mbuf *)0)->buf_addr), },
+ { .off = offsetof(struct rte_mbuf, refcnt),
+ .sz = sizeof(((struct rte_mbuf *)0)->refcnt), },
+ { .off = offsetof(struct rte_mbuf, nb_segs),
+ .sz = sizeof(((struct rte_mbuf *)0)->nb_segs), },
+ { .off = offsetof(struct rte_mbuf, buf_len),
+ .sz = sizeof(((struct rte_mbuf *)0)->buf_len), },
+ { .off = offsetof(struct rte_mbuf, pool),
+ .sz = sizeof(((struct rte_mbuf *)0)->pool), },
+ { .off = offsetof(struct rte_mbuf, next),
+ .sz = sizeof(((struct rte_mbuf *)0)->next), },
+ { .off = offsetof(struct rte_mbuf, priv_size),
+ .sz = sizeof(((struct rte_mbuf *)0)->priv_size), },
+ };
+
+ /* look for a read-only field overlapping [rv->u.max, rv->u.max + opsz) */
+ for (i = 0; i != RTE_DIM(mbuf_ro_fields) &&
+ (mbuf_ro_fields[i].off + mbuf_ro_fields[i].sz <=
+ rv->u.max || rv->u.max + opsz <= mbuf_ro_fields[i].off);
+ i++)
+ ;
+
+ if (i != RTE_DIM(mbuf_ro_fields))
+ return "store to a read-only mbuf field";
+
+ return NULL;
+}
+
+static const char *
+eval_store(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t opsz;
+ uint64_t msk;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val rd, rs, *sv;
+
+ opsz = bpf_size(BPF_SIZE(ins->code));
+ msk = RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv[ins->dst_reg];
+
+ if (BPF_CLASS(ins->code) == BPF_STX) {
+ rs = st->rv[ins->src_reg];
+ eval_apply_mask(&rs, msk);
+ } else
+ eval_fill_imm(&rs, msk, ins->imm);
+
+ err = eval_defined(NULL, &rs);
+ if (err != NULL)
+ return err;
+
+ err = eval_ptr(bvf, &rd, opsz, 1, ins->off);
+ if (err != NULL)
+ return err;
+
+ if (rd.v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ sv = st->sv + rd.u.max / sizeof(uint64_t);
+ if (BPF_CLASS(ins->code) == BPF_STX &&
+ BPF_MODE(ins->code) == EBPF_XADD)
+ eval_max_bound(sv, msk);
+ else
+ *sv = rs;
+
+ /* pointer to mbuf */
+ } else if (rd.v.type == RTE_BPF_ARG_PTR_MBUF) {
+ err = eval_mbuf_store(&rd, opsz);
+ if (err != NULL)
+ return err;
+ }
+
+ return NULL;
+}
+
+static const char *
+eval_func_arg(struct bpf_verifier *bvf, const struct rte_bpf_arg *arg,
+ struct bpf_reg_val *rv)
+{
+ uint32_t i, n;
+ struct bpf_eval_state *st;
+ const char *err;
+
+ st = bvf->evst;
+
+ if (rv->v.type == RTE_BPF_ARG_UNDEF)
+ return "Undefined argument type";
+
+ if (arg->type != rv->v.type &&
+ arg->type != RTE_BPF_ARG_RAW &&
+ (arg->type != RTE_BPF_ARG_PTR ||
+ RTE_BPF_ARG_PTR_TYPE(rv->v.type) == 0))
+ return "Invalid argument type";
+
+ err = NULL;
+
+ /* argument is a pointer */
+ if (RTE_BPF_ARG_PTR_TYPE(arg->type) != 0) {
+
+ err = eval_ptr(bvf, rv, arg->size, 1, 0);
+
+ /*
+ * pointer to the variable on the stack is passed
+ * as an argument, mark stack space it occupies as initialized.
+ */
+ if (err == NULL && rv->v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ i = rv->u.max / sizeof(uint64_t);
+ n = i + arg->size / sizeof(uint64_t);
+ while (i != n) {
+ eval_fill_max_bound(st->sv + i, UINT64_MAX);
+ i++;
+ };
+ }
+ }
+
+ return err;
+}
+
+static const char *
+eval_call(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint64_t msk;
+ uint32_t i, idx;
+ struct bpf_reg_val *rv;
+ const struct rte_bpf_xsym *xsym;
+ const char *err;
+
+ idx = ins->imm;
+
+ if (idx >= bvf->prm->nb_xsym ||
+ bvf->prm->xsym[idx].type != RTE_BPF_XTYPE_FUNC)
+ return "invalid external function index";
+
+ /* for now don't support function calls on 32 bit platform */
+ if (sizeof(uint64_t) != sizeof(uintptr_t))
+ return "function calls are supported only for 64 bit apps";
+
+ xsym = bvf->prm->xsym + idx;
+
+ /* evaluate function arguments */
+ err = NULL;
+ for (i = 0; i != xsym->func.nb_args && err == NULL; i++) {
+ err = eval_func_arg(bvf, xsym->func.args + i,
+ bvf->evst->rv + EBPF_REG_1 + i);
+ }
+
+ /* R1-R5 argument/scratch registers */
+ for (i = EBPF_REG_1; i != EBPF_REG_6; i++)
+ bvf->evst->rv[i].v.type = RTE_BPF_ARG_UNDEF;
+
+ /* update return value */
+
+ rv = bvf->evst->rv + EBPF_REG_0;
+ rv->v = xsym->func.ret;
+ msk = (rv->v.type == RTE_BPF_ARG_RAW) ?
+ RTE_LEN2MASK(rv->v.size * CHAR_BIT, uint64_t) : UINTPTR_MAX;
+ eval_max_bound(rv, msk);
+ rv->mask = msk;
+
+ return err;
+}
+
+static void
+eval_jeq_jne(struct bpf_reg_val *trd, struct bpf_reg_val *trs)
+{
+ /* sreg is constant */
+ if (trs->u.min == trs->u.max) {
+ trd->u = trs->u;
+ /* dreg is constant */
+ } else if (trd->u.min == trd->u.max) {
+ trs->u = trd->u;
+ } else {
+ trd->u.max = RTE_MIN(trd->u.max, trs->u.max);
+ trd->u.min = RTE_MAX(trd->u.min, trs->u.min);
+ trs->u = trd->u;
+ }
+
+ /* sreg is constant */
+ if (trs->s.min == trs->s.max) {
+ trd->s = trs->s;
+ /* dreg is constant */
+ } else if (trd->s.min == trd->s.max) {
+ trs->s = trd->s;
+ } else {
+ trd->s.max = RTE_MIN(trd->s.max, trs->s.max);
+ trd->s.min = RTE_MAX(trd->s.min, trs->s.min);
+ trs->s = trd->s;
+ }
+}
+
+static void
+eval_jgt_jle(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->u.max = RTE_MIN(frd->u.max, frs->u.min);
+ trd->u.min = RTE_MAX(trd->u.min, trs->u.min + 1);
+}
+
+static void
+eval_jlt_jge(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->u.min = RTE_MAX(frd->u.min, frs->u.min);
+ trd->u.max = RTE_MIN(trd->u.max, trs->u.max - 1);
+}
+
+static void
+eval_jsgt_jsle(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->s.max = RTE_MIN(frd->s.max, frs->s.min);
+ trd->s.min = RTE_MAX(trd->s.min, trs->s.min + 1);
+}
+
+static void
+eval_jslt_jsge(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->s.min = RTE_MAX(frd->s.min, frs->s.min);
+ trd->s.max = RTE_MIN(trd->s.max, trs->s.max - 1);
+}
+
+static const char *
+eval_jcc(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t op;
+ const char *err;
+ struct bpf_eval_state *fst, *tst;
+ struct bpf_reg_val *frd, *frs, *trd, *trs;
+ struct bpf_reg_val rvf, rvt;
+
+ tst = bvf->evst;
+ fst = bvf->evin->evst;
+
+ frd = fst->rv + ins->dst_reg;
+ trd = tst->rv + ins->dst_reg;
+
+ if (BPF_SRC(ins->code) == BPF_X) {
+ frs = fst->rv + ins->src_reg;
+ trs = tst->rv + ins->src_reg;
+ } else {
+ frs = &rvf;
+ trs = &rvt;
+ eval_fill_imm(frs, UINT64_MAX, ins->imm);
+ eval_fill_imm(trs, UINT64_MAX, ins->imm);
+ }
+
+ err = eval_defined(trd, trs);
+ if (err != NULL)
+ return err;
+
+ op = BPF_OP(ins->code);
+
+ if (op == BPF_JEQ)
+ eval_jeq_jne(trd, trs);
+ else if (op == EBPF_JNE)
+ eval_jeq_jne(frd, frs);
+ else if (op == BPF_JGT)
+ eval_jgt_jle(trd, trs, frd, frs);
+ else if (op == EBPF_JLE)
+ eval_jgt_jle(frd, frs, trd, trs);
+ else if (op == EBPF_JLT)
+ eval_jlt_jge(trd, trs, frd, frs);
+ else if (op == BPF_JGE)
+ eval_jlt_jge(frd, frs, trd, trs);
+ else if (op == EBPF_JSGT)
+ eval_jsgt_jsle(trd, trs, frd, frs);
+ else if (op == EBPF_JSLE)
+ eval_jsgt_jsle(frd, frs, trd, trs);
+ else if (op == EBPF_JSLT)
+ eval_jslt_jsge(trd, trs, frd, frs);
+ else if (op == EBPF_JSGE)
+ eval_jslt_jsge(frd, frs, trd, trs);
+
+ return NULL;
+}
+
+/*
+ * validate parameters for each instruction type.
+ */
+static const struct bpf_ins_check ins_chk[UINT8_MAX + 1] = {
+ /* ALU IMM 32-bit instructions */
+ [(BPF_ALU | BPF_ADD | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_SUB | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_AND | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_OR | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_LSH | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_RSH | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_XOR | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_MUL | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | EBPF_MOV | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_DIV | BPF_K)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_MOD | BPF_K)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
+ },
+ /* ALU IMM 64-bit instructions */
+ [(EBPF_ALU64 | BPF_ADD | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_SUB | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_AND | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_OR | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_LSH | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_RSH | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | EBPF_ARSH | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_XOR | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_MUL | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | EBPF_MOV | BPF_K)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_DIV | BPF_K)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_MOD | BPF_K)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
+ },
+ /* ALU REG 32-bit instructions */
+ [(BPF_ALU | BPF_ADD | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_SUB | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_AND | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_OR | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_LSH | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_RSH | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_XOR | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_MUL | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_DIV | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_MOD | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | EBPF_MOV | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | BPF_NEG)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(BPF_ALU | EBPF_END | EBPF_TO_BE)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 16, .max = 64},
+ .check = check_alu_bele,
+ .eval = eval_bele,
+ },
+ [(BPF_ALU | EBPF_END | EBPF_TO_LE)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 16, .max = 64},
+ .check = check_alu_bele,
+ .eval = eval_bele,
+ },
+ /* ALU REG 64-bit instructions */
+ [(EBPF_ALU64 | BPF_ADD | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_SUB | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_AND | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_OR | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_LSH | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_RSH | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | EBPF_ARSH | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_XOR | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_MUL | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_DIV | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_MOD | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | EBPF_MOV | BPF_X)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ [(EBPF_ALU64 | BPF_NEG)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
+ },
+ /* load instructions */
+ [(BPF_LDX | BPF_MEM | BPF_B)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_load,
+ },
+ [(BPF_LDX | BPF_MEM | BPF_H)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_load,
+ },
+ [(BPF_LDX | BPF_MEM | BPF_W)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_load,
+ },
+ [(BPF_LDX | BPF_MEM | EBPF_DW)] = {
+ .mask = {.dreg = WRT_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_load,
+ },
+ /* load 64 bit immediate value */
+ [(BPF_LD | BPF_IMM | EBPF_DW)] = {
+ .mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_ld_imm64,
+ },
+ /* store REG instructions */
+ [(BPF_STX | BPF_MEM | BPF_B)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ [(BPF_STX | BPF_MEM | BPF_H)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ [(BPF_STX | BPF_MEM | BPF_W)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ [(BPF_STX | BPF_MEM | EBPF_DW)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ /* atomic add instructions */
+ [(BPF_STX | EBPF_XADD | BPF_W)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ [(BPF_STX | EBPF_XADD | EBPF_DW)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_store,
+ },
+ /* store IMM instructions */
+ [(BPF_ST | BPF_MEM | BPF_B)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_store,
+ },
+ [(BPF_ST | BPF_MEM | BPF_H)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_store,
+ },
+ [(BPF_ST | BPF_MEM | BPF_W)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_store,
+ },
+ [(BPF_ST | BPF_MEM | EBPF_DW)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_store,
+ },
+ /* jump instruction */
+ [(BPF_JMP | BPF_JA)] = {
+ .mask = { .dreg = ZERO_REG, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ },
+ /* jcc IMM instructions */
+ [(BPF_JMP | BPF_JEQ | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JNE | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JGT | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JLT | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JGE | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JLE | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSGT | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSLT | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSGE | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSLE | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JSET | BPF_K)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
+ },
+ /* jcc REG instructions */
+ [(BPF_JMP | BPF_JEQ | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JNE | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JGT | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JLT | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JGE | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JLE | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSGT | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSLT | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSGE | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | EBPF_JSLE | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ [(BPF_JMP | BPF_JSET | BPF_X)] = {
+ .mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
+ .off = { .min = 0, .max = UINT16_MAX},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
+ },
+ /* call instruction */
+ [(BPF_JMP | EBPF_CALL)] = {
+ .mask = { .dreg = ZERO_REG, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_call,
+ },
+ /* ret instruction */
+ [(BPF_JMP | EBPF_EXIT)] = {
+ .mask = { .dreg = ZERO_REG, .sreg = ZERO_REG},
+ .off = { .min = 0, .max = 0},
+ .imm = { .min = 0, .max = 0},
+ .eval = eval_exit,
+ },
+};
+
+/*
+ * make sure that instruction syntax is valid,
+ * and its fields don't violate particular instruction type restrictions.
+ */
+static const char *
+check_syntax(const struct ebpf_insn *ins)
+{
+
+ uint8_t op;
+ uint16_t off;
+ uint32_t imm;
+
+ op = ins->code;
+
+ if (ins_chk[op].mask.dreg == 0)
+ return "invalid opcode";
+
+ if ((ins_chk[op].mask.dreg & 1 << ins->dst_reg) == 0)
+ return "invalid dst-reg field";
+
+ if ((ins_chk[op].mask.sreg & 1 << ins->src_reg) == 0)
+ return "invalid src-reg field";
+
+ off = ins->off;
+ if (ins_chk[op].off.min > off || ins_chk[op].off.max < off)
+ return "invalid off field";
+
+ imm = ins->imm;
+ if (ins_chk[op].imm.min > imm || ins_chk[op].imm.max < imm)
+ return "invalid imm field";
+
+ if (ins_chk[op].check != NULL)
+ return ins_chk[op].check(ins);
+
+ return NULL;
+}
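+
+/*
+ * For example, with the table above a (BPF_LDX | BPF_MEM | BPF_W)
+ * instruction must use a writable destination register (WRT_REGS is
+ * assumed here to exclude the read-only frame pointer r10), any readable
+ * source register, an offset within [0, UINT16_MAX] and a zero immediate;
+ * a non-zero imm field for that opcode makes check_syntax() return
+ * "invalid imm field".
+ */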
+
+/*
+ * helper function, return instruction index for the given node.
+ */
+static uint32_t
+get_node_idx(const struct bpf_verifier *bvf, const struct inst_node *node)
+{
+ return node - bvf->in;
+}
+
+/*
+ * helper function, used to walk through constructed CFG.
+ */
+static struct inst_node *
+get_next_node(struct bpf_verifier *bvf, struct inst_node *node)
+{
+ uint32_t ce, ne, dst;
+
+ ne = node->nb_edge;
+ ce = node->cur_edge;
+ if (ce == ne)
+ return NULL;
+
+ node->cur_edge++;
+ dst = node->edge_dest[ce];
+ return bvf->in + dst;
+}
+
+static void
+set_node_colour(struct bpf_verifier *bvf, struct inst_node *node,
+ uint32_t new)
+{
+ uint32_t prev;
+
+ prev = node->colour;
+ node->colour = new;
+
+ bvf->node_colour[prev]--;
+ bvf->node_colour[new]++;
+}
+
+/*
+ * helper function, add new edge between two nodes.
+ */
+static int
+add_edge(struct bpf_verifier *bvf, struct inst_node *node, uint32_t nidx)
+{
+ uint32_t ne;
+
+ if (nidx > bvf->prm->nb_ins) {
+ RTE_BPF_LOG(ERR, "%s: program boundary violation at pc: %u, "
+ "next pc: %u\n",
+ __func__, get_node_idx(bvf, node), nidx);
+ return -EINVAL;
+ }
+
+ ne = node->nb_edge;
+ if (ne >= RTE_DIM(node->edge_dest)) {
+ RTE_BPF_LOG(ERR, "%s: internal error at pc: %u\n",
+ __func__, get_node_idx(bvf, node));
+ return -EINVAL;
+ }
+
+ node->edge_dest[ne] = nidx;
+ node->nb_edge = ne + 1;
+ return 0;
+}
+
+/*
+ * helper function, determine type of edge between two nodes.
+ */
+static void
+set_edge_type(struct bpf_verifier *bvf, struct inst_node *node,
+ const struct inst_node *next)
+{
+ uint32_t ce, clr, type;
+
+ ce = node->cur_edge - 1;
+ clr = next->colour;
+
+ type = UNKNOWN_EDGE;
+
+ if (clr == WHITE)
+ type = TREE_EDGE;
+ else if (clr == GREY)
+ type = BACK_EDGE;
+ else if (clr == BLACK)
+ /*
+ * in fact it could be either direct or cross edge,
+ * but for now, we don't need to distinguish between them.
+ */
+ type = CROSS_EDGE;
+
+ node->edge_type[ce] = type;
+ bvf->edge_type[type]++;
+}
+
+static struct inst_node *
+get_prev_node(struct bpf_verifier *bvf, struct inst_node *node)
+{
+ return bvf->in + node->prev_node;
+}
+
+/*
+ * Depth-First Search (DFS) through previously constructed
+ * Control Flow Graph (CFG).
+ * Information collected along this path is used later
+ * to determine whether there are any loops and/or unreachable instructions.
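+ * Standard node colouring is used: WHITE - not yet visited,
+ * GREY - visit in progress, BLACK - the node and all its descendants
+ * are fully processed.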
+ */
+static void
+dfs(struct bpf_verifier *bvf)
+{
+ struct inst_node *next, *node;
+
+ node = bvf->in;
+ while (node != NULL) {
+
+ if (node->colour == WHITE)
+ set_node_colour(bvf, node, GREY);
+
+ if (node->colour == GREY) {
+
+ /* find next unprocessed child node */
+ do {
+ next = get_next_node(bvf, node);
+ if (next == NULL)
+ break;
+ set_edge_type(bvf, node, next);
+ } while (next->colour != WHITE);
+
+ if (next != NULL) {
+ /* proceed with next child */
+ next->prev_node = get_node_idx(bvf, node);
+ node = next;
+ } else {
+ /*
+ * finished with the current node and all its children,
+ * proceed with parent
+ */
+ set_node_colour(bvf, node, BLACK);
+ node->cur_edge = 0;
+ node = get_prev_node(bvf, node);
+ }
+ } else
+ node = NULL;
+ }
+}
+
+/*
+ * report unreachable instructions.
+ */
+static void
+log_unreachable(const struct bpf_verifier *bvf)
+{
+ uint32_t i;
+ struct inst_node *node;
+ const struct ebpf_insn *ins;
+
+ for (i = 0; i != bvf->prm->nb_ins; i++) {
+
+ node = bvf->in + i;
+ ins = bvf->prm->ins + i;
+
+ if (node->colour == WHITE &&
+ ins->code != (BPF_LD | BPF_IMM | EBPF_DW))
+ RTE_BPF_LOG(ERR, "unreachable code at pc: %u;\n", i);
+ }
+}
+
+/*
+ * report loops detected.
+ */
+static void
+log_loop(const struct bpf_verifier *bvf)
+{
+ uint32_t i, j;
+ struct inst_node *node;
+
+ for (i = 0; i != bvf->prm->nb_ins; i++) {
+
+ node = bvf->in + i;
+ if (node->colour != BLACK)
+ continue;
+
+ for (j = 0; j != node->nb_edge; j++) {
+ if (node->edge_type[j] == BACK_EDGE)
+ RTE_BPF_LOG(ERR,
+ "loop at pc:%u --> pc:%u;\n",
+ i, node->edge_dest[j]);
+ }
+ }
+}
+
+/*
+ * The first pass goes through all instructions in the set, checks that each
+ * instruction is a valid one (correct syntax, valid field values, etc.)
+ * and constructs the control flow graph (CFG).
+ * Then a depth-first search is performed over the constructed graph.
+ * Programs with unreachable instructions and/or loops will be rejected.
+ */
+static int
+validate(struct bpf_verifier *bvf)
+{
+ int32_t rc;
+ uint32_t i;
+ struct inst_node *node;
+ const struct ebpf_insn *ins;
+ const char *err;
+
+ rc = 0;
+ for (i = 0; i < bvf->prm->nb_ins; i++) {
+
+ ins = bvf->prm->ins + i;
+ node = bvf->in + i;
+
+ err = check_syntax(ins);
+ if (err != NULL) {
+ RTE_BPF_LOG(ERR, "%s: %s at pc: %u\n",
+ __func__, err, i);
+ rc |= -EINVAL;
+ }
+
+ /*
+ * construct CFG: jcc nodes have two outgoing edges,
+ * 'exit' nodes have none, all other nodes have exactly one
+ * outgoing edge.
+ */
+ switch (ins->code) {
+ case (BPF_JMP | EBPF_EXIT):
+ break;
+ case (BPF_JMP | BPF_JEQ | BPF_K):
+ case (BPF_JMP | EBPF_JNE | BPF_K):
+ case (BPF_JMP | BPF_JGT | BPF_K):
+ case (BPF_JMP | EBPF_JLT | BPF_K):
+ case (BPF_JMP | BPF_JGE | BPF_K):
+ case (BPF_JMP | EBPF_JLE | BPF_K):
+ case (BPF_JMP | EBPF_JSGT | BPF_K):
+ case (BPF_JMP | EBPF_JSLT | BPF_K):
+ case (BPF_JMP | EBPF_JSGE | BPF_K):
+ case (BPF_JMP | EBPF_JSLE | BPF_K):
+ case (BPF_JMP | BPF_JSET | BPF_K):
+ case (BPF_JMP | BPF_JEQ | BPF_X):
+ case (BPF_JMP | EBPF_JNE | BPF_X):
+ case (BPF_JMP | BPF_JGT | BPF_X):
+ case (BPF_JMP | EBPF_JLT | BPF_X):
+ case (BPF_JMP | BPF_JGE | BPF_X):
+ case (BPF_JMP | EBPF_JLE | BPF_X):
+ case (BPF_JMP | EBPF_JSGT | BPF_X):
+ case (BPF_JMP | EBPF_JSLT | BPF_X):
+ case (BPF_JMP | EBPF_JSGE | BPF_X):
+ case (BPF_JMP | EBPF_JSLE | BPF_X):
+ case (BPF_JMP | BPF_JSET | BPF_X):
+ rc |= add_edge(bvf, node, i + ins->off + 1);
+ rc |= add_edge(bvf, node, i + 1);
+ bvf->nb_jcc_nodes++;
+ break;
+ case (BPF_JMP | BPF_JA):
+ rc |= add_edge(bvf, node, i + ins->off + 1);
+ break;
+ /* load 64 bit immediate value */
+ case (BPF_LD | BPF_IMM | EBPF_DW):
+ rc |= add_edge(bvf, node, i + 2);
+ i++;
+ break;
+ default:
+ rc |= add_edge(bvf, node, i + 1);
+ break;
+ }
+
+ bvf->nb_nodes++;
+ bvf->node_colour[WHITE]++;
+ }
+
+ if (rc != 0)
+ return rc;
+
+ dfs(bvf);
+
+ RTE_BPF_LOG(DEBUG, "%s(%p) stats:\n"
+ "nb_nodes=%u;\n"
+ "nb_jcc_nodes=%u;\n"
+ "node_color={[WHITE]=%u, [GREY]=%u,, [BLACK]=%u};\n"
+ "edge_type={[UNKNOWN]=%u, [TREE]=%u, [BACK]=%u, [CROSS]=%u};\n",
+ __func__, bvf,
+ bvf->nb_nodes,
+ bvf->nb_jcc_nodes,
+ bvf->node_colour[WHITE], bvf->node_colour[GREY],
+ bvf->node_colour[BLACK],
+ bvf->edge_type[UNKNOWN_EDGE], bvf->edge_type[TREE_EDGE],
+ bvf->edge_type[BACK_EDGE], bvf->edge_type[CROSS_EDGE]);
+
+ if (bvf->node_colour[BLACK] != bvf->nb_nodes) {
+ RTE_BPF_LOG(ERR, "%s(%p) unreachable instructions;\n",
+ __func__, bvf);
+ log_unreachable(bvf);
+ return -EINVAL;
+ }
+
+ if (bvf->node_colour[GREY] != 0 || bvf->node_colour[WHITE] != 0 ||
+ bvf->edge_type[UNKNOWN_EDGE] != 0) {
+ RTE_BPF_LOG(ERR, "%s(%p) DFS internal error;\n",
+ __func__, bvf);
+ return -EINVAL;
+ }
+
+ if (bvf->edge_type[BACK_EDGE] != 0) {
+ RTE_BPF_LOG(ERR, "%s(%p) loops detected;\n",
+ __func__, bvf);
+ log_loop(bvf);
+ return -EINVAL;
+ }
+
+ return 0;
+}
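+
+/*
+ * For example, the three instruction program:
+ * 0: (BPF_JMP | BPF_JEQ | BPF_K), off = 1
+ * 1: (EBPF_ALU64 | EBPF_MOV | BPF_X)
+ * 2: (BPF_JMP | EBPF_EXIT)
+ * gets edges 0->2 and 0->1 (jcc node), 1->2, and none for EXIT;
+ * DFS then colours all three nodes BLACK and finds no BACK_EDGE,
+ * so such a program passes the checks above.
+ */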
+
+/*
+ * helper functions get/free eval states.
+ */
+static struct bpf_eval_state *
+pull_eval_state(struct bpf_verifier *bvf)
+{
+ uint32_t n;
+
+ n = bvf->evst_pool.cur;
+ if (n == bvf->evst_pool.num)
+ return NULL;
+
+ bvf->evst_pool.cur = n + 1;
+ return bvf->evst_pool.ent + n;
+}
+
+static void
+push_eval_state(struct bpf_verifier *bvf)
+{
+ bvf->evst_pool.cur--;
+}
+
+static void
+evst_pool_fini(struct bpf_verifier *bvf)
+{
+ bvf->evst = NULL;
+ free(bvf->evst_pool.ent);
+ memset(&bvf->evst_pool, 0, sizeof(bvf->evst_pool));
+}
+
+static int
+evst_pool_init(struct bpf_verifier *bvf)
+{
+ uint32_t n;
+
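+ /* worst case: one saved state per jcc node, plus the initial state */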
+ n = bvf->nb_jcc_nodes + 1;
+
+ bvf->evst_pool.ent = calloc(n, sizeof(bvf->evst_pool.ent[0]));
+ if (bvf->evst_pool.ent == NULL)
+ return -ENOMEM;
+
+ bvf->evst_pool.num = n;
+ bvf->evst_pool.cur = 0;
+
+ bvf->evst = pull_eval_state(bvf);
+ return 0;
+}
+
+/*
+ * Save current eval state.
+ */
+static int
+save_eval_state(struct bpf_verifier *bvf, struct inst_node *node)
+{
+ struct bpf_eval_state *st;
+
+ /* get new eval_state for this node */
+ st = pull_eval_state(bvf);
+ if (st == NULL) {
+ RTE_BPF_LOG(ERR,
+ "%s: internal error (out of space) at pc: %u\n",
+ __func__, get_node_idx(bvf, node));
+ return -ENOMEM;
+ }
+
+ /* make a copy of current state */
+ memcpy(st, bvf->evst, sizeof(*st));
+
+ /* swap current state with new one */
+ node->evst = bvf->evst;
+ bvf->evst = st;
+
+ RTE_BPF_LOG(DEBUG, "%s(bvf=%p,node=%u) old/new states: %p/%p;\n",
+ __func__, bvf, get_node_idx(bvf, node), node->evst, bvf->evst);
+
+ return 0;
+}
+
+/*
+ * Restore previous eval state and mark current eval state as free.
+ */
+static void
+restore_eval_state(struct bpf_verifier *bvf, struct inst_node *node)
+{
+ RTE_BPF_LOG(DEBUG, "%s(bvf=%p,node=%u) old/new states: %p/%p;\n",
+ __func__, bvf, get_node_idx(bvf, node), bvf->evst, node->evst);
+
+ bvf->evst = node->evst;
+ node->evst = NULL;
+ push_eval_state(bvf);
+}
+
+static void
+log_eval_state(const struct bpf_verifier *bvf, const struct ebpf_insn *ins,
+ uint32_t pc, int32_t loglvl)
+{
+ const struct bpf_eval_state *st;
+ const struct bpf_reg_val *rv;
+
+ rte_log(loglvl, rte_bpf_logtype, "%s(pc=%u):\n", __func__, pc);
+
+ st = bvf->evst;
+ rv = st->rv + ins->dst_reg;
+
+ rte_log(loglvl, rte_bpf_logtype,
+ "r%u={\n"
+ "\tv={type=%u, size=%zu},\n"
+ "\tmask=0x%" PRIx64 ",\n"
+ "\tu={min=0x%" PRIx64 ", max=0x%" PRIx64 "},\n"
+ "\ts={min=%" PRId64 ", max=%" PRId64 "},\n"
+ "};\n",
+ ins->dst_reg,
+ rv->v.type, rv->v.size,
+ rv->mask,
+ rv->u.min, rv->u.max,
+ rv->s.min, rv->s.max);
+}
+
+/*
+ * Do second pass through CFG and try to evaluate instructions
+ * via each possible path.
+ * Right now evaluation functionality is quite limited.
+ * Still need to add extra checks for:
+ * - use/return uninitialized registers.
+ * - use uninitialized data from the stack.
+ * - memory boundaries violation.
+ */
+static int
+evaluate(struct bpf_verifier *bvf)
+{
+ int32_t rc;
+ uint32_t idx, op;
+ const char *err;
+ const struct ebpf_insn *ins;
+ struct inst_node *next, *node;
+
+ /* initial state of frame pointer */
+ static const struct bpf_reg_val rvfp = {
+ .v = {
+ .type = RTE_BPF_ARG_PTR_STACK,
+ .size = MAX_BPF_STACK_SIZE,
+ },
+ .mask = UINT64_MAX,
+ .u = {.min = MAX_BPF_STACK_SIZE, .max = MAX_BPF_STACK_SIZE},
+ .s = {.min = MAX_BPF_STACK_SIZE, .max = MAX_BPF_STACK_SIZE},
+ };
+
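+ /* r1 holds the program input argument, r10 - the frame pointer */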
+ bvf->evst->rv[EBPF_REG_1].v = bvf->prm->prog_arg;
+ bvf->evst->rv[EBPF_REG_1].mask = UINT64_MAX;
+ if (bvf->prm->prog_arg.type == RTE_BPF_ARG_RAW)
+ eval_max_bound(bvf->evst->rv + EBPF_REG_1, UINT64_MAX);
+
+ bvf->evst->rv[EBPF_REG_10] = rvfp;
+
+ ins = bvf->prm->ins;
+ node = bvf->in;
+ next = node;
+ rc = 0;
+
+ while (node != NULL && rc == 0) {
+
+ /*
+ * current node evaluation, make sure we evaluate
+ * each node only once.
+ */
+ if (next != NULL) {
+
+ bvf->evin = node;
+ idx = get_node_idx(bvf, node);
+ op = ins[idx].code;
+
+ /* for a jcc node, make a copy of the evaluation state */
+ if (node->nb_edge > 1)
+ rc |= save_eval_state(bvf, node);
+
+ if (ins_chk[op].eval != NULL && rc == 0) {
+ err = ins_chk[op].eval(bvf, ins + idx);
+ if (err != NULL) {
+ RTE_BPF_LOG(ERR, "%s: %s at pc: %u\n",
+ __func__, err, idx);
+ rc = -EINVAL;
+ }
+ }
+
+ log_eval_state(bvf, ins + idx, idx, RTE_LOG_DEBUG);
+ bvf->evin = NULL;
+ }
+
+ /* proceed through CFG */
+ next = get_next_node(bvf, node);
+ if (next != NULL) {
+
+ /* proceed with next child */
+ if (node->cur_edge == node->nb_edge &&
+ node->evst != NULL)
+ restore_eval_state(bvf, node);
+
+ next->prev_node = get_node_idx(bvf, node);
+ node = next;
+ } else {
+ /*
+ * finished with the current node and all its children,
+ * proceed with parent
+ */
+ node->cur_edge = 0;
+ node = get_prev_node(bvf, node);
+
+ /* finished */
+ if (node == bvf->in)
+ node = NULL;
+ }
+ }
+
+ return rc;
+}
+
+int
+bpf_validate(struct rte_bpf *bpf)
+{
+ int32_t rc;
+ struct bpf_verifier bvf;
+
+ /* check input argument type, don't allow mbuf ptr on 32-bit */
+ if (bpf->prm.prog_arg.type != RTE_BPF_ARG_RAW &&
+ bpf->prm.prog_arg.type != RTE_BPF_ARG_PTR &&
+ (sizeof(uint64_t) != sizeof(uintptr_t) ||
+ bpf->prm.prog_arg.type != RTE_BPF_ARG_PTR_MBUF)) {
+ RTE_BPF_LOG(ERR, "%s: unsupported argument type\n", __func__);
+ return -ENOTSUP;
+ }
+
+ memset(&bvf, 0, sizeof(bvf));
+ bvf.prm = &bpf->prm;
+ bvf.in = calloc(bpf->prm.nb_ins, sizeof(bvf.in[0]));
+ if (bvf.in == NULL)
+ return -ENOMEM;
+
+ rc = validate(&bvf);
+
+ if (rc == 0) {
+ rc = evst_pool_init(&bvf);
+ if (rc == 0)
+ rc = evaluate(&bvf);
+ evst_pool_fini(&bvf);
+ }
+
+ free(bvf.in);
+
+ /* copy collected info */
+ if (rc == 0)
+ bpf->stack_sz = bvf.stack_sz;
+
+ return rc;
+}
diff --git a/lib/librte_bpf/meson.build b/lib/librte_bpf/meson.build
new file mode 100644
index 00000000..bc0cd78f
--- /dev/null
+++ b/lib/librte_bpf/meson.build
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+allow_experimental_apis = true
+sources = files('bpf.c',
+ 'bpf_exec.c',
+ 'bpf_load.c',
+ 'bpf_pkt.c',
+ 'bpf_validate.c')
+
+if arch_subdir == 'x86' and cc.sizeof('void *') == 8
+ sources += files('bpf_jit_x86.c')
+endif
+
+install_headers = files('bpf_def.h',
+ 'rte_bpf.h',
+ 'rte_bpf_ethdev.h')
+
+deps += ['mbuf', 'net', 'ethdev']
+
+dep = cc.find_library('elf', required: false)
+if dep.found() and cc.has_header('libelf.h', dependencies: dep)
+ sources += files('bpf_load_elf.c')
+ ext_deps += dep
+endif
diff --git a/lib/librte_bpf/rte_bpf.h b/lib/librte_bpf/rte_bpf.h
new file mode 100644
index 00000000..ad62ef2c
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_BPF_H_
+#define _RTE_BPF_H_
+
+/**
+ * @file rte_bpf.h
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * RTE BPF support.
+ * librte_bpf provides a framework to load and execute eBPF bytecode
+ * inside user-space DPDK based applications.
+ * It supports a basic set of features from the eBPF spec
+ * (https://www.kernel.org/doc/Documentation/networking/filter.txt).
+ */
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <bpf_def.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Possible types for function/BPF program arguments.
+ */
+enum rte_bpf_arg_type {
+ RTE_BPF_ARG_UNDEF, /**< undefined */
+ RTE_BPF_ARG_RAW, /**< scalar value */
+ RTE_BPF_ARG_PTR = 0x10, /**< pointer to data buffer */
+ RTE_BPF_ARG_PTR_MBUF, /**< pointer to rte_mbuf */
+ RTE_BPF_ARG_PTR_STACK,
+};
+
+/**
+ * function argument information
+ */
+struct rte_bpf_arg {
+ enum rte_bpf_arg_type type;
+ /**
+ * for ptr type - max size of data buffer it points to
+ * for raw type - the size (in bytes) of the value
+ */
+ size_t size;
+ size_t buf_size;
+ /**< for mbuf ptr type, max size of rte_mbuf data buffer */
+};
+
+/**
+ * determine whether the argument is a pointer
+ */
+#define RTE_BPF_ARG_PTR_TYPE(x) ((x) & RTE_BPF_ARG_PTR)
+
+/**
+ * Possible types for external symbols.
+ */
+enum rte_bpf_xtype {
+ RTE_BPF_XTYPE_FUNC, /**< function */
+ RTE_BPF_XTYPE_VAR, /**< variable */
+ RTE_BPF_XTYPE_NUM
+};
+
+/**
+ * Definition for external symbols available in the BPF program.
+ */
+struct rte_bpf_xsym {
+ const char *name; /**< name */
+ enum rte_bpf_xtype type; /**< type */
+ union {
+ struct {
+ uint64_t (*val)(uint64_t, uint64_t, uint64_t,
+ uint64_t, uint64_t);
+ uint32_t nb_args;
+ struct rte_bpf_arg args[EBPF_FUNC_MAX_ARGS];
+ /**< Function arguments descriptions. */
+ struct rte_bpf_arg ret; /**< function return value. */
+ } func;
+ struct {
+ void *val; /**< actual memory location */
+ struct rte_bpf_arg desc; /**< type, size, etc. */
+ } var; /**< external variable */
+ };
+};
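+
+/*
+ * Example (illustrative, the names below are placeholders): making a
+ * helper function callable from eBPF code.
+ *
+ * static uint64_t
+ * my_helper(uint64_t arg, uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5)
+ * {
+ * return arg + 1;
+ * }
+ *
+ * static const struct rte_bpf_xsym xsym[] = {
+ * {
+ * .name = "my_helper",
+ * .type = RTE_BPF_XTYPE_FUNC,
+ * .func = {
+ * .val = my_helper,
+ * .nb_args = 1,
+ * .args[0] = {.type = RTE_BPF_ARG_RAW, .size = sizeof(uint64_t)},
+ * .ret = {.type = RTE_BPF_ARG_RAW, .size = sizeof(uint64_t)},
+ * },
+ * },
+ * };
+ *
+ * The xsym array and its length are then passed via struct rte_bpf_prm
+ * (see below).
+ */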
+
+/**
+ * Input parameters for loading eBPF code.
+ */
+struct rte_bpf_prm {
+ const struct ebpf_insn *ins; /**< array of eBPF instructions */
+ uint32_t nb_ins; /**< number of instructions in ins */
+ const struct rte_bpf_xsym *xsym;
+ /**< array of external symbols that eBPF code is allowed to reference */
+ uint32_t nb_xsym; /**< number of elements in xsym */
+ struct rte_bpf_arg prog_arg; /**< eBPF program input arg description */
+};
+
+/**
+ * Information about eBPF code compiled into native ISA.
+ */
+struct rte_bpf_jit {
+ uint64_t (*func)(void *); /**< JIT-ed native code */
+ size_t sz; /**< size of JIT-ed code */
+};
+
+struct rte_bpf;
+
+/**
+ * De-allocate all memory used by this eBPF execution context.
+ *
+ * @param bpf
+ * BPF handle to destroy.
+ */
+void __rte_experimental
+rte_bpf_destroy(struct rte_bpf *bpf);
+
+/**
+ * Create a new eBPF execution context and load given BPF code into it.
+ *
+ * @param prm
+ * Parameters used to create and initialise the BPF execution context.
+ * @return
+ * BPF handle that is used in future BPF operations,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - ENOMEM - can't reserve enough memory
+ */
+struct rte_bpf * __rte_experimental
+rte_bpf_load(const struct rte_bpf_prm *prm);
+
+/**
+ * Create a new eBPF execution context and load BPF code from given ELF
+ * file into it.
+ *
+ * @param prm
+ * Parameters used to create and initialise the BPF execution context.
+ * @param fname
+ * Pathname of an ELF file.
+ * @param sname
+ * Name of the executable section within the file to load.
+ * @return
+ * BPF handle that is used in future BPF operations,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - ENOMEM - can't reserve enough memory
+ */
+struct rte_bpf * __rte_experimental
+rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname,
+ const char *sname);
+
+/**
+ * Execute given BPF bytecode.
+ *
+ * @param bpf
+ * handle for the BPF code to execute.
+ * @param ctx
+ * pointer to input context.
+ * @return
+ * BPF execution return value.
+ */
+uint64_t __rte_experimental
+rte_bpf_exec(const struct rte_bpf *bpf, void *ctx);
+
+/**
+ * Execute given BPF bytecode over a set of input contexts.
+ *
+ * @param bpf
+ * handle for the BPF code to execute.
+ * @param ctx
+ * array of pointers to the input contexts.
+ * @param rc
+ * array of return values (one per input).
+ * @param num
+ * number of elements in ctx[] (and rc[]).
+ * @return
+ * number of successfully processed inputs.
+ */
+uint32_t __rte_experimental
+rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], uint64_t rc[],
+ uint32_t num);
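+
+/*
+ * For example (illustrative, BURST_SZ is a placeholder):
+ *
+ * void *ctx[BURST_SZ];
+ * uint64_t rc[BURST_SZ];
+ * uint32_t i, n;
+ *
+ * ... fill ctx[] with pointers to the input buffers ...
+ * n = rte_bpf_exec_burst(bpf, ctx, rc, BURST_SZ);
+ * for (i = 0; i != n; i++)
+ * ... rc[i] holds the program return value for ctx[i] ...
+ */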
+
+/**
+ * Provide information about natively compiled code for the given BPF handle.
+ *
+ * @param bpf
+ * handle for the BPF code.
+ * @param jit
+ * pointer to the rte_bpf_jit structure to be filled with related data.
+ * @return
+ * - -EINVAL if the parameters are invalid.
+ * - Zero if operation completed successfully.
+ */
+int __rte_experimental
+rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit);
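+
+/*
+ * Typical usage sketch (illustrative only; assumes a populated
+ * struct rte_bpf_prm named prm and an input buffer pkt that matches
+ * prm.prog_arg):
+ *
+ * struct rte_bpf *bpf;
+ * struct rte_bpf_jit jit;
+ * uint64_t rv;
+ *
+ * bpf = rte_bpf_load(&prm);
+ * if (bpf == NULL)
+ * ... handle the error, rte_errno is set ...
+ *
+ * rte_bpf_get_jit() tells whether a native version is available:
+ * if so, jit.func can be invoked directly, otherwise fall back to
+ * the interpreter.
+ *
+ * if (rte_bpf_get_jit(bpf, &jit) == 0 && jit.func != NULL)
+ * rv = jit.func(pkt);
+ * else
+ * rv = rte_bpf_exec(bpf, pkt);
+ *
+ * rte_bpf_destroy(bpf);
+ */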
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_H_ */
diff --git a/lib/librte_bpf/rte_bpf_ethdev.h b/lib/librte_bpf/rte_bpf_ethdev.h
new file mode 100644
index 00000000..31731e7a
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf_ethdev.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_BPF_ETHDEV_H_
+#define _RTE_BPF_ETHDEV_H_
+
+/**
+ * @file rte_bpf_ethdev.h
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * API to install BPF filter as RX/TX callbacks for eth devices.
+ * Note that right now:
+ * - it is not MT safe, i.e. it is not allowed to do load/unload for the
+ * same port/queue from different threads in parallel.
+ * - it does allow load/unload at runtime
+ * (while RX/TX is ongoing on the given port/queue).
+ * - only one BPF program per port/queue is allowed, i.e. a new load
+ * replaces the BPF program previously loaded for that port/queue.
+ * Filter behaviour - if the BPF program returns zero for a given packet:
+ * on RX - the packet is dropped inside the callback and no further
+ * processing for that packet happens;
+ * on TX - the packet remains unsent, and it is the responsibility of the
+ * user to handle such a situation (drop, try to send again, etc.).
+ */
+
+#include <rte_bpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ RTE_BPF_ETH_F_NONE = 0,
+ RTE_BPF_ETH_F_JIT = 0x1, /**< use code compiled into native ISA */
+};
+
+/**
+ * Unload previously loaded BPF program (if any) from given RX port/queue
+ * and remove appropriate RX port/queue callback.
+ *
+ * @param port
+ * The identifier of the ethernet port
+ * @param queue
+ * The identifier of the RX queue on the given port
+ */
+void __rte_experimental
+rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue);
+
+/**
+ * Unload previously loaded BPF program (if any) from given TX port/queue
+ * and remove appropriate TX port/queue callback.
+ *
+ * @param port
+ * The identifier of the ethernet port
+ * @param queue
+ * The identifier of the TX queue on the given port
+ */
+void __rte_experimental
+rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue);
+
+/**
+ * Load BPF program from the ELF file and install callback to execute it
+ * on given RX port/queue.
+ *
+ * @param port
+ * The identifier of the ethernet port
+ * @param queue
+ * The identifier of the RX queue on the given port
+ * @param prm
+ * Parameters used to create and initialise the BPF execution context.
+ * @param fname
+ * Pathname of an ELF file.
+ * @param sname
+ * Name of the executable section within the file to load.
+ * @param flags
+ * Flags that define the expected behavior of the loaded filter
+ * (i.e. JIT-ed/non-JIT-ed version to use).
+ * @return
+ * Zero on successful completion or negative error code otherwise.
+ */
+int __rte_experimental
+rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags);
+
+/**
+ * Load BPF program from the ELF file and install callback to execute it
+ * on given TX port/queue.
+ *
+ * @param port
+ * The identifier of the ethernet port
+ * @param queue
+ * The identifier of the TX queue on the given port
+ * @param prm
+ * Parameters used to create and initialise the BPF execution context.
+ * @param fname
+ * Pathname of an ELF file.
+ * @param sname
+ * Name of the executable section within the file to load.
+ * @param flags
+ * Flags that define the expected behavior of the loaded filter
+ * (i.e. JIT-ed/non-JIT-ed version to use).
+ * @return
+ * Zero on successful completion or negative error code otherwise.
+ */
+int __rte_experimental
+rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags);
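+
+/*
+ * Usage sketch (illustrative only; the file path, section name and
+ * argument description below are placeholders):
+ *
+ * struct rte_bpf_prm prm = {
+ * .prog_arg = {
+ * .type = RTE_BPF_ARG_PTR_MBUF,
+ * .size = sizeof(struct rte_mbuf),
+ * .buf_size = RTE_MBUF_DEFAULT_BUF_SIZE,
+ * },
+ * };
+ *
+ * if (rte_bpf_eth_rx_elf_load(port_id, queue_id, &prm,
+ * "/path/to/filter.o", ".text", RTE_BPF_ETH_F_JIT) != 0)
+ * ... handle the error ...
+ *
+ * ... packets on that RX queue are now filtered by the program ...
+ *
+ * rte_bpf_eth_rx_unload(port_id, queue_id);
+ */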
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_ETHDEV_H_ */
diff --git a/lib/librte_bpf/rte_bpf_version.map b/lib/librte_bpf/rte_bpf_version.map
new file mode 100644
index 00000000..a203e088
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf_version.map
@@ -0,0 +1,16 @@
+EXPERIMENTAL {
+ global:
+
+ rte_bpf_destroy;
+ rte_bpf_elf_load;
+ rte_bpf_eth_rx_elf_load;
+ rte_bpf_eth_rx_unload;
+ rte_bpf_eth_tx_elf_load;
+ rte_bpf_eth_tx_unload;
+ rte_bpf_exec;
+ rte_bpf_exec_burst;
+ rte_bpf_get_jit;
+ rte_bpf_load;
+
+ local: *;
+};