author    Luca Boccassi <luca.boccassi@gmail.com>    2018-08-14 18:55:37 +0100
committer Luca Boccassi <luca.boccassi@gmail.com>    2018-08-14 18:57:39 +0100
commit    ae1479de3027a4a0f68d90bf65ac6ff2b862b837 (patch)
tree      5dbca675b6717b6b61abbe1a9583d5166fed942b /lib
parent    8cee230dd1f0f9f31f4b0339c671d0b1ee14e111 (diff)
parent    b63264c8342e6a1b6971c79550d2af2024b6a4de (diff)
Merge branch 'upstream' into 18.08.x
Change-Id: Ifbda2d554199dd4d11e01f0090881b5f0103ae12
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib')
-rw-r--r--  lib/Makefile | 5
-rw-r--r--  lib/librte_bbdev/rte_bbdev.c | 4
-rw-r--r--  lib/librte_bitratestats/rte_bitrate.c | 6
-rw-r--r--  lib/librte_bpf/bpf.c | 5
-rw-r--r--  lib/librte_bpf/bpf_def.h | 5
-rw-r--r--  lib/librte_bpf/bpf_exec.c | 2
-rw-r--r--  lib/librte_bpf/bpf_impl.h | 14
-rw-r--r--  lib/librte_bpf/bpf_jit_x86.c | 17
-rw-r--r--  lib/librte_bpf/bpf_load.c | 49
-rw-r--r--  lib/librte_bpf/bpf_load_elf.c | 4
-rw-r--r--  lib/librte_bpf/bpf_validate.c | 1136
-rw-r--r--  lib/librte_bpf/meson.build | 2
-rw-r--r--  lib/librte_bpf/rte_bpf.h | 21
-rw-r--r--  lib/librte_cmdline/cmdline_parse.c | 9
-rw-r--r--  lib/librte_compat/Makefile | 33
-rw-r--r--  lib/librte_compressdev/rte_comp.c | 12
-rw-r--r--  lib/librte_compressdev/rte_comp.h | 35
-rw-r--r--  lib/librte_compressdev/rte_compressdev.c | 5
-rw-r--r--  lib/librte_cryptodev/Makefile | 1
-rw-r--r--  lib/librte_cryptodev/meson.build | 3
-rw-r--r--  lib/librte_cryptodev/rte_crypto.h | 88
-rw-r--r--  lib/librte_cryptodev/rte_crypto_asym.h | 496
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev.c | 352
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev.h | 391
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev_pmd.c | 12
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev_pmd.h | 155
-rw-r--r--  lib/librte_cryptodev/rte_cryptodev_version.map | 22
-rw-r--r--  lib/librte_eal/bsdapp/eal/Makefile | 5
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal.c | 76
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_alarm.c | 299
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_alarm_private.h | 19
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_hugepage_info.c | 4
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_interrupts.c | 464
-rw-r--r--  lib/librte_eal/bsdapp/eal/eal_memory.c | 265
-rw-r--r--  lib/librte_eal/bsdapp/eal/meson.build | 2
-rw-r--r--  lib/librte_eal/common/Makefile | 4
-rw-r--r--  lib/librte_eal/common/eal_common_class.c | 64
-rw-r--r--  lib/librte_eal/common/eal_common_dev.c | 247
-rw-r--r--  lib/librte_eal/common/eal_common_devargs.c | 193
-rw-r--r--  lib/librte_eal/common/eal_common_fbarray.c | 564
-rw-r--r--  lib/librte_eal/common/eal_common_log.c | 4
-rw-r--r--  lib/librte_eal/common/eal_common_memory.c | 504
-rw-r--r--  lib/librte_eal/common/eal_common_memzone.c | 70
-rw-r--r--  lib/librte_eal/common/eal_common_options.c | 78
-rw-r--r--  lib/librte_eal/common/eal_common_proc.c | 222
-rw-r--r--  lib/librte_eal/common/eal_common_thread.c | 9
-rw-r--r--  lib/librte_eal/common/eal_common_uuid.c | 193
-rw-r--r--  lib/librte_eal/common/eal_filesystem.h | 10
-rw-r--r--  lib/librte_eal/common/eal_internal_cfg.h | 6
-rw-r--r--  lib/librte_eal/common/eal_options.h | 4
-rw-r--r--  lib/librte_eal/common/eal_private.h | 46
-rw-r--r--  lib/librte_eal/common/include/rte_bitmap.h | 8
-rw-r--r--  lib/librte_eal/common/include/rte_bus.h | 4
-rw-r--r--  lib/librte_eal/common/include/rte_class.h | 134
-rw-r--r--  lib/librte_eal/common/include/rte_common.h | 29
-rw-r--r--  lib/librte_eal/common/include/rte_dev.h | 103
-rw-r--r--  lib/librte_eal/common/include/rte_devargs.h | 53
-rw-r--r--  lib/librte_eal/common/include/rte_eal.h | 16
-rw-r--r--  lib/librte_eal/common/include/rte_fbarray.h | 114
-rw-r--r--  lib/librte_eal/common/include/rte_memory.h | 54
-rw-r--r--  lib/librte_eal/common/include/rte_memzone.h | 24
-rw-r--r--  lib/librte_eal/common/include/rte_service.h | 56
-rw-r--r--  lib/librte_eal/common/include/rte_tailq.h | 3
-rw-r--r--  lib/librte_eal/common/include/rte_uuid.h | 129
-rw-r--r--  lib/librte_eal/common/include/rte_version.h | 2
-rw-r--r--  lib/librte_eal/common/include/rte_vfio.h | 40
-rw-r--r--  lib/librte_eal/common/malloc_elem.c | 93
-rw-r--r--  lib/librte_eal/common/malloc_elem.h | 6
-rw-r--r--  lib/librte_eal/common/malloc_heap.c | 126
-rw-r--r--  lib/librte_eal/common/malloc_heap.h | 4
-rw-r--r--  lib/librte_eal/common/meson.build | 4
-rw-r--r--  lib/librte_eal/common/rte_service.c | 75
-rw-r--r--  lib/librte_eal/linuxapp/eal/Makefile | 5
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal.c | 107
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_alarm.c | 9
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 95
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_interrupts.c | 16
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_log.c | 13
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_memalloc.c | 212
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_memory.c | 380
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_thread.c | 4
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_vfio.c | 128
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_vfio.h | 1
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 8
-rw-r--r--  lib/librte_eal/linuxapp/eal/meson.build | 1
-rw-r--r--  lib/librte_eal/meson.build | 5
-rw-r--r--  lib/librte_eal/rte_eal_version.map | 54
-rw-r--r--  lib/librte_ethdev/Makefile | 2
-rw-r--r--  lib/librte_ethdev/meson.build | 2
-rw-r--r--  lib/librte_ethdev/rte_ethdev.c | 569
-rw-r--r--  lib/librte_ethdev/rte_ethdev.h | 140
-rw-r--r--  lib/librte_ethdev/rte_ethdev_driver.h | 27
-rw-r--r--  lib/librte_ethdev/rte_ethdev_pci.h | 4
-rw-r--r--  lib/librte_ethdev/rte_ethdev_version.map | 8
-rw-r--r--  lib/librte_ethdev/rte_flow.c | 109
-rw-r--r--  lib/librte_ethdev/rte_flow_driver.h | 63
-rw-r--r--  lib/librte_ethdev/rte_tm.h | 4
-rw-r--r--  lib/librte_eventdev/Makefile | 9
-rw-r--r--  lib/librte_eventdev/meson.build | 9
-rw-r--r--  lib/librte_eventdev/rte_event_crypto_adapter.c | 4
-rw-r--r--  lib/librte_eventdev/rte_event_eth_rx_adapter.c | 1612
-rw-r--r--  lib/librte_eventdev/rte_event_eth_rx_adapter.h | 86
-rw-r--r--  lib/librte_eventdev/rte_event_ring.c | 15
-rw-r--r--  lib/librte_eventdev/rte_event_timer_adapter.c | 4
-rw-r--r--  lib/librte_eventdev/rte_eventdev.c | 15
-rw-r--r--  lib/librte_eventdev/rte_eventdev_version.map | 25
-rw-r--r--  lib/librte_flow_classify/rte_flow_classify.c | 5
-rw-r--r--  lib/librte_gso/Makefile | 1
-rw-r--r--  lib/librte_gso/gso_common.h | 3
-rw-r--r--  lib/librte_gso/gso_udp4.c | 81
-rw-r--r--  lib/librte_gso/gso_udp4.h | 42
-rw-r--r--  lib/librte_gso/meson.build | 2
-rw-r--r--  lib/librte_gso/rte_gso.c | 24
-rw-r--r--  lib/librte_gso/rte_gso.h | 6
-rw-r--r--  lib/librte_hash/meson.build | 1
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash.c | 702
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash.h | 22
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash_x86.h | 164
-rw-r--r--  lib/librte_hash/rte_hash.h | 86
-rw-r--r--  lib/librte_hash/rte_hash_version.map | 8
-rw-r--r--  lib/librte_kni/meson.build | 2
-rw-r--r--  lib/librte_kni/rte_kni.c | 3
-rw-r--r--  lib/librte_kvargs/Makefile | 2
-rw-r--r--  lib/librte_kvargs/meson.build | 5
-rw-r--r--  lib/librte_kvargs/rte_kvargs.c | 57
-rw-r--r--  lib/librte_kvargs/rte_kvargs.h | 58
-rw-r--r--  lib/librte_kvargs/rte_kvargs_version.map | 8
-rw-r--r--  lib/librte_latencystats/rte_latencystats.c | 8
-rw-r--r--  lib/librte_mbuf/Makefile | 1
-rw-r--r--  lib/librte_mbuf/meson.build | 1
-rw-r--r--  lib/librte_mbuf/rte_mbuf.c | 2
-rw-r--r--  lib/librte_mbuf/rte_mbuf.h | 20
-rw-r--r--  lib/librte_mbuf/rte_mbuf_pool_ops.c | 10
-rw-r--r--  lib/librte_mbuf/rte_mbuf_pool_ops.h | 13
-rw-r--r--  lib/librte_mbuf/rte_mbuf_ptype.h | 6
-rw-r--r--  lib/librte_mbuf/rte_mbuf_version.map | 4
-rw-r--r--  lib/librte_member/rte_member.c | 5
-rw-r--r--  lib/librte_mempool/Makefile | 5
-rw-r--r--  lib/librte_mempool/meson.build | 6
-rw-r--r--  lib/librte_mempool/rte_mempool.c | 210
-rw-r--r--  lib/librte_mempool/rte_mempool.h | 201
-rw-r--r--  lib/librte_mempool/rte_mempool_ops_default.c | 25
-rw-r--r--  lib/librte_mempool/rte_mempool_version.map | 6
-rw-r--r--  lib/librte_meter/rte_meter.c | 4
-rw-r--r--  lib/librte_meter/rte_meter.h | 5
-rw-r--r--  lib/librte_meter/rte_meter_version.map | 2
-rw-r--r--  lib/librte_metrics/rte_metrics.c | 15
-rw-r--r--  lib/librte_net/rte_ip.h | 28
-rw-r--r--  lib/librte_power/channel_commands.h | 3
-rw-r--r--  lib/librte_power/power_acpi_cpufreq.c | 21
-rw-r--r--  lib/librte_power/power_acpi_cpufreq.h | 16
-rw-r--r--  lib/librte_power/power_kvm_vm.c | 8
-rw-r--r--  lib/librte_power/power_kvm_vm.h | 17
-rw-r--r--  lib/librte_power/rte_power.c | 3
-rw-r--r--  lib/librte_power/rte_power.h | 32
-rw-r--r--  lib/librte_power/rte_power_version.map | 9
-rw-r--r--  lib/librte_rawdev/Makefile | 1
-rw-r--r--  lib/librte_rawdev/meson.build | 1
-rw-r--r--  lib/librte_rawdev/rte_rawdev.c | 73
-rw-r--r--  lib/librte_rawdev/rte_rawdev.h | 55
-rw-r--r--  lib/librte_rawdev/rte_rawdev_pmd.h | 28
-rw-r--r--  lib/librte_rawdev/rte_rawdev_version.map | 3
-rw-r--r--  lib/librte_ring/rte_ring.h | 7
-rw-r--r--  lib/librte_ring/rte_ring_c11_mem.h | 8
-rw-r--r--  lib/librte_security/rte_security.c | 37
-rw-r--r--  lib/librte_security/rte_security.h | 34
-rw-r--r--  lib/librte_security/rte_security_driver.h | 34
-rw-r--r--  lib/librte_vhost/iotlb.c | 10
-rw-r--r--  lib/librte_vhost/iotlb.h | 2
-rw-r--r--  lib/librte_vhost/rte_vhost.h | 8
-rw-r--r--  lib/librte_vhost/socket.c | 6
-rw-r--r--  lib/librte_vhost/vhost.c | 161
-rw-r--r--  lib/librte_vhost/vhost.h | 143
-rw-r--r--  lib/librte_vhost/vhost_crypto.c | 6
-rw-r--r--  lib/librte_vhost/vhost_user.c | 303
-rw-r--r--  lib/librte_vhost/vhost_user.h | 13
-rw-r--r--  lib/librte_vhost/virtio_net.c | 1552
-rw-r--r--  lib/meson.build | 18
178 files changed, 11110 insertions(+), 4419 deletions(-)
diff --git a/lib/Makefile b/lib/Makefile
index d82462ba..afa604e2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -4,7 +4,10 @@
include $(RTE_SDK)/mk/rte.vars.mk
DIRS-y += librte_compat
+DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
+DEPDIRS-librte_kvargs := librte_compat
DIRS-$(CONFIG_RTE_LIBRTE_EAL) += librte_eal
+DEPDIRS-librte_eal := librte_kvargs
DIRS-$(CONFIG_RTE_LIBRTE_PCI) += librte_pci
DEPDIRS-librte_pci := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring
@@ -76,8 +79,6 @@ DEPDIRS-librte_flow_classify := librte_net librte_table librte_acl
DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched
DEPDIRS-librte_sched := librte_eal librte_mempool librte_mbuf librte_net
DEPDIRS-librte_sched += librte_timer
-DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs
-DEPDIRS-librte_kvargs := librte_eal
DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor
DEPDIRS-librte_distributor := librte_eal librte_mbuf librte_ethdev
DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port
diff --git a/lib/librte_bbdev/rte_bbdev.c b/lib/librte_bbdev/rte_bbdev.c
index 28434e08..c4cc18d9 100644
--- a/lib/librte_bbdev/rte_bbdev.c
+++ b/lib/librte_bbdev/rte_bbdev.c
@@ -1125,9 +1125,7 @@ rte_bbdev_op_type_str(enum rte_bbdev_op_type op_type)
return NULL;
}
-RTE_INIT(rte_bbdev_init_log);
-static void
-rte_bbdev_init_log(void)
+RTE_INIT(rte_bbdev_init_log)
{
bbdev_logtype = rte_log_register("lib.bbdev");
if (bbdev_logtype >= 0)
diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c
index 964e3c39..c4b28f62 100644
--- a/lib/librte_bitratestats/rte_bitrate.c
+++ b/lib/librte_bitratestats/rte_bitrate.c
@@ -47,6 +47,9 @@ rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data)
};
int return_value;
+ if (bitrate_data == NULL)
+ return -EINVAL;
+
return_value = rte_metrics_reg_names(&names[0], ARRAY_SIZE(names));
if (return_value >= 0)
bitrate_data->id_stats_set = return_value;
@@ -65,6 +68,9 @@ rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data,
const int64_t alpha_percent = 20;
uint64_t values[6];
+ if (bitrate_data == NULL)
+ return -EINVAL;
+
ret_code = rte_eth_stats_get(port_id, &eth_stats);
if (ret_code != 0)
return ret_code;
diff --git a/lib/librte_bpf/bpf.c b/lib/librte_bpf/bpf.c
index dc6d1099..f590c8c3 100644
--- a/lib/librte_bpf/bpf.c
+++ b/lib/librte_bpf/bpf.c
@@ -53,10 +53,7 @@ bpf_jit(struct rte_bpf *bpf)
return rc;
}
-RTE_INIT(rte_bpf_init_log);
-
-static void
-rte_bpf_init_log(void)
+RTE_INIT(rte_bpf_init_log)
{
rte_bpf_logtype = rte_log_register("lib.bpf");
if (rte_bpf_logtype >= 0)
diff --git a/lib/librte_bpf/bpf_def.h b/lib/librte_bpf/bpf_def.h
index 6b69de34..c10f3aec 100644
--- a/lib/librte_bpf/bpf_def.h
+++ b/lib/librte_bpf/bpf_def.h
@@ -131,6 +131,11 @@ struct ebpf_insn {
int32_t imm;
};
+/*
+ * eBPF allows functions with R1-R5 as arguments.
+ */
+#define EBPF_FUNC_MAX_ARGS (EBPF_REG_6 - EBPF_REG_1)
+
#ifdef __cplusplus
}
#endif
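Since eBPF passes function arguments in registers R1 through R5, the new macro works out to EBPF_FUNC_MAX_ARGS = EBPF_REG_6 - EBPF_REG_1 = 5. A hypothetical compile-time check, not part of the patch, that pins this down:

#include <assert.h>
#include <rte_bpf.h>	/* pulls in bpf_def.h */

static_assert(EBPF_FUNC_MAX_ARGS == 5,
	"R1..R5 give an eBPF function at most five arguments");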
diff --git a/lib/librte_bpf/bpf_exec.c b/lib/librte_bpf/bpf_exec.c
index e373b1f3..6a79139c 100644
--- a/lib/librte_bpf/bpf_exec.c
+++ b/lib/librte_bpf/bpf_exec.c
@@ -402,7 +402,7 @@ bpf_exec(const struct rte_bpf *bpf, uint64_t reg[EBPF_REG_NUM])
break;
/* call instructions */
case (BPF_JMP | EBPF_CALL):
- reg[EBPF_REG_0] = bpf->prm.xsym[ins->imm].func(
+ reg[EBPF_REG_0] = bpf->prm.xsym[ins->imm].func.val(
reg[EBPF_REG_1], reg[EBPF_REG_2],
reg[EBPF_REG_3], reg[EBPF_REG_4],
reg[EBPF_REG_5]);
diff --git a/lib/librte_bpf/bpf_impl.h b/lib/librte_bpf/bpf_impl.h
index 5d7e65c3..b577e2cb 100644
--- a/lib/librte_bpf/bpf_impl.h
+++ b/lib/librte_bpf/bpf_impl.h
@@ -34,6 +34,20 @@ extern int rte_bpf_logtype;
#define RTE_BPF_LOG(lvl, fmt, args...) \
rte_log(RTE_LOG_## lvl, rte_bpf_logtype, fmt, ##args)
+static inline size_t
+bpf_size(uint32_t bpf_op_sz)
+{
+ if (bpf_op_sz == BPF_B)
+ return sizeof(uint8_t);
+ else if (bpf_op_sz == BPF_H)
+ return sizeof(uint16_t);
+ else if (bpf_op_sz == BPF_W)
+ return sizeof(uint32_t);
+ else if (bpf_op_sz == EBPF_DW)
+ return sizeof(uint64_t);
+ return 0;
+}
+
#ifdef __cplusplus
}
#endif
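bpf_size() moves into this shared header (it is deleted from bpf_jit_x86.c below) so that both the JIT and the new validator can map a BPF width code to a byte count. A hedged sketch of the caller shape, mirroring how eval_load()/eval_store() in bpf_validate.c use it; the helper name is illustrative and assumes the usual bpf_impl.h/rte_common.h context:

/* Decode the access width of a load/store opcode and build the
 * matching value mask (1, 2, 4 or 8 bytes wide). */
static uint64_t
access_mask(const struct ebpf_insn *ins)
{
	size_t opsz = bpf_size(BPF_SIZE(ins->code));

	return RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t);
}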
diff --git a/lib/librte_bpf/bpf_jit_x86.c b/lib/librte_bpf/bpf_jit_x86.c
index 111e028d..68ea389f 100644
--- a/lib/librte_bpf/bpf_jit_x86.c
+++ b/lib/librte_bpf/bpf_jit_x86.c
@@ -113,20 +113,6 @@ union bpf_jit_imm {
uint8_t u8[4];
};
-static size_t
-bpf_size(uint32_t bpf_op_sz)
-{
- if (bpf_op_sz == BPF_B)
- return sizeof(uint8_t);
- else if (bpf_op_sz == BPF_H)
- return sizeof(uint16_t);
- else if (bpf_op_sz == BPF_W)
- return sizeof(uint32_t);
- else if (bpf_op_sz == EBPF_DW)
- return sizeof(uint64_t);
- return 0;
-}
-
/*
* In many cases for imm8 we can produce shorter code.
*/
@@ -1294,7 +1280,8 @@ emit(struct bpf_jit_state *st, const struct rte_bpf *bpf)
break;
/* call instructions */
case (BPF_JMP | EBPF_CALL):
- emit_call(st, (uintptr_t)bpf->prm.xsym[ins->imm].func);
+ emit_call(st,
+ (uintptr_t)bpf->prm.xsym[ins->imm].func.val);
break;
/* return instruction */
case (BPF_JMP | EBPF_EXIT):
diff --git a/lib/librte_bpf/bpf_load.c b/lib/librte_bpf/bpf_load.c
index d1c9abd7..2b84fe72 100644
--- a/lib/librte_bpf/bpf_load.c
+++ b/lib/librte_bpf/bpf_load.c
@@ -51,17 +51,64 @@ bpf_load(const struct rte_bpf_prm *prm)
return bpf;
}
+/*
+ * Check that a user-provided external symbol is valid.
+ */
+static int
+bpf_check_xsym(const struct rte_bpf_xsym *xsym)
+{
+ uint32_t i;
+
+ if (xsym->name == NULL)
+ return -EINVAL;
+
+ if (xsym->type == RTE_BPF_XTYPE_VAR) {
+ if (xsym->var.desc.type == RTE_BPF_ARG_UNDEF)
+ return -EINVAL;
+ } else if (xsym->type == RTE_BPF_XTYPE_FUNC) {
+
+ if (xsym->func.nb_args > EBPF_FUNC_MAX_ARGS)
+ return -EINVAL;
+
+ /* check function arguments */
+ for (i = 0; i != xsym->func.nb_args; i++) {
+ if (xsym->func.args[i].type == RTE_BPF_ARG_UNDEF)
+ return -EINVAL;
+ }
+
+ /* check return value info */
+ if (xsym->func.ret.type != RTE_BPF_ARG_UNDEF &&
+ xsym->func.ret.size == 0)
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
__rte_experimental struct rte_bpf *
rte_bpf_load(const struct rte_bpf_prm *prm)
{
struct rte_bpf *bpf;
int32_t rc;
+ uint32_t i;
- if (prm == NULL || prm->ins == NULL) {
+ if (prm == NULL || prm->ins == NULL ||
+ (prm->nb_xsym != 0 && prm->xsym == NULL)) {
rte_errno = EINVAL;
return NULL;
}
+ rc = 0;
+ for (i = 0; i != prm->nb_xsym && rc == 0; i++)
+ rc = bpf_check_xsym(prm->xsym + i);
+
+ if (rc != 0) {
+ rte_errno = -rc;
+ RTE_BPF_LOG(ERR, "%s: %d-th xsym is invalid\n", __func__, i);
+ return NULL;
+ }
+
bpf = bpf_load(prm);
if (bpf == NULL) {
rte_errno = ENOMEM;
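rte_bpf_load() now screens the xsym array before doing any real work. A hedged sketch (all names hypothetical) of a function symbol that satisfies the new bpf_check_xsym() rules under the reworked rte_bpf_xsym layout shown in the rte_bpf.h hunk further down:

static uint64_t
my_helper(uint64_t v, uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5)
{
	(void)a2; (void)a3; (void)a4; (void)a5;
	return v + 1;
}

static const struct rte_bpf_xsym my_xsym = {
	.name = "my_helper",			/* must be non-NULL */
	.type = RTE_BPF_XTYPE_FUNC,
	.func = {
		.val = my_helper,
		.nb_args = 1,			/* <= EBPF_FUNC_MAX_ARGS */
		.args = {
			[0] = {
				.type = RTE_BPF_ARG_RAW, /* not ..._UNDEF */
				.size = sizeof(uint64_t),
			},
		},
		/* a non-UNDEF return type must carry a non-zero size */
		.ret = {.type = RTE_BPF_ARG_RAW, .size = sizeof(uint64_t)},
	},
};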
diff --git a/lib/librte_bpf/bpf_load_elf.c b/lib/librte_bpf/bpf_load_elf.c
index 6ab03d86..96d3630f 100644
--- a/lib/librte_bpf/bpf_load_elf.c
+++ b/lib/librte_bpf/bpf_load_elf.c
@@ -81,9 +81,9 @@ resolve_xsym(const char *sn, size_t ofs, struct ebpf_insn *ins, size_t ins_sz,
ins[idx].imm = fidx;
/* for variable we need to store its absolute address */
else {
- ins[idx].imm = (uintptr_t)prm->xsym[fidx].var;
+ ins[idx].imm = (uintptr_t)prm->xsym[fidx].var.val;
ins[idx + 1].imm =
- (uint64_t)(uintptr_t)prm->xsym[fidx].var >> 32;
+ (uint64_t)(uintptr_t)prm->xsym[fidx].var.val >> 32;
}
return 0;
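For a variable xsym the loader patches the 64-bit absolute address across the two 32-bit imm fields of an LD_IMM64 instruction pair; the validator's eval_ld_imm64() (in the next file) recombines them the same way. A small sketch of the round trip, assuming the ebpf_insn layout from bpf_def.h:

/* Recombine the two immediate halves exactly as eval_ld_imm64() does. */
static uint64_t
ld_imm64_value(const struct ebpf_insn ins[2])
{
	return (uint32_t)ins[0].imm |
		(uint64_t)(uint32_t)ins[1].imm << 32;
}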
diff --git a/lib/librte_bpf/bpf_validate.c b/lib/librte_bpf/bpf_validate.c
index b7081c85..83983efc 100644
--- a/lib/librte_bpf/bpf_validate.c
+++ b/lib/librte_bpf/bpf_validate.c
@@ -11,9 +11,28 @@
#include <rte_common.h>
#include <rte_eal.h>
+#include <rte_byteorder.h>
#include "bpf_impl.h"
+struct bpf_reg_val {
+ struct rte_bpf_arg v;
+ uint64_t mask;
+ struct {
+ int64_t min;
+ int64_t max;
+ } s;
+ struct {
+ uint64_t min;
+ uint64_t max;
+ } u;
+};
+
+struct bpf_eval_state {
+ struct bpf_reg_val rv[EBPF_REG_NUM];
+ struct bpf_reg_val sv[MAX_BPF_STACK_SIZE / sizeof(uint64_t)];
+};
+
/* possible instruction node colour */
enum {
WHITE,
@@ -31,14 +50,6 @@ enum {
MAX_EDGE_TYPE
};
-struct bpf_reg_state {
- uint64_t val;
-};
-
-struct bpf_eval_state {
- struct bpf_reg_state rs[EBPF_REG_NUM];
-};
-
#define MAX_EDGES 2
struct inst_node {
@@ -54,12 +65,13 @@ struct inst_node {
struct bpf_verifier {
const struct rte_bpf_prm *prm;
struct inst_node *in;
- int32_t stack_sz;
+ uint64_t stack_sz;
uint32_t nb_nodes;
uint32_t nb_jcc_nodes;
uint32_t node_colour[MAX_NODE_COLOUR];
uint32_t edge_type[MAX_EDGE_TYPE];
struct bpf_eval_state *evst;
+ struct inst_node *evin;
struct {
uint32_t num;
uint32_t cur;
@@ -101,40 +113,823 @@ check_alu_bele(const struct ebpf_insn *ins)
}
static const char *
-eval_stack(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+eval_exit(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ RTE_SET_USED(ins);
+ if (bvf->evst->rv[EBPF_REG_0].v.type == RTE_BPF_ARG_UNDEF)
+ return "undefined return value";
+ return NULL;
+}
+
+/* set up the max possible bounds for this mask */
+static void
+eval_umax_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ rv->u.max = mask;
+ rv->u.min = 0;
+}
+
+static void
+eval_smax_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ rv->s.max = mask >> 1;
+ rv->s.min = rv->s.max ^ UINT64_MAX;
+}
+
+static void
+eval_max_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_umax_bound(rv, mask);
+ eval_smax_bound(rv, mask);
+}
+
+static void
+eval_fill_max_bound(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_max_bound(rv, mask);
+ rv->v.type = RTE_BPF_ARG_RAW;
+ rv->mask = mask;
+}
+
+static void
+eval_fill_imm64(struct bpf_reg_val *rv, uint64_t mask, uint64_t val)
+{
+ rv->mask = mask;
+ rv->s.min = val;
+ rv->s.max = val;
+ rv->u.min = val;
+ rv->u.max = val;
+}
+
+static void
+eval_fill_imm(struct bpf_reg_val *rv, uint64_t mask, int32_t imm)
+{
+ uint64_t v;
+
+ v = (uint64_t)imm & mask;
+
+ rv->v.type = RTE_BPF_ARG_RAW;
+ eval_fill_imm64(rv, mask, v);
+}
+
+static const char *
+eval_ld_imm64(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
{
- int32_t ofs;
+ uint32_t i;
+ uint64_t val;
+ struct bpf_reg_val *rd;
+
+ val = (uint32_t)ins[0].imm | (uint64_t)(uint32_t)ins[1].imm << 32;
- ofs = ins->off;
+ rd = bvf->evst->rv + ins->dst_reg;
+ rd->v.type = RTE_BPF_ARG_RAW;
+ eval_fill_imm64(rd, UINT64_MAX, val);
- if (ofs >= 0 || ofs < -MAX_BPF_STACK_SIZE)
- return "stack boundary violation";
+ for (i = 0; i != bvf->prm->nb_xsym; i++) {
+
+ /* load of external variable */
+ if (bvf->prm->xsym[i].type == RTE_BPF_XTYPE_VAR &&
+ (uintptr_t)bvf->prm->xsym[i].var.val == val) {
+ rd->v = bvf->prm->xsym[i].var.desc;
+ eval_fill_imm64(rd, UINT64_MAX, 0);
+ break;
+ }
+ }
- ofs = -ofs;
- bvf->stack_sz = RTE_MAX(bvf->stack_sz, ofs);
return NULL;
}
+static void
+eval_apply_mask(struct bpf_reg_val *rv, uint64_t mask)
+{
+ struct bpf_reg_val rt;
+
+ rt.u.min = rv->u.min & mask;
+ rt.u.max = rv->u.max & mask;
+ if (rt.u.min != rv->u.min || rt.u.max != rv->u.max) {
+ rv->u.max = RTE_MAX(rt.u.max, mask);
+ rv->u.min = 0;
+ }
+
+ eval_smax_bound(&rt, mask);
+ rv->s.max = RTE_MIN(rt.s.max, rv->s.max);
+ rv->s.min = RTE_MAX(rt.s.min, rv->s.min);
+
+ rv->mask = mask;
+}
+
+static void
+eval_add(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, uint64_t msk)
+{
+ struct bpf_reg_val rv;
+
+ rv.u.min = (rd->u.min + rs->u.min) & msk;
+ rv.u.max = (rd->u.min + rs->u.max) & msk;
+ rv.s.min = (rd->s.min + rs->s.min) & msk;
+ rv.s.max = (rd->s.max + rs->s.max) & msk;
+
+ /*
+ * if at least one of the operands is not constant,
+ * then check for overflow
+ */
+ if ((rd->u.min != rd->u.max || rs->u.min != rs->u.max) &&
+ (rv.u.min < rd->u.min || rv.u.max < rd->u.max))
+ eval_umax_bound(&rv, msk);
+
+ if ((rd->s.min != rd->s.max || rs->s.min != rs->s.max) &&
+ (((rs->s.min < 0 && rv.s.min > rd->s.min) ||
+ rv.s.min < rd->s.min) ||
+ ((rs->s.max < 0 && rv.s.max > rd->s.max) ||
+ rv.s.max < rd->s.max)))
+ eval_smax_bound(&rv, msk);
+
+ rd->s = rv.s;
+ rd->u = rv.u;
+}
+
+static void
+eval_sub(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, uint64_t msk)
+{
+ struct bpf_reg_val rv;
+
+ rv.u.min = (rd->u.min - rs->u.min) & msk;
+ rv.u.max = (rd->u.min - rs->u.max) & msk;
+ rv.s.min = (rd->s.min - rs->s.min) & msk;
+ rv.s.max = (rd->s.max - rs->s.max) & msk;
+
+ /*
+ * if at least one of the operands is not constant,
+ * then check for overflow
+ */
+ if ((rd->u.min != rd->u.max || rs->u.min != rs->u.max) &&
+ (rv.u.min > rd->u.min || rv.u.max > rd->u.max))
+ eval_umax_bound(&rv, msk);
+
+ if ((rd->s.min != rd->s.max || rs->s.min != rs->s.max) &&
+ (((rs->s.min < 0 && rv.s.min < rd->s.min) ||
+ rv.s.min > rd->s.min) ||
+ ((rs->s.max < 0 && rv.s.max < rd->s.max) ||
+ rv.s.max > rd->s.max)))
+ eval_smax_bound(&rv, msk);
+
+ rd->s = rv.s;
+ rd->u = rv.u;
+}
+
+static void
+eval_lsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* check if shift value is less than max result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ /* check for overflow */
+ if (rd->u.max > RTE_LEN2MASK(opsz - rs->u.max, uint64_t))
+ eval_umax_bound(rd, msk);
+ else {
+ rd->u.max <<= rs->u.max;
+ rd->u.min <<= rs->u.min;
+ }
+
+ /* check that dreg values are and would remain always positive */
+ if ((uint64_t)rd->s.min >> (opsz - 1) != 0 || rd->s.max >=
+ RTE_LEN2MASK(opsz - rs->u.max - 1, int64_t))
+ eval_smax_bound(rd, msk);
+ else {
+ rd->s.max <<= rs->u.max;
+ rd->s.min <<= rs->u.min;
+ }
+}
+
+static void
+eval_rsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* check if shift value is less than max result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ rd->u.max >>= rs->u.min;
+ rd->u.min >>= rs->u.max;
+
+ /* check that dreg values are always positive */
+ if ((uint64_t)rd->s.min >> (opsz - 1) != 0)
+ eval_smax_bound(rd, msk);
+ else {
+ rd->s.max >>= rs->u.min;
+ rd->s.min >>= rs->u.max;
+ }
+}
+
+static void
+eval_arsh(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ uint32_t shv;
+
+ /* check if shift value is less than max result bits */
+ if (rs->u.max >= opsz) {
+ eval_max_bound(rd, msk);
+ return;
+ }
+
+ rd->u.max = (int64_t)rd->u.max >> rs->u.min;
+ rd->u.min = (int64_t)rd->u.min >> rs->u.max;
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min <<= opsz;
+ rd->s.max <<= opsz;
+ shv = opsz;
+ } else
+ shv = 0;
+
+ if (rd->s.min < 0)
+ rd->s.min = (rd->s.min >> (rs->u.min + shv)) & msk;
+ else
+ rd->s.min = (rd->s.min >> (rs->u.max + shv)) & msk;
+
+ if (rd->s.max < 0)
+ rd->s.max = (rd->s.max >> (rs->u.max + shv)) & msk;
+ else
+ rd->s.max = (rd->s.max >> (rs->u.min + shv)) & msk;
+}
+
+static uint64_t
+eval_umax_bits(uint64_t v, size_t opsz)
+{
+ if (v == 0)
+ return 0;
+
+ v = __builtin_clzll(v);
+ return RTE_LEN2MASK(opsz - v, uint64_t);
+}
+
+/* estimate max possible value for (v1 & v2) */
+static uint64_t
+eval_uand_max(uint64_t v1, uint64_t v2, size_t opsz)
+{
+ v1 = eval_umax_bits(v1, opsz);
+ v2 = eval_umax_bits(v2, opsz);
+ return (v1 & v2);
+}
+
+/* estimate max possible value for (v1 | v2) */
+static uint64_t
+eval_uor_max(uint64_t v1, uint64_t v2, size_t opsz)
+{
+ v1 = eval_umax_bits(v1, opsz);
+ v2 = eval_umax_bits(v2, opsz);
+ return (v1 | v2);
+}
+
+static void
+eval_and(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min &= rs->u.min;
+ rd->u.max &= rs->u.max;
+ } else {
+ rd->u.max = eval_uand_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min &= rs->u.min;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min &= rs->s.min;
+ rd->s.max &= rs->s.max;
+ /* at least one of operand is non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uand_max(rd->s.max & (msk >> 1),
+ rs->s.max & (msk >> 1), opsz);
+ rd->s.min &= rs->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_or(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min |= rs->u.min;
+ rd->u.max |= rs->u.max;
+ } else {
+ rd->u.max = eval_uor_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min |= rs->u.min;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min |= rs->s.min;
+ rd->s.max |= rs->s.max;
+
+ /* both operands are non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uor_max(rd->s.max, rs->s.max, opsz);
+ rd->s.min |= rs->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_xor(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min ^= rs->u.min;
+ rd->u.max ^= rs->u.max;
+ } else {
+ rd->u.max = eval_uor_max(rd->u.max, rs->u.max, opsz);
+ rd->u.min = 0;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min ^= rs->s.min;
+ rd->s.max ^= rs->s.max;
+
+ /* both operands are non-negative */
+ } else if (rd->s.min >= 0 || rs->s.min >= 0) {
+ rd->s.max = eval_uor_max(rd->s.max, rs->s.max, opsz);
+ rd->s.min = 0;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
+static void
+eval_mul(struct bpf_reg_val *rd, const struct bpf_reg_val *rs, size_t opsz,
+ uint64_t msk)
+{
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ rd->u.min = (rd->u.min * rs->u.min) & msk;
+ rd->u.max = (rd->u.max * rs->u.max) & msk;
+ /* check for overflow */
+ } else if (rd->u.max <= msk >> opsz / 2 && rs->u.max <= msk >> opsz / 2) {
+ rd->u.max *= rs->u.max;
+ rd->u.min *= rd->u.min;
+ } else
+ eval_umax_bound(rd, msk);
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ rd->s.min = (rd->s.min * rs->s.min) & msk;
+ rd->s.max = (rd->s.max * rs->s.max) & msk;
+ /* check that both operands are positive and no overflow */
+ } else if (rd->s.min >= 0 && rs->s.min >= 0) {
+ rd->s.max *= rs->s.max;
+ rd->s.min *= rd->s.min;
+ } else
+ eval_smax_bound(rd, msk);
+}
+
static const char *
-eval_store(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+eval_divmod(uint32_t op, struct bpf_reg_val *rd, struct bpf_reg_val *rs,
+ size_t opsz, uint64_t msk)
{
- if (ins->dst_reg == EBPF_REG_10)
- return eval_stack(bvf, ins);
+ /* both operands are constants */
+ if (rd->u.min == rd->u.max && rs->u.min == rs->u.max) {
+ if (rs->u.max == 0)
+ return "division by 0";
+ if (op == BPF_DIV) {
+ rd->u.min /= rs->u.min;
+ rd->u.max /= rs->u.max;
+ } else {
+ rd->u.min %= rs->u.min;
+ rd->u.max %= rs->u.max;
+ }
+ } else {
+ if (op == BPF_MOD)
+ rd->u.max = RTE_MIN(rd->u.max, rs->u.max - 1);
+ else
+ rd->u.max = rd->u.max;
+ rd->u.min = 0;
+ }
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min = (int32_t)rd->s.min;
+ rd->s.max = (int32_t)rd->s.max;
+ rs->s.min = (int32_t)rs->s.min;
+ rs->s.max = (int32_t)rs->s.max;
+ }
+
+ /* both operands are constants */
+ if (rd->s.min == rd->s.max && rs->s.min == rs->s.max) {
+ if (rs->s.max == 0)
+ return "division by 0";
+ if (op == BPF_DIV) {
+ rd->s.min /= rs->s.min;
+ rd->s.max /= rs->s.max;
+ } else {
+ rd->s.min %= rs->s.min;
+ rd->s.max %= rs->s.max;
+ }
+ } else if (op == BPF_MOD) {
+ rd->s.min = RTE_MAX(rd->s.max, 0);
+ rd->s.min = RTE_MIN(rd->s.min, 0);
+ } else
+ eval_smax_bound(rd, msk);
+
+ rd->s.max &= msk;
+ rd->s.min &= msk;
+
return NULL;
}
+static void
+eval_neg(struct bpf_reg_val *rd, size_t opsz, uint64_t msk)
+{
+ uint64_t ux, uy;
+ int64_t sx, sy;
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->u.min = (int32_t)rd->u.min;
+ rd->u.max = (int32_t)rd->u.max;
+ }
+
+ ux = -(int64_t)rd->u.min & msk;
+ uy = -(int64_t)rd->u.max & msk;
+
+ rd->u.max = RTE_MAX(ux, uy);
+ rd->u.min = RTE_MIN(ux, uy);
+
+ /* if we have 32-bit values - extend them to 64-bit */
+ if (opsz == sizeof(uint32_t) * CHAR_BIT) {
+ rd->s.min = (int32_t)rd->s.min;
+ rd->s.max = (int32_t)rd->s.max;
+ }
+
+ sx = -rd->s.min & msk;
+ sy = -rd->s.max & msk;
+
+ rd->s.max = RTE_MAX(sx, sy);
+ rd->s.min = RTE_MIN(sx, sy);
+}
+
+/*
+ * check that destination and source operand are in defined state.
+ */
+static const char *
+eval_defined(const struct bpf_reg_val *dst, const struct bpf_reg_val *src)
+{
+ if (dst != NULL && dst->v.type == RTE_BPF_ARG_UNDEF)
+ return "dest reg value is undefined";
+ if (src != NULL && src->v.type == RTE_BPF_ARG_UNDEF)
+ return "src reg value is undefined";
+ return NULL;
+}
+
+static const char *
+eval_alu(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint64_t msk;
+ uint32_t op;
+ size_t opsz;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd, rs;
+
+ opsz = (BPF_CLASS(ins->code) == BPF_ALU) ?
+ sizeof(uint32_t) : sizeof(uint64_t);
+ opsz = opsz * CHAR_BIT;
+ msk = RTE_LEN2MASK(opsz, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+
+ if (BPF_SRC(ins->code) == BPF_X) {
+ rs = st->rv[ins->src_reg];
+ eval_apply_mask(&rs, msk);
+ } else
+ eval_fill_imm(&rs, msk, ins->imm);
+
+ eval_apply_mask(rd, msk);
+
+ op = BPF_OP(ins->code);
+
+ err = eval_defined((op != EBPF_MOV) ? rd : NULL,
+ (op != BPF_NEG) ? &rs : NULL);
+ if (err != NULL)
+ return err;
+
+ if (op == BPF_ADD)
+ eval_add(rd, &rs, msk);
+ else if (op == BPF_SUB)
+ eval_sub(rd, &rs, msk);
+ else if (op == BPF_LSH)
+ eval_lsh(rd, &rs, opsz, msk);
+ else if (op == BPF_RSH)
+ eval_rsh(rd, &rs, opsz, msk);
+ else if (op == EBPF_ARSH)
+ eval_arsh(rd, &rs, opsz, msk);
+ else if (op == BPF_AND)
+ eval_and(rd, &rs, opsz, msk);
+ else if (op == BPF_OR)
+ eval_or(rd, &rs, opsz, msk);
+ else if (op == BPF_XOR)
+ eval_xor(rd, &rs, opsz, msk);
+ else if (op == BPF_MUL)
+ eval_mul(rd, &rs, opsz, msk);
+ else if (op == BPF_DIV || op == BPF_MOD)
+ err = eval_divmod(op, rd, &rs, opsz, msk);
+ else if (op == BPF_NEG)
+ eval_neg(rd, opsz, msk);
+ else if (op == EBPF_MOV)
+ *rd = rs;
+ else
+ eval_max_bound(rd, msk);
+
+ return err;
+}
+
+static const char *
+eval_bele(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint64_t msk;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd;
+ const char *err;
+
+ msk = RTE_LEN2MASK(ins->imm, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+
+ err = eval_defined(rd, NULL);
+ if (err != NULL)
+ return err;
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ if (ins->code == (BPF_ALU | EBPF_END | EBPF_TO_BE))
+ eval_max_bound(rd, msk);
+ else
+ eval_apply_mask(rd, msk);
+#else
+ if (ins->code == (BPF_ALU | EBPF_END | EBPF_TO_LE))
+ eval_max_bound(rd, msk);
+ else
+ eval_apply_mask(rd, msk);
+#endif
+
+ return NULL;
+}
+
+static const char *
+eval_ptr(struct bpf_verifier *bvf, struct bpf_reg_val *rm, uint32_t opsz,
+ uint32_t align, int16_t off)
+{
+ struct bpf_reg_val rv;
+
+ /* calculate reg + offset */
+ eval_fill_imm(&rv, rm->mask, off);
+ eval_add(rm, &rv, rm->mask);
+
+ if (RTE_BPF_ARG_PTR_TYPE(rm->v.type) == 0)
+ return "destination is not a pointer";
+
+ if (rm->mask != UINT64_MAX)
+ return "pointer truncation";
+
+ if (rm->u.max + opsz > rm->v.size ||
+ (uint64_t)rm->s.max + opsz > rm->v.size ||
+ rm->s.min < 0)
+ return "memory boundary violation";
+
+ if (rm->u.max % align != 0)
+ return "unaligned memory access";
+
+ if (rm->v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ if (rm->u.max != rm->u.min || rm->s.max != rm->s.min ||
+ rm->u.max != (uint64_t)rm->s.max)
+ return "stack access with variable offset";
+
+ bvf->stack_sz = RTE_MAX(bvf->stack_sz, rm->v.size - rm->u.max);
+
+ /* pointer to mbuf */
+ } else if (rm->v.type == RTE_BPF_ARG_PTR_MBUF) {
+
+ if (rm->u.max != rm->u.min || rm->s.max != rm->s.min ||
+ rm->u.max != (uint64_t)rm->s.max)
+ return "mbuf access with variable offset";
+ }
+
+ return NULL;
+}
+
+static void
+eval_max_load(struct bpf_reg_val *rv, uint64_t mask)
+{
+ eval_umax_bound(rv, mask);
+
+ /* full 64-bit load */
+ if (mask == UINT64_MAX)
+ eval_smax_bound(rv, mask);
+
+ /* zero-extend load */
+ rv->s.min = rv->u.min;
+ rv->s.max = rv->u.max;
+}
+
+
static const char *
eval_load(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
{
- if (ins->src_reg == EBPF_REG_10)
- return eval_stack(bvf, ins);
+ uint32_t opsz;
+ uint64_t msk;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val *rd, rs;
+ const struct bpf_reg_val *sv;
+
+ st = bvf->evst;
+ rd = st->rv + ins->dst_reg;
+ rs = st->rv[ins->src_reg];
+ opsz = bpf_size(BPF_SIZE(ins->code));
+ msk = RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t);
+
+ err = eval_ptr(bvf, &rs, opsz, 1, ins->off);
+ if (err != NULL)
+ return err;
+
+ if (rs.v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ sv = st->sv + rs.u.max / sizeof(uint64_t);
+ if (sv->v.type == RTE_BPF_ARG_UNDEF || sv->mask < msk)
+ return "undefined value on the stack";
+
+ *rd = *sv;
+
+ /* pointer to mbuf */
+ } else if (rs.v.type == RTE_BPF_ARG_PTR_MBUF) {
+
+ if (rs.u.max == offsetof(struct rte_mbuf, next)) {
+ eval_fill_imm(rd, msk, 0);
+ rd->v = rs.v;
+ } else if (rs.u.max == offsetof(struct rte_mbuf, buf_addr)) {
+ eval_fill_imm(rd, msk, 0);
+ rd->v.type = RTE_BPF_ARG_PTR;
+ rd->v.size = rs.v.buf_size;
+ } else if (rs.u.max == offsetof(struct rte_mbuf, data_off)) {
+ eval_fill_imm(rd, msk, RTE_PKTMBUF_HEADROOM);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ } else {
+ eval_max_load(rd, msk);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ }
+
+ /* pointer to raw data */
+ } else {
+ eval_max_load(rd, msk);
+ rd->v.type = RTE_BPF_ARG_RAW;
+ }
+
return NULL;
}
static const char *
+eval_mbuf_store(const struct bpf_reg_val *rv, uint32_t opsz)
+{
+ uint32_t i;
+
+ static const struct {
+ size_t off;
+ size_t sz;
+ } mbuf_ro_fileds[] = {
+ { .off = offsetof(struct rte_mbuf, buf_addr), },
+ { .off = offsetof(struct rte_mbuf, refcnt), },
+ { .off = offsetof(struct rte_mbuf, nb_segs), },
+ { .off = offsetof(struct rte_mbuf, buf_len), },
+ { .off = offsetof(struct rte_mbuf, pool), },
+ { .off = offsetof(struct rte_mbuf, next), },
+ { .off = offsetof(struct rte_mbuf, priv_size), },
+ };
+
+ for (i = 0; i != RTE_DIM(mbuf_ro_fileds) &&
+ (mbuf_ro_fileds[i].off + mbuf_ro_fileds[i].sz <=
+ rv->u.max || rv->u.max + opsz <= mbuf_ro_fileds[i].off);
+ i++)
+ ;
+
+ if (i != RTE_DIM(mbuf_ro_fileds))
+ return "store to the read-only mbuf field";
+
+ return NULL;
+
+}
+
+static const char *
+eval_store(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t opsz;
+ uint64_t msk;
+ const char *err;
+ struct bpf_eval_state *st;
+ struct bpf_reg_val rd, rs, *sv;
+
+ opsz = bpf_size(BPF_SIZE(ins->code));
+ msk = RTE_LEN2MASK(opsz * CHAR_BIT, uint64_t);
+
+ st = bvf->evst;
+ rd = st->rv[ins->dst_reg];
+
+ if (BPF_CLASS(ins->code) == BPF_STX) {
+ rs = st->rv[ins->src_reg];
+ eval_apply_mask(&rs, msk);
+ } else
+ eval_fill_imm(&rs, msk, ins->imm);
+
+ err = eval_defined(NULL, &rs);
+ if (err != NULL)
+ return err;
+
+ err = eval_ptr(bvf, &rd, opsz, 1, ins->off);
+ if (err != NULL)
+ return err;
+
+ if (rd.v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ sv = st->sv + rd.u.max / sizeof(uint64_t);
+ if (BPF_CLASS(ins->code) == BPF_STX &&
+ BPF_MODE(ins->code) == EBPF_XADD)
+ eval_max_bound(sv, msk);
+ else
+ *sv = rs;
+
+ /* pointer to mbuf */
+ } else if (rd.v.type == RTE_BPF_ARG_PTR_MBUF) {
+ err = eval_mbuf_store(&rd, opsz);
+ if (err != NULL)
+ return err;
+ }
+
+ return NULL;
+}
+
+static const char *
+eval_func_arg(struct bpf_verifier *bvf, const struct rte_bpf_arg *arg,
+ struct bpf_reg_val *rv)
+{
+ uint32_t i, n;
+ struct bpf_eval_state *st;
+ const char *err;
+
+ st = bvf->evst;
+
+ if (rv->v.type == RTE_BPF_ARG_UNDEF)
+ return "Undefined argument type";
+
+ if (arg->type != rv->v.type &&
+ arg->type != RTE_BPF_ARG_RAW &&
+ (arg->type != RTE_BPF_ARG_PTR ||
+ RTE_BPF_ARG_PTR_TYPE(rv->v.type) == 0))
+ return "Invalid argument type";
+
+ err = NULL;
+
+ /* argument is a pointer */
+ if (RTE_BPF_ARG_PTR_TYPE(arg->type) != 0) {
+
+ err = eval_ptr(bvf, rv, arg->size, 1, 0);
+
+ /*
+ * pointer to the variable on the stack is passed
+ * as an argument, mark stack space it occupies as initialized.
+ */
+ if (err == NULL && rv->v.type == RTE_BPF_ARG_PTR_STACK) {
+
+ i = rv->u.max / sizeof(uint64_t);
+ n = i + arg->size / sizeof(uint64_t);
+ while (i != n) {
+ eval_fill_max_bound(st->sv + i, UINT64_MAX);
+ i++;
+ };
+ }
+ }
+
+ return err;
+}
+
+static const char *
eval_call(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
{
- uint32_t idx;
+ uint64_t msk;
+ uint32_t i, idx;
+ struct bpf_reg_val *rv;
+ const struct rte_bpf_xsym *xsym;
+ const char *err;
idx = ins->imm;
@@ -145,6 +940,144 @@ eval_call(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
/* for now don't support function calls on 32 bit platform */
if (sizeof(uint64_t) != sizeof(uintptr_t))
return "function calls are supported only for 64 bit apps";
+
+ xsym = bvf->prm->xsym + idx;
+
+ /* evaluate function arguments */
+ err = NULL;
+ for (i = 0; i != xsym->func.nb_args && err == NULL; i++) {
+ err = eval_func_arg(bvf, xsym->func.args + i,
+ bvf->evst->rv + EBPF_REG_1 + i);
+ }
+
+ /* R1-R5 argument/scratch registers */
+ for (i = EBPF_REG_1; i != EBPF_REG_6; i++)
+ bvf->evst->rv[i].v.type = RTE_BPF_ARG_UNDEF;
+
+ /* update return value */
+
+ rv = bvf->evst->rv + EBPF_REG_0;
+ rv->v = xsym->func.ret;
+ msk = (rv->v.type == RTE_BPF_ARG_RAW) ?
+ RTE_LEN2MASK(rv->v.size * CHAR_BIT, uint64_t) : UINTPTR_MAX;
+ eval_max_bound(rv, msk);
+ rv->mask = msk;
+
+ return err;
+}
+
+static void
+eval_jeq_jne(struct bpf_reg_val *trd, struct bpf_reg_val *trs)
+{
+ /* sreg is constant */
+ if (trs->u.min == trs->u.max) {
+ trd->u = trs->u;
+ /* dreg is constant */
+ } else if (trd->u.min == trd->u.max) {
+ trs->u = trd->u;
+ } else {
+ trd->u.max = RTE_MIN(trd->u.max, trs->u.max);
+ trd->u.min = RTE_MAX(trd->u.min, trs->u.min);
+ trs->u = trd->u;
+ }
+
+ /* sreg is constant */
+ if (trs->s.min == trs->s.max) {
+ trd->s = trs->s;
+ /* dreg is constant */
+ } else if (trd->s.min == trd->s.max) {
+ trs->s = trd->s;
+ } else {
+ trd->s.max = RTE_MIN(trd->s.max, trs->s.max);
+ trd->s.min = RTE_MAX(trd->s.min, trs->s.min);
+ trs->s = trd->s;
+ }
+}
+
+static void
+eval_jgt_jle(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->u.max = RTE_MIN(frd->u.max, frs->u.min);
+ trd->u.min = RTE_MAX(trd->u.min, trs->u.min + 1);
+}
+
+static void
+eval_jlt_jge(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->u.min = RTE_MAX(frd->u.min, frs->u.min);
+ trd->u.max = RTE_MIN(trd->u.max, trs->u.max - 1);
+}
+
+static void
+eval_jsgt_jsle(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->s.max = RTE_MIN(frd->s.max, frs->s.min);
+ trd->s.min = RTE_MAX(trd->s.min, trs->s.min + 1);
+}
+
+static void
+eval_jslt_jsge(struct bpf_reg_val *trd, struct bpf_reg_val *trs,
+ struct bpf_reg_val *frd, struct bpf_reg_val *frs)
+{
+ frd->s.min = RTE_MAX(frd->s.min, frs->s.min);
+ trd->s.max = RTE_MIN(trd->s.max, trs->s.max - 1);
+}
+
+static const char *
+eval_jcc(struct bpf_verifier *bvf, const struct ebpf_insn *ins)
+{
+ uint32_t op;
+ const char *err;
+ struct bpf_eval_state *fst, *tst;
+ struct bpf_reg_val *frd, *frs, *trd, *trs;
+ struct bpf_reg_val rvf, rvt;
+
+ tst = bvf->evst;
+ fst = bvf->evin->evst;
+
+ frd = fst->rv + ins->dst_reg;
+ trd = tst->rv + ins->dst_reg;
+
+ if (BPF_SRC(ins->code) == BPF_X) {
+ frs = fst->rv + ins->src_reg;
+ trs = tst->rv + ins->src_reg;
+ } else {
+ frs = &rvf;
+ trs = &rvt;
+ eval_fill_imm(frs, UINT64_MAX, ins->imm);
+ eval_fill_imm(trs, UINT64_MAX, ins->imm);
+ }
+
+ err = eval_defined(trd, trs);
+ if (err != NULL)
+ return err;
+
+ op = BPF_OP(ins->code);
+
+ if (op == BPF_JEQ)
+ eval_jeq_jne(trd, trs);
+ else if (op == EBPF_JNE)
+ eval_jeq_jne(frd, frs);
+ else if (op == BPF_JGT)
+ eval_jgt_jle(trd, trs, frd, frs);
+ else if (op == EBPF_JLE)
+ eval_jgt_jle(frd, frs, trd, trs);
+ else if (op == EBPF_JLT)
+ eval_jlt_jge(trd, trs, frd, frs);
+ else if (op == BPF_JGE)
+ eval_jlt_jge(frd, frs, trd, trs);
+ else if (op == EBPF_JSGT)
+ eval_jsgt_jsle(trd, trs, frd, frs);
+ else if (op == EBPF_JSLE)
+ eval_jsgt_jsle(frd, frs, trd, trs);
+ else if (op == EBPF_JSLT)
+ eval_jslt_jsge(trd, trs, frd, frs);
+ else if (op == EBPF_JSGE)
+ eval_jslt_jsge(frd, frs, trd, trs);
+
return NULL;
}
@@ -157,256 +1090,306 @@ static const struct bpf_ins_check ins_chk[UINT8_MAX] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_SUB | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_AND | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_OR | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_LSH | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_RSH | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_XOR | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_MUL | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | EBPF_MOV | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_DIV | BPF_K)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_MOD | BPF_K)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
},
/* ALU IMM 64-bit instructions */
[(EBPF_ALU64 | BPF_ADD | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_SUB | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_AND | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_OR | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_LSH | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_RSH | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | EBPF_ARSH | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_XOR | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_MUL | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | EBPF_MOV | BPF_K)] = {
.mask = {.dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX,},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_DIV | BPF_K)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_MOD | BPF_K)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 1, .max = UINT32_MAX},
+ .eval = eval_alu,
},
/* ALU REG 32-bit instructions */
[(BPF_ALU | BPF_ADD | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_SUB | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_AND | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_OR | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_LSH | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_RSH | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_XOR | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_MUL | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_DIV | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_MOD | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | EBPF_MOV | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | BPF_NEG)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(BPF_ALU | EBPF_END | EBPF_TO_BE)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 16, .max = 64},
.check = check_alu_bele,
+ .eval = eval_bele,
},
[(BPF_ALU | EBPF_END | EBPF_TO_LE)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 16, .max = 64},
.check = check_alu_bele,
+ .eval = eval_bele,
},
/* ALU REG 64-bit instructions */
[(EBPF_ALU64 | BPF_ADD | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_SUB | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_AND | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_OR | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_LSH | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_RSH | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | EBPF_ARSH | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_XOR | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_MUL | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_DIV | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_MOD | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | EBPF_MOV | BPF_X)] = {
.mask = { .dreg = WRT_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
[(EBPF_ALU64 | BPF_NEG)] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_alu,
},
/* load instructions */
[(BPF_LDX | BPF_MEM | BPF_B)] = {
@@ -438,6 +1421,7 @@ static const struct bpf_ins_check ins_chk[UINT8_MAX] = {
.mask = { .dreg = WRT_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_ld_imm64,
},
/* store REG instructions */
[(BPF_STX | BPF_MEM | BPF_B)] = {
@@ -513,92 +1497,110 @@ static const struct bpf_ins_check ins_chk[UINT8_MAX] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JNE | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JGT | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JLT | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JGE | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JLE | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSGT | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSLT | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSGE | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSLE | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JSET | BPF_K)] = {
.mask = { .dreg = ALL_REGS, .sreg = ZERO_REG},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = UINT32_MAX},
+ .eval = eval_jcc,
},
/* jcc REG instructions */
[(BPF_JMP | BPF_JEQ | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JNE | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JGT | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JLT | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JGE | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JLE | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSGT | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSLT | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
@@ -609,16 +1611,19 @@ static const struct bpf_ins_check ins_chk[UINT8_MAX] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | EBPF_JSLE | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
[(BPF_JMP | BPF_JSET | BPF_X)] = {
.mask = { .dreg = ALL_REGS, .sreg = ALL_REGS},
.off = { .min = 0, .max = UINT16_MAX},
.imm = { .min = 0, .max = 0},
+ .eval = eval_jcc,
},
/* call instruction */
[(BPF_JMP | EBPF_CALL)] = {
@@ -632,6 +1637,7 @@ static const struct bpf_ins_check ins_chk[UINT8_MAX] = {
.mask = { .dreg = ZERO_REG, .sreg = ZERO_REG},
.off = { .min = 0, .max = 0},
.imm = { .min = 0, .max = 0},
+ .eval = eval_exit,
},
};
@@ -1046,7 +2052,7 @@ save_eval_state(struct bpf_verifier *bvf, struct inst_node *node)
st = pull_eval_state(bvf);
if (st == NULL) {
RTE_BPF_LOG(ERR,
- "%s: internal error (out of space) at pc: %u",
+ "%s: internal error (out of space) at pc: %u\n",
__func__, get_node_idx(bvf, node));
return -ENOMEM;
}
@@ -1078,6 +2084,32 @@ restore_eval_state(struct bpf_verifier *bvf, struct inst_node *node)
push_eval_state(bvf);
}
+static void
+log_eval_state(const struct bpf_verifier *bvf, const struct ebpf_insn *ins,
+ uint32_t pc, int32_t loglvl)
+{
+ const struct bpf_eval_state *st;
+ const struct bpf_reg_val *rv;
+
+ rte_log(loglvl, rte_bpf_logtype, "%s(pc=%u):\n", __func__, pc);
+
+ st = bvf->evst;
+ rv = st->rv + ins->dst_reg;
+
+ rte_log(loglvl, rte_bpf_logtype,
+ "r%u={\n"
+ "\tv={type=%u, size=%zu},\n"
+ "\tmask=0x%" PRIx64 ",\n"
+ "\tu={min=0x%" PRIx64 ", max=0x%" PRIx64 "},\n"
+ "\ts={min=%" PRId64 ", max=%" PRId64 "},\n"
+ "};\n",
+ ins->dst_reg,
+ rv->v.type, rv->v.size,
+ rv->mask,
+ rv->u.min, rv->u.max,
+ rv->s.min, rv->s.max);
+}
+
/*
* Do second pass through CFG and try to evaluate instructions
* via each possible path.
@@ -1096,23 +2128,56 @@ evaluate(struct bpf_verifier *bvf)
const struct ebpf_insn *ins;
struct inst_node *next, *node;
- node = bvf->in;
+ /* initial state of frame pointer */
+ static const struct bpf_reg_val rvfp = {
+ .v = {
+ .type = RTE_BPF_ARG_PTR_STACK,
+ .size = MAX_BPF_STACK_SIZE,
+ },
+ .mask = UINT64_MAX,
+ .u = {.min = MAX_BPF_STACK_SIZE, .max = MAX_BPF_STACK_SIZE},
+ .s = {.min = MAX_BPF_STACK_SIZE, .max = MAX_BPF_STACK_SIZE},
+ };
+
+ bvf->evst->rv[EBPF_REG_1].v = bvf->prm->prog_arg;
+ bvf->evst->rv[EBPF_REG_1].mask = UINT64_MAX;
+ if (bvf->prm->prog_arg.type == RTE_BPF_ARG_RAW)
+ eval_max_bound(bvf->evst->rv + EBPF_REG_1, UINT64_MAX);
+
+ bvf->evst->rv[EBPF_REG_10] = rvfp;
+
ins = bvf->prm->ins;
+ node = bvf->in;
+ next = node;
rc = 0;
while (node != NULL && rc == 0) {
- /* current node evaluation */
- idx = get_node_idx(bvf, node);
- op = ins[idx].code;
+ /*
+ * current node evaluation, make sure we evaluate
+ * each node only once.
+ */
+ if (next != NULL) {
+
+ bvf->evin = node;
+ idx = get_node_idx(bvf, node);
+ op = ins[idx].code;
- if (ins_chk[op].eval != NULL) {
- err = ins_chk[op].eval(bvf, ins + idx);
- if (err != NULL) {
- RTE_BPF_LOG(ERR, "%s: %s at pc: %u\n",
- __func__, err, idx);
- rc = -EINVAL;
+ /* for a jcc node, make a copy of the evaluation state */
+ if (node->nb_edge > 1)
+ rc |= save_eval_state(bvf, node);
+
+ if (ins_chk[op].eval != NULL && rc == 0) {
+ err = ins_chk[op].eval(bvf, ins + idx);
+ if (err != NULL) {
+ RTE_BPF_LOG(ERR, "%s: %s at pc: %u\n",
+ __func__, err, idx);
+ rc = -EINVAL;
+ }
}
+
+ log_eval_state(bvf, ins + idx, idx, RTE_LOG_DEBUG);
+ bvf->evin = NULL;
}
/* proceed through CFG */
@@ -1120,9 +2185,8 @@ evaluate(struct bpf_verifier *bvf)
if (next != NULL) {
/* proceed with next child */
- if (node->cur_edge != node->nb_edge)
- rc |= save_eval_state(bvf, node);
- else if (node->evst != NULL)
+ if (node->cur_edge == node->nb_edge &&
+ node->evst != NULL)
restore_eval_state(bvf, node);
next->prev_node = get_node_idx(bvf, node);
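A minimal, self-contained sketch (not DPDK code) of the traversal policy the hunk above implements: the evaluation state is snapshotted only at nodes with more than one out-edge, and restored once the last out-edge has been walked. The node layout and the "state" payload are invented for illustration.

#include <stdio.h>

struct toy_state { int regs[4]; };

struct toy_node {
	int nb_edge;            /* number of out-edges */
	int cur_edge;           /* next out-edge to follow */
	struct toy_state saved; /* snapshot taken before first edge */
	int has_saved;
};

static void
enter_node(struct toy_node *n, const struct toy_state *cur)
{
	/* branch node: copy the evaluation state before descending */
	if (n->nb_edge > 1 && !n->has_saved) {
		n->saved = *cur;
		n->has_saved = 1;
	}
}

static void
leave_node(const struct toy_node *n, struct toy_state *cur)
{
	/* all out-edges walked: roll back to the snapshot */
	if (n->cur_edge == n->nb_edge && n->has_saved)
		*cur = n->saved;
}

int
main(void)
{
	struct toy_state st = { .regs = {1, 2, 3, 4} };
	struct toy_node branch = { .nb_edge = 2 };

	enter_node(&branch, &st);
	st.regs[0] = 99;          /* mutate state along the first edge */
	branch.cur_edge = 2;      /* pretend both edges were walked */
	leave_node(&branch, &st);
	printf("r0 after restore: %d\n", st.regs[0]); /* prints 1 */
	return 0;
}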
diff --git a/lib/librte_bpf/meson.build b/lib/librte_bpf/meson.build
index de9de009..bc0cd78f 100644
--- a/lib/librte_bpf/meson.build
+++ b/lib/librte_bpf/meson.build
@@ -8,7 +8,7 @@ sources = files('bpf.c',
'bpf_pkt.c',
'bpf_validate.c')
-if arch_subdir == 'x86'
+if arch_subdir == 'x86' and cc.sizeof('void *') == 8
sources += files('bpf_jit_x86.c')
endif
diff --git a/lib/librte_bpf/rte_bpf.h b/lib/librte_bpf/rte_bpf.h
index 1249a992..ad62ef2c 100644
--- a/lib/librte_bpf/rte_bpf.h
+++ b/lib/librte_bpf/rte_bpf.h
@@ -40,7 +40,11 @@ enum rte_bpf_arg_type {
*/
struct rte_bpf_arg {
enum rte_bpf_arg_type type;
- size_t size; /**< for pointer types, size of data it points to */
+ /**
+ * for ptr type - max size of data buffer it points to
+ * for raw type - the size (in bytes) of the value
+ */
+ size_t size;
size_t buf_size;
/**< for mbuf ptr type, max size of rte_mbuf data buffer */
};
@@ -66,10 +70,19 @@ struct rte_bpf_xsym {
const char *name; /**< name */
enum rte_bpf_xtype type; /**< type */
union {
- uint64_t (*func)(uint64_t, uint64_t, uint64_t,
+ struct {
+ uint64_t (*val)(uint64_t, uint64_t, uint64_t,
uint64_t, uint64_t);
- void *var;
- }; /**< value */
+ uint32_t nb_args;
+ struct rte_bpf_arg args[EBPF_FUNC_MAX_ARGS];
+ /**< Function argument descriptions. */
+ struct rte_bpf_arg ret; /**< function return value. */
+ } func;
+ struct {
+ void *val; /**< actual memory location */
+ struct rte_bpf_arg desc; /**< type, size, etc. */
+ } var; /**< external variable */
+ };
};
/**
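With the reworked struct rte_bpf_xsym above, an external function symbol now carries its own argument and return-value descriptions inside the .func sub-struct. A hedged sketch of registering one; the callback name and its argument layout are invented for illustration:

#include <rte_bpf.h>

static uint64_t
my_filter_cb(uint64_t arg1, uint64_t arg2, uint64_t arg3,
	uint64_t arg4, uint64_t arg5)
{
	(void)arg2; (void)arg3; (void)arg4; (void)arg5;
	return arg1 + 1;
}

static const struct rte_bpf_xsym xsym = {
	.name = "my_filter_cb",
	.type = RTE_BPF_XTYPE_FUNC,
	.func = {
		.val = my_filter_cb,
		.nb_args = 1,
		.args = {
			[0] = {
				.type = RTE_BPF_ARG_RAW,
				.size = sizeof(uint64_t),
			},
		},
		.ret = {
			.type = RTE_BPF_ARG_RAW,
			.size = sizeof(uint64_t),
		},
	},
};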
diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c
index 961f9bef..9666e90c 100644
--- a/lib/librte_cmdline/cmdline_parse.c
+++ b/lib/librte_cmdline/cmdline_parse.c
@@ -208,9 +208,6 @@ cmdline_parse(struct cmdline *cl, const char * buf)
int err = CMDLINE_PARSE_NOMATCH;
int tok;
cmdline_parse_ctx_t *ctx;
-#ifdef RTE_LIBRTE_CMDLINE_DEBUG
- char debug_buf[BUFSIZ];
-#endif
char *result_buf = result.buf;
if (!cl || !buf)
@@ -250,10 +247,8 @@ cmdline_parse(struct cmdline *cl, const char * buf)
return linelen;
}
-#ifdef RTE_LIBRTE_CMDLINE_DEBUG
- strlcpy(debug_buf, buf, (linelen > 64 ? 64 : linelen));
- debug_printf("Parse line : len=%d, <%s>\n", linelen, debug_buf);
-#endif
+ debug_printf("Parse line : len=%d, <%.*s>\n",
+ linelen, linelen > 64 ? 64 : linelen, buf);
/* parse it !! */
inst = ctx[inst_num];
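The replacement above relies on printf's "%.*s" precision specifier to bound how many bytes are printed, removing the need for the on-stack copy. A standalone illustration with invented input:

#include <stdio.h>

int
main(void)
{
	const char *buf = "show port stats all\n";
	int linelen = 20;

	/* print at most 64 bytes of buf, or linelen if smaller */
	printf("Parse line : len=%d, <%.*s>\n",
		linelen, linelen > 64 ? 64 : linelen, buf);
	return 0;
}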
diff --git a/lib/librte_compat/Makefile b/lib/librte_compat/Makefile
index 0c57533c..61089fe7 100644
--- a/lib/librte_compat/Makefile
+++ b/lib/librte_compat/Makefile
@@ -1,33 +1,6 @@
-# BSD LICENSE
-#
-# Copyright(c) 2013 Neil Horman <nhorman@tuxdriver.com>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-# * Neither the name of Intel Corporation nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2013 Neil Horman <nhorman@tuxdriver.com>
+# All rights reserved.
include $(RTE_SDK)/mk/rte.vars.mk
diff --git a/lib/librte_compressdev/rte_comp.c b/lib/librte_compressdev/rte_comp.c
index d596ba87..98ad0cfd 100644
--- a/lib/librte_compressdev/rte_comp.c
+++ b/lib/librte_compressdev/rte_comp.c
@@ -14,8 +14,12 @@ rte_comp_get_feature_name(uint64_t flag)
return "STATEFUL_COMPRESSION";
case RTE_COMP_FF_STATEFUL_DECOMPRESSION:
return "STATEFUL_DECOMPRESSION";
- case RTE_COMP_FF_MBUF_SCATTER_GATHER:
- return "MBUF_SCATTER_GATHER";
+ case RTE_COMP_FF_OOP_SGL_IN_SGL_OUT:
+ return "OOP_SGL_IN_SGL_OUT";
+ case RTE_COMP_FF_OOP_SGL_IN_LB_OUT:
+ return "OOP_SGL_IN_LB_OUT";
+ case RTE_COMP_FF_OOP_LB_IN_SGL_OUT:
+ return "OOP_LB_IN_SGL_OUT";
case RTE_COMP_FF_MULTI_PKT_CHECKSUM:
return "MULTI_PKT_CHECKSUM";
case RTE_COMP_FF_ADLER32_CHECKSUM:
@@ -32,6 +36,10 @@ rte_comp_get_feature_name(uint64_t flag)
return "SHA2_SHA256_HASH";
case RTE_COMP_FF_SHAREABLE_PRIV_XFORM:
return "SHAREABLE_PRIV_XFORM";
+ case RTE_COMP_FF_HUFFMAN_FIXED:
+ return "HUFFMAN_FIXED";
+ case RTE_COMP_FF_HUFFMAN_DYNAMIC:
+ return "HUFFMAN_DYNAMIC";
default:
return NULL;
}
diff --git a/lib/librte_compressdev/rte_comp.h b/lib/librte_compressdev/rte_comp.h
index 5b513c77..ee9056ea 100644
--- a/lib/librte_compressdev/rte_comp.h
+++ b/lib/librte_compressdev/rte_comp.h
@@ -30,28 +30,43 @@ extern "C" {
/**< Stateful compression is supported */
#define RTE_COMP_FF_STATEFUL_DECOMPRESSION (1ULL << 1)
/**< Stateful decompression is supported */
-#define RTE_COMP_FF_MBUF_SCATTER_GATHER (1ULL << 2)
-/**< Scatter-gather mbufs are supported */
-#define RTE_COMP_FF_ADLER32_CHECKSUM (1ULL << 3)
+#define RTE_COMP_FF_OOP_SGL_IN_SGL_OUT (1ULL << 2)
+/**< Out-of-place Scatter-gather (SGL) buffers,
+ * with multiple segments, are supported in input and output
+ */
+#define RTE_COMP_FF_OOP_SGL_IN_LB_OUT (1ULL << 3)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in input, combined with linear buffers (LB), with a
+ * single segment, in output
+ */
+#define RTE_COMP_FF_OOP_LB_IN_SGL_OUT (1ULL << 4)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in output, combined with linear buffers (LB) in input
+ */
+#define RTE_COMP_FF_ADLER32_CHECKSUM (1ULL << 5)
/**< Adler-32 Checksum is supported */
-#define RTE_COMP_FF_CRC32_CHECKSUM (1ULL << 4)
+#define RTE_COMP_FF_CRC32_CHECKSUM (1ULL << 6)
/**< CRC32 Checksum is supported */
-#define RTE_COMP_FF_CRC32_ADLER32_CHECKSUM (1ULL << 5)
+#define RTE_COMP_FF_CRC32_ADLER32_CHECKSUM (1ULL << 7)
/**< Adler-32/CRC32 Checksum is supported */
-#define RTE_COMP_FF_MULTI_PKT_CHECKSUM (1ULL << 6)
+#define RTE_COMP_FF_MULTI_PKT_CHECKSUM (1ULL << 8)
/**< Generation of checksum across multiple stateless packets is supported */
-#define RTE_COMP_FF_SHA1_HASH (1ULL << 7)
+#define RTE_COMP_FF_SHA1_HASH (1ULL << 9)
/**< SHA1 Hash is supported */
-#define RTE_COMP_FF_SHA2_SHA256_HASH (1ULL << 8)
+#define RTE_COMP_FF_SHA2_SHA256_HASH (1ULL << 10)
/**< SHA256 Hash of SHA2 family is supported */
-#define RTE_COMP_FF_NONCOMPRESSED_BLOCKS (1ULL << 9)
+#define RTE_COMP_FF_NONCOMPRESSED_BLOCKS (1ULL << 11)
/**< Creation of non-compressed blocks using RTE_COMP_LEVEL_NONE is supported */
-#define RTE_COMP_FF_SHAREABLE_PRIV_XFORM (1ULL << 10)
+#define RTE_COMP_FF_SHAREABLE_PRIV_XFORM (1ULL << 12)
/**< Private xforms created by the PMD can be shared
* across multiple stateless operations. If not set, then app needs
* to create as many priv_xforms as it expects to have stateless
* operations in-flight.
*/
+#define RTE_COMP_FF_HUFFMAN_FIXED (1ULL << 13)
+/**< Fixed Huffman encoding is supported */
+#define RTE_COMP_FF_HUFFMAN_DYNAMIC (1ULL << 14)
+/**< Dynamic Huffman encoding is supported */
/** Status of comp operation */
enum rte_comp_op_status {
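With RTE_COMP_FF_MBUF_SCATTER_GATHER split into the three out-of-place variants above, an application must test the exact in/out buffer combination it intends to use. A hedged sketch (the device id is assumed valid):

#include <stdio.h>
#include <rte_compressdev.h>
#include <rte_comp.h>

static int
supports_sgl_in_linear_out(uint8_t dev_id)
{
	struct rte_compressdev_info info;

	rte_compressdev_info_get(dev_id, &info);
	if ((info.feature_flags & RTE_COMP_FF_OOP_SGL_IN_LB_OUT) == 0) {
		/* rte_comp_get_feature_name() maps the flag to its name */
		printf("%s not supported\n",
			rte_comp_get_feature_name(RTE_COMP_FF_OOP_SGL_IN_LB_OUT));
		return 0;
	}
	return 1;
}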
diff --git a/lib/librte_compressdev/rte_compressdev.c b/lib/librte_compressdev/rte_compressdev.c
index 6a38917d..9091dd6e 100644
--- a/lib/librte_compressdev/rte_compressdev.c
+++ b/lib/librte_compressdev/rte_compressdev.c
@@ -764,10 +764,7 @@ rte_compressdev_name_get(uint8_t dev_id)
return dev->data->name;
}
-RTE_INIT(rte_compressdev_log);
-
-static void
-rte_compressdev_log(void)
+RTE_INIT(rte_compressdev_log)
{
compressdev_logtype = rte_log_register("lib.compressdev");
if (compressdev_logtype >= 0)
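The change above adopts the newer RTE_INIT() form, which declares and defines the constructor in one step instead of a separate prototype plus static function. The same pattern in application code; the logtype name is illustrative:

#include <rte_log.h>

static int my_logtype;

RTE_INIT(my_app_log_init)
{
	my_logtype = rte_log_register("app.mycomponent");
	if (my_logtype >= 0)
		rte_log_set_level(my_logtype, RTE_LOG_NOTICE);
}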
diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile
index bba8dee9..c1148887 100644
--- a/lib/librte_cryptodev/Makefile
+++ b/lib/librte_cryptodev/Makefile
@@ -23,6 +23,7 @@ SYMLINK-y-include += rte_crypto.h
SYMLINK-y-include += rte_crypto_sym.h
SYMLINK-y-include += rte_cryptodev.h
SYMLINK-y-include += rte_cryptodev_pmd.h
+SYMLINK-y-include += rte_crypto_asym.h
# versioning export map
EXPORT_MAP := rte_cryptodev_version.map
diff --git a/lib/librte_cryptodev/meson.build b/lib/librte_cryptodev/meson.build
index bd5fed89..295f509e 100644
--- a/lib/librte_cryptodev/meson.build
+++ b/lib/librte_cryptodev/meson.build
@@ -6,5 +6,6 @@ sources = files('rte_cryptodev.c', 'rte_cryptodev_pmd.c')
headers = files('rte_cryptodev.h',
'rte_cryptodev_pmd.h',
'rte_crypto.h',
- 'rte_crypto_sym.h')
+ 'rte_crypto_sym.h',
+ 'rte_crypto_asym.h')
deps += ['kvargs', 'mbuf']
diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h
index 25404264..fd5ef3a8 100644
--- a/lib/librte_cryptodev/rte_crypto.h
+++ b/lib/librte_cryptodev/rte_crypto.h
@@ -23,6 +23,7 @@ extern "C" {
#include <rte_common.h>
#include "rte_crypto_sym.h"
+#include "rte_crypto_asym.h"
/** Crypto operation types */
enum rte_crypto_op_type {
@@ -30,6 +31,8 @@ enum rte_crypto_op_type {
/**< Undefined operation type */
RTE_CRYPTO_OP_TYPE_SYMMETRIC,
/**< Symmetric operation */
+ RTE_CRYPTO_OP_TYPE_ASYMMETRIC
+ /**< Asymmetric operation */
};
/** Status of crypto operation */
@@ -73,26 +76,37 @@ enum rte_crypto_op_sess_type {
* rte_cryptodev_enqueue_burst() / rte_cryptodev_dequeue_burst() .
*/
struct rte_crypto_op {
- uint8_t type;
- /**< operation type */
- uint8_t status;
- /**<
- * operation status - this is reset to
- * RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation from mempool and
- * will be set to RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation
- * is successfully processed by a crypto PMD
- */
- uint8_t sess_type;
- /**< operation session type */
- uint16_t private_data_offset;
- /**< Offset to indicate start of private data (if any). The offset
- * is counted from the start of the rte_crypto_op including IV.
- * The private data may be used by the application to store
- * information which should remain untouched in the library/driver
- */
-
- uint8_t reserved[3];
- /**< Reserved bytes to fill 64 bits for future additions */
+ __extension__
+ union {
+ uint64_t raw;
+ __extension__
+ struct {
+ uint8_t type;
+ /**< operation type */
+ uint8_t status;
+ /**<
+ * operation status - this is reset to
+ * RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation
+ * from mempool and will be set to
+ * RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation
+ * is successfully processed by a crypto PMD
+ */
+ uint8_t sess_type;
+ /**< operation session type */
+ uint8_t reserved[3];
+ /**< Reserved bytes to fill 64 bits for
+ * future additions
+ */
+ uint16_t private_data_offset;
+ /**< Offset to indicate start of private data (if any).
+ * The offset is counted from the start of the
+ * rte_crypto_op including IV.
+ * The private data may be used by the application
+ * to store information which should remain untouched
+ * in the library/driver
+ */
+ };
+ };
struct rte_mempool *mempool;
/**< crypto operation mempool which operation is allocated from */
@@ -103,6 +117,10 @@ struct rte_crypto_op {
union {
struct rte_crypto_sym_op sym[0];
/**< Symmetric operation parameters */
+
+ struct rte_crypto_asym_op asym[0];
+ /**< Asymmetric operation parameters */
+
}; /**< operation specific parameters */
};
@@ -123,6 +141,9 @@ __rte_crypto_op_reset(struct rte_crypto_op *op, enum rte_crypto_op_type type)
case RTE_CRYPTO_OP_TYPE_SYMMETRIC:
__rte_crypto_sym_op_reset(op->sym);
break;
+ case RTE_CRYPTO_OP_TYPE_ASYMMETRIC:
+ memset(op->asym, 0, sizeof(struct rte_crypto_asym_op));
+ break;
case RTE_CRYPTO_OP_TYPE_UNDEFINED:
default:
break;
@@ -289,9 +310,14 @@ __rte_crypto_op_get_priv_data(struct rte_crypto_op *op, uint32_t size)
if (likely(op->mempool != NULL)) {
priv_size = __rte_crypto_op_get_priv_data_size(op->mempool);
- if (likely(priv_size >= size))
- return (void *)((uint8_t *)(op + 1) +
+ if (likely(priv_size >= size)) {
+ if (op->type == RTE_CRYPTO_OP_TYPE_SYMMETRIC)
+ return (void *)((uint8_t *)(op + 1) +
sizeof(struct rte_crypto_sym_op));
+ if (op->type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC)
+ return (void *)((uint8_t *)(op + 1) +
+ sizeof(struct rte_crypto_asym_op));
+ }
}
return NULL;
@@ -394,6 +420,24 @@ rte_crypto_op_attach_sym_session(struct rte_crypto_op *op,
return __rte_crypto_sym_op_attach_sym_session(op->sym, sess);
}
+/**
+ * Attach an asymmetric session to a crypto operation
+ *
+ * @param op crypto operation, must be of type asymmetric
+ * @param sess cryptodev session
+ */
+static inline int
+rte_crypto_op_attach_asym_session(struct rte_crypto_op *op,
+ struct rte_cryptodev_asym_session *sess)
+{
+ if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_ASYMMETRIC))
+ return -1;
+
+ op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
+ op->asym->session = sess;
+ return 0;
+}
+
#ifdef __cplusplus
}
#endif
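A hedged usage sketch for the new asymmetric op type: allocate an op of type ASYMMETRIC and attach an asymmetric session to it with the helper added above. The pool and session are assumed to have been created beforehand (see rte_crypto_op_pool_create() and rte_cryptodev_asym_session_create()):

#include <errno.h>
#include <rte_crypto.h>

static int
prepare_asym_op(struct rte_mempool *asym_op_pool,
	struct rte_cryptodev_asym_session *asym_sess,
	struct rte_crypto_op **opp)
{
	struct rte_crypto_op *op;

	op = rte_crypto_op_alloc(asym_op_pool, RTE_CRYPTO_OP_TYPE_ASYMMETRIC);
	if (op == NULL)
		return -ENOMEM;

	if (rte_crypto_op_attach_asym_session(op, asym_sess) != 0) {
		rte_crypto_op_free(op);
		return -EINVAL;
	}

	*opp = op;
	return 0;
}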
diff --git a/lib/librte_cryptodev/rte_crypto_asym.h b/lib/librte_cryptodev/rte_crypto_asym.h
new file mode 100644
index 00000000..5e185b2d
--- /dev/null
+++ b/lib/librte_cryptodev/rte_crypto_asym.h
@@ -0,0 +1,496 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium Networks
+ */
+
+#ifndef _RTE_CRYPTO_ASYM_H_
+#define _RTE_CRYPTO_ASYM_H_
+
+/**
+ * @file rte_crypto_asym.h
+ *
+ * RTE Definitions for Asymmetric Cryptography
+ *
+ * Defines asymmetric algorithms and modes, as well as supported
+ * asymmetric crypto operations.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <string.h>
+#include <stdint.h>
+
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_common.h>
+
+typedef struct rte_crypto_param_t {
+ uint8_t *data;
+ /**< pointer to buffer holding data */
+ rte_iova_t iova;
+ /**< IO address of data buffer */
+ size_t length;
+ /**< length of data in bytes */
+} rte_crypto_param;
+
+/** asym xform type name strings */
+extern const char *
+rte_crypto_asym_xform_strings[];
+
+/** asym operations type name strings */
+extern const char *
+rte_crypto_asym_op_strings[];
+
+/**
+ * Asymmetric crypto transformation types.
+ * Each xform type maps to one asymmetric algorithm
+ * performing a specific operation
+ *
+ */
+enum rte_crypto_asym_xform_type {
+ RTE_CRYPTO_ASYM_XFORM_UNSPECIFIED = 0,
+ /**< Invalid xform. */
+ RTE_CRYPTO_ASYM_XFORM_NONE,
+ /**< Xform type None.
+ * May be supported by a PMD to provide a
+ * passthrough op for debugging purposes.
+ * If xform_type is none, op_type is disregarded.
+ */
+ RTE_CRYPTO_ASYM_XFORM_RSA,
+ /**< RSA. Performs Encrypt, Decrypt, Sign and Verify.
+ * Refer to rte_crypto_asym_op_type
+ */
+ RTE_CRYPTO_ASYM_XFORM_DH,
+ /**< Diffie-Hellman.
+ * Performs Key Generate and Shared Secret Compute.
+ * Refer to rte_crypto_asym_op_type
+ */
+ RTE_CRYPTO_ASYM_XFORM_DSA,
+ /**< Digital Signature Algorithm
+ * Performs Signature Generation and Verification.
+ * Refer to rte_crypto_asym_op_type
+ */
+ RTE_CRYPTO_ASYM_XFORM_MODINV,
+ /**< Modular Inverse
+ * Performs modular inverse b^(-1) mod n
+ */
+ RTE_CRYPTO_ASYM_XFORM_MODEX,
+ /**< Modular Exponentiation
+ * Performs modular exponentiation b^e mod n
+ */
+ RTE_CRYPTO_ASYM_XFORM_TYPE_LIST_END
+ /**< End of list */
+};
+
+/**
+ * Asymmetric crypto operation type variants
+ */
+enum rte_crypto_asym_op_type {
+ RTE_CRYPTO_ASYM_OP_ENCRYPT,
+ /**< Asymmetric Encrypt operation */
+ RTE_CRYPTO_ASYM_OP_DECRYPT,
+ /**< Asymmetric Decrypt operation */
+ RTE_CRYPTO_ASYM_OP_SIGN,
+ /**< Signature Generation operation */
+ RTE_CRYPTO_ASYM_OP_VERIFY,
+ /**< Signature Verification operation */
+ RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE,
+ /**< DH Private Key generation operation */
+ RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE,
+ /**< DH Public Key generation operation */
+ RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE,
+ /**< DH Shared Secret compute operation */
+ RTE_CRYPTO_ASYM_OP_LIST_END
+};
+
+/**
+ * Padding types for RSA signature.
+ */
+enum rte_crypto_rsa_padding_type {
+ RTE_CRYPTO_RSA_PADDING_NONE = 0,
+ /**< RSA no padding scheme */
+ RTE_CRYPTO_RSA_PKCS1_V1_5_BT0,
+ /**< RSA PKCS#1 V1.5 Block Type 0 padding scheme
+ * as described in RFC 2313
+ */
+ RTE_CRYPTO_RSA_PKCS1_V1_5_BT1,
+ /**< RSA PKCS#1 V1.5 Block Type 01 padding scheme
+ * as described in RFC 2313
+ */
+ RTE_CRYPTO_RSA_PKCS1_V1_5_BT2,
+ /**< RSA PKCS#1 V1.5 Block Type 02 padding scheme
+ * as described in RFC 2313
+ */
+ RTE_CRYPTO_RSA_PADDING_OAEP,
+ /**< RSA PKCS#1 OAEP padding scheme */
+ RTE_CRYPTO_RSA_PADDING_PSS,
+ /**< RSA PKCS#1 PSS padding scheme */
+ RTE_CRYPTO_RSA_PADDING_TYPE_LIST_END
+};
+
+/**
+ * RSA private key type enumeration
+ *
+ * enumerates private key format required to perform RSA crypto
+ * transform.
+ *
+ */
+enum rte_crypto_rsa_priv_key_type {
+ RTE_RSA_KEY_TYPE_EXP,
+ /**< RSA private key is an exponent */
+ RTE_RSA_KET_TYPE_QT,
+ /**< RSA private key is in quintuple format
+ * See rte_crypto_rsa_priv_key_qt
+ */
+};
+
+/**
+ * Structure describing RSA private key in quintuple format.
+ * See PKCS V1.5 RSA Cryptography Standard.
+ */
+struct rte_crypto_rsa_priv_key_qt {
+ rte_crypto_param p;
+ /**< p - Private key component P
+ * Private key component of RSA parameter required for CRT method
+ * of private key operations in Octet-string network byte order
+ * format.
+ */
+
+ rte_crypto_param q;
+ /**< q - Private key component Q
+ * Private key component of RSA parameter required for CRT method
+ * of private key operations in Octet-string network byte order
+ * format.
+ */
+
+ rte_crypto_param dP;
+ /**< dP - Private CRT component
+ * Private CRT component of RSA parameter required for CRT method
+ * RSA private key operations in Octet-string network byte order
+ * format.
+ * dP = d mod ( p - 1 )
+ */
+
+ rte_crypto_param dQ;
+ /**< dQ - Private CRT component
+ * Private CRT component of RSA parameter required for CRT method
+ * RSA private key operations in Octet-string network byte order
+ * format.
+ * dQ = d mod ( q - 1 )
+ */
+
+ rte_crypto_param qInv;
+ /**< qInv - Private CRT component
+ * Private CRT component of RSA parameter required for CRT method
+ * RSA private key operations in Octet-string network byte order
+ * format.
+ * qInv = inv q mod p
+ */
+};
+
+/**
+ * Asymmetric RSA transform data
+ *
+ * Structure describing RSA xform params
+ *
+ */
+struct rte_crypto_rsa_xform {
+ rte_crypto_param n;
+ /**< n - Modulus
+ * Modulus data of RSA operation in Octet-string network
+ * byte order format.
+ */
+
+ rte_crypto_param e;
+ /**< e - Public key exponent
+ * Public key exponent used for RSA public key operations in Octet-
+ * string network byte order format.
+ */
+
+ enum rte_crypto_rsa_priv_key_type key_type;
+
+ __extension__
+ union {
+ rte_crypto_param d;
+ /**< d - Private key exponent
+ * Private key exponent used for RSA
+ * private key operations in
+ * Octet-string network byte order format.
+ */
+
+ struct rte_crypto_rsa_priv_key_qt qt;
+ /**< qt - Private key in quintuple format */
+ };
+};
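A hedged sketch of filling struct rte_crypto_rsa_xform for a private key held as a plain exponent (RTE_RSA_KEY_TYPE_EXP). The byte arrays are placeholders, not a real key, and the iova fields are left unset here; a PMD may require them to be filled:

#include <rte_crypto_asym.h>

static uint8_t rsa_n[256];	/* modulus, network byte order (placeholder) */
static uint8_t rsa_e[3];	/* public exponent (placeholder) */
static uint8_t rsa_d[256];	/* private exponent (placeholder) */

static const struct rte_crypto_rsa_xform rsa = {
	.n = { .data = rsa_n, .length = sizeof(rsa_n) },
	.e = { .data = rsa_e, .length = sizeof(rsa_e) },
	.key_type = RTE_RSA_KEY_TYPE_EXP,
	.d = { .data = rsa_d, .length = sizeof(rsa_d) },
};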
+
+/**
+ * Asymmetric Modular exponentiation transform data
+ *
+ * Structure describing modular exponentiation xform params
+ *
+ */
+struct rte_crypto_modex_xform {
+ rte_crypto_param modulus;
+ /**< modulus
+ * Prime modulus of the modexp transform operation in octet-string
+ * network byte order format.
+ */
+
+ rte_crypto_param exponent;
+ /**< exponent
+ * Private exponent of the modexp transform operation in
+ * octet-string network byte order format.
+ */
+};
+
+/**
+ * Asymmetric modular inverse transform operation
+ *
+ * Structure describing modulus inverse xform params
+ *
+ */
+struct rte_crypto_modinv_xform {
+ rte_crypto_param modulus;
+ /**<
+ * Pointer to the prime modulus data for modular
+ * inverse operation in octet-string network byte
+ * order format.
+ */
+};
+
+/**
+ * Asymmetric DH transform data
+ *
+ * Structure describing Diffie-Hellman xform params
+ *
+ */
+struct rte_crypto_dh_xform {
+ enum rte_crypto_asym_op_type type;
+ /**< Setup xform for key generate or shared secret compute */
+
+ rte_crypto_param p;
+ /**< p : Prime modulus data
+ * DH prime modulus data in octet-string network byte order format.
+ *
+ */
+
+ rte_crypto_param g;
+ /**< g : Generator
+ * DH group generator data in octet-string network byte order
+ * format.
+ *
+ */
+};
+
+/**
+ * Asymmetric Digital Signature transform operation
+ *
+ * Structure describing DSA xform params
+ *
+ */
+struct rte_crypto_dsa_xform {
+ rte_crypto_param p;
+ /**< p - Prime modulus
+ * Prime modulus data for DSA operation in Octet-string network byte
+ * order format.
+ */
+ rte_crypto_param q;
+ /**< q : Order of the subgroup.
+ * Order of the subgroup data in Octet-string network byte order
+ * format.
+ * (p-1) % q = 0
+ */
+ rte_crypto_param g;
+ /**< g: Generator of the subgroup
+ * Generator data in Octet-string network byte order format.
+ */
+ rte_crypto_param x;
+ /**< x: Private key of the signer in octet-string network
+ * byte order format.
+ * Used when app has pre-defined private key.
+ * Valid only when xform chain is DSA ONLY.
+ * If xform chain is DH private key generate + DSA, then DSA sign
+ * compute will use internally generated key.
+ */
+};
+
+/**
+ * Operations params for modular operations:
+ * exponentiation and invert
+ *
+ */
+struct rte_crypto_mod_op_param {
+ rte_crypto_param base;
+ /**<
+ * Pointer to base of modular exponentiation/inversion data in
+ * Octet-string network byte order format.
+ */
+};
+
+/**
+ * Asymmetric crypto transform data
+ *
+ * Structure describing asym xforms.
+ */
+struct rte_crypto_asym_xform {
+ struct rte_crypto_asym_xform *next;
+ /**< Pointer to next xform to set up xform chain.*/
+ enum rte_crypto_asym_xform_type xform_type;
+ /**< Asymmetric crypto transform */
+
+ __extension__
+ union {
+ struct rte_crypto_rsa_xform rsa;
+ /**< RSA xform parameters */
+
+ struct rte_crypto_modex_xform modex;
+ /**< Modular Exponentiation xform parameters */
+
+ struct rte_crypto_modinv_xform modinv;
+ /**< Modulus Inverse xform parameters */
+
+ struct rte_crypto_dh_xform dh;
+ /**< DH xform parameters */
+
+ struct rte_crypto_dsa_xform dsa;
+ /**< DSA xform parameters */
+ };
+};
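A hedged example of a complete modular-exponentiation transform, combining the generic rte_crypto_asym_xform wrapper above with the modex-specific fields. Modulus and exponent bytes are placeholders:

#include <rte_crypto_asym.h>

static uint8_t mod_n[128];	/* modulus (placeholder) */
static uint8_t mod_e[4];	/* exponent (placeholder) */

static const struct rte_crypto_asym_xform modex_xform = {
	.next = NULL,
	.xform_type = RTE_CRYPTO_ASYM_XFORM_MODEX,
	.modex = {
		.modulus  = { .data = mod_n, .length = sizeof(mod_n) },
		.exponent = { .data = mod_e, .length = sizeof(mod_e) },
	},
};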
+
+struct rte_cryptodev_asym_session;
+
+/**
+ * RSA operation params
+ *
+ */
+struct rte_crypto_rsa_op_param {
+ enum rte_crypto_asym_op_type op_type;
+ /**< Type of RSA operation for transform */
+
+ rte_crypto_param message;
+ /**<
+ * Pointer to data
+ * - to be encrypted for RSA public encrypt.
+ * - to be decrypted for RSA private decrypt.
+ * - to be signed for RSA sign generation.
+ * - to be authenticated for RSA sign verification.
+ */
+
+ rte_crypto_param sign;
+ /**<
+ * Pointer to RSA signature data. If operation is RSA
+ * sign @ref RTE_CRYPTO_ASYM_OP_SIGN, buffer will be
+ * over-written with generated signature.
+ *
+ * Length of the signature data will be equal to the
+ * RSA prime modulus length.
+ */
+
+ enum rte_crypto_rsa_padding_type pad;
+ /**< RSA padding scheme to be used for transform */
+
+ enum rte_crypto_auth_algorithm md;
+ /**< Hash algorithm to be used for data hash if padding
+ * scheme is either OAEP or PSS. Valid hash algorithms
+ * are:
+ * MD5, SHA1, SHA224, SHA256, SHA384, SHA512
+ */
+
+ enum rte_crypto_auth_algorithm mgf1md;
+ /**<
+ * Hash algorithm to be used for mask generation if
+ * padding scheme is either OAEP or PSS. If padding
+ * scheme is unspecified data hash algorithm is used
+ * for mask generation. Valid hash algorithms are:
+ * MD5, SHA1, SHA224, SHA256, SHA384, SHA512
+ */
+};
+
+/**
+ * Diffie-Hellman Operations params.
+ */
+struct rte_crypto_dh_op_param {
+ rte_crypto_param pub_key;
+ /**<
+ * Output generated public key when xform type is
+ * DH PUB_KEY_GENERATION.
+ * Input peer public key when xform type is DH
+ * SHARED_SECRET_COMPUTATION.
+ * pub_key is in octet-string network byte order format.
+ *
+ */
+
+ rte_crypto_param priv_key;
+ /**<
+ * Output generated private key if xform type is
+ * DH PRIVATE_KEY_GENERATION
+ * Input when xform type is DH SHARED_SECRET_COMPUTATION.
+ * priv_key is in octet-string network byte order format.
+ *
+ */
+
+ rte_crypto_param shared_secret;
+ /**<
+ * Output with calculated shared secret
+ * when dh xform set up with op type = SHARED_SECRET_COMPUTATION.
+ * shared_secret is in octet-string network byte order format.
+ *
+ */
+};
+
+/**
+ * DSA Operations params
+ *
+ */
+struct rte_crypto_dsa_op_param {
+ enum rte_crypto_asym_op_type op_type;
+ /**< Signature Generation or Verification */
+ rte_crypto_param message;
+ /**< input message to be signed or verified */
+ rte_crypto_param r;
+ /**< dsa sign component 'r' value
+ *
+ * output if op_type = sign generate,
+ * input if op_type = sign verify
+ */
+ rte_crypto_param s;
+ /**< dsa sign component 's' value
+ *
+ * output if op_type = sign generate,
+ * input if op_type = sign verify
+ */
+ rte_crypto_param y;
+ /**< y : Public key of the signer.
+ * Public key data of the signer in Octet-string network byte order
+ * format.
+ * y = g^x mod p
+ */
+};
+
+/**
+ * Asymmetric Cryptographic Operation.
+ *
+ * Structure describing asymmetric crypto operation params.
+ *
+ */
+struct rte_crypto_asym_op {
+ struct rte_cryptodev_asym_session *session;
+ /**< Handle for the initialised session context */
+
+ __extension__
+ union {
+ struct rte_crypto_rsa_op_param rsa;
+ struct rte_crypto_mod_op_param modex;
+ struct rte_crypto_mod_op_param modinv;
+ struct rte_crypto_dh_op_param dh;
+ struct rte_crypto_dsa_op_param dsa;
+ };
+} __rte_cache_aligned;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CRYPTO_ASYM_H_ */
diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c
index 7e582124..63ae23f0 100644
--- a/lib/librte_cryptodev/rte_cryptodev.c
+++ b/lib/librte_cryptodev/rte_cryptodev.c
@@ -166,6 +166,31 @@ rte_crypto_aead_operation_strings[] = {
[RTE_CRYPTO_AEAD_OP_DECRYPT] = "decrypt"
};
+/**
+ * Asymmetric crypto transform operation strings identifiers.
+ */
+const char *rte_crypto_asym_xform_strings[] = {
+ [RTE_CRYPTO_ASYM_XFORM_NONE] = "none",
+ [RTE_CRYPTO_ASYM_XFORM_RSA] = "rsa",
+ [RTE_CRYPTO_ASYM_XFORM_MODEX] = "modexp",
+ [RTE_CRYPTO_ASYM_XFORM_MODINV] = "modinv",
+ [RTE_CRYPTO_ASYM_XFORM_DH] = "dh",
+ [RTE_CRYPTO_ASYM_XFORM_DSA] = "dsa",
+};
+
+/**
+ * Asymmetric crypto operation strings identifiers.
+ */
+const char *rte_crypto_asym_op_strings[] = {
+ [RTE_CRYPTO_ASYM_OP_ENCRYPT] = "encrypt",
+ [RTE_CRYPTO_ASYM_OP_DECRYPT] = "decrypt",
+ [RTE_CRYPTO_ASYM_OP_SIGN] = "sign",
+ [RTE_CRYPTO_ASYM_OP_VERIFY] = "verify",
+ [RTE_CRYPTO_ASYM_OP_PRIVATE_KEY_GENERATE] = "priv_key_generate",
+ [RTE_CRYPTO_ASYM_OP_PUBLIC_KEY_GENERATE] = "pub_key_generate",
+ [RTE_CRYPTO_ASYM_OP_SHARED_SECRET_COMPUTE] = "sharedsecret_compute",
+};
+
int
rte_cryptodev_get_cipher_algo_enum(enum rte_crypto_cipher_algorithm *algo_enum,
const char *algo_string)
@@ -217,6 +242,24 @@ rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
return -1;
}
+int __rte_experimental
+rte_cryptodev_asym_get_xform_enum(enum rte_crypto_asym_xform_type *xform_enum,
+ const char *xform_string)
+{
+ unsigned int i;
+
+ for (i = 1; i < RTE_DIM(rte_crypto_asym_xform_strings); i++) {
+ if (strcmp(xform_string,
+ rte_crypto_asym_xform_strings[i]) == 0) {
+ *xform_enum = (enum rte_crypto_asym_xform_type) i;
+ return 0;
+ }
+ }
+
+ /* Invalid string */
+ return -1;
+}
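Illustrative use of the new string-to-enum helper, e.g. when parsing a command-line option; the wrapper function is invented for illustration:

#include <errno.h>
#include <rte_cryptodev.h>

static int
parse_xform(const char *name, enum rte_crypto_asym_xform_type *xf)
{
	/* maps e.g. "rsa", "modexp", "dh" to the enum; -1 on unknown name */
	if (rte_cryptodev_asym_get_xform_enum(xf, name) != 0)
		return -EINVAL;
	return 0;
}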
+
/**
* The crypto auth operation strings identifiers.
* It could be used in application command line.
@@ -287,6 +330,28 @@ param_range_check(uint16_t size, const struct rte_crypto_param_range *range)
return -1;
}
+const struct rte_cryptodev_asymmetric_xform_capability * __rte_experimental
+rte_cryptodev_asym_capability_get(uint8_t dev_id,
+ const struct rte_cryptodev_asym_capability_idx *idx)
+{
+ const struct rte_cryptodev_capabilities *capability;
+ struct rte_cryptodev_info dev_info;
+ unsigned int i = 0;
+
+ memset(&dev_info, 0, sizeof(struct rte_cryptodev_info));
+ rte_cryptodev_info_get(dev_id, &dev_info);
+
+ while ((capability = &dev_info.capabilities[i++])->op !=
+ RTE_CRYPTO_OP_TYPE_UNDEFINED) {
+ if (capability->op != RTE_CRYPTO_OP_TYPE_ASYMMETRIC)
+ continue;
+
+ if (capability->asym.xform_capa.xform_type == idx->type)
+ return &capability->asym.xform_capa;
+ }
+ return NULL;
+}
+
int
rte_cryptodev_sym_capability_check_cipher(
const struct rte_cryptodev_symmetric_capability *capability,
@@ -338,6 +403,42 @@ rte_cryptodev_sym_capability_check_aead(
return 0;
}
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_optype(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ enum rte_crypto_asym_op_type op_type)
+{
+ if (capability->op_types & (1 << op_type))
+ return 1;
+
+ return 0;
+}
+
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_modlen(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ uint16_t modlen)
+{
+ /* no need to check for limits, if min or max = 0 */
+ if (capability->modlen.min != 0) {
+ if (modlen < capability->modlen.min)
+ return -1;
+ }
+
+ if (capability->modlen.max != 0) {
+ if (modlen > capability->modlen.max)
+ return -1;
+ }
+
+ /* in any case, check if given modlen is a multiple of the increment */
+ if (capability->modlen.increment != 0) {
+ if (modlen % (capability->modlen.increment))
+ return -1;
+ }
+
+ return 0;
+}
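A hedged sketch combining the new capability helpers: look up the RSA xform capability on a device and verify that both the sign op type and a 2048-bit modulus (length in bytes assumed here) are supported:

#include <rte_cryptodev.h>

static int
dev_supports_rsa_sign_2048(uint8_t dev_id)
{
	const struct rte_cryptodev_asymmetric_xform_capability *capa;
	struct rte_cryptodev_asym_capability_idx idx = {
		.type = RTE_CRYPTO_ASYM_XFORM_RSA,
	};

	capa = rte_cryptodev_asym_capability_get(dev_id, &idx);
	if (capa == NULL)
		return 0;

	if (!rte_cryptodev_asym_xform_capability_check_optype(capa,
			RTE_CRYPTO_ASYM_OP_SIGN))
		return 0;

	return rte_cryptodev_asym_xform_capability_check_modlen(capa,
			256) == 0;
}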
+
const char *
rte_cryptodev_get_feature_name(uint64_t flag)
@@ -361,8 +462,16 @@ rte_cryptodev_get_feature_name(uint64_t flag)
return "CPU_AESNI";
case RTE_CRYPTODEV_FF_HW_ACCELERATED:
return "HW_ACCELERATED";
- case RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER:
- return "MBUF_SCATTER_GATHER";
+ case RTE_CRYPTODEV_FF_IN_PLACE_SGL:
+ return "IN_PLACE_SGL";
+ case RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT:
+ return "OOP_SGL_IN_SGL_OUT";
+ case RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT:
+ return "OOP_SGL_IN_LB_OUT";
+ case RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT:
+ return "OOP_LB_IN_SGL_OUT";
+ case RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT:
+ return "OOP_LB_IN_LB_OUT";
case RTE_CRYPTODEV_FF_CPU_NEON:
return "CPU_NEON";
case RTE_CRYPTODEV_FF_CPU_ARM_CE:
@@ -703,50 +812,6 @@ rte_cryptodev_queue_pairs_config(struct rte_cryptodev *dev, uint16_t nb_qpairs,
}
int
-rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id)
-{
- struct rte_cryptodev *dev;
-
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
- return -EINVAL;
- }
-
- dev = &rte_crypto_devices[dev_id];
- if (queue_pair_id >= dev->data->nb_queue_pairs) {
- CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id);
- return -EINVAL;
- }
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_start, -ENOTSUP);
-
- return dev->dev_ops->queue_pair_start(dev, queue_pair_id);
-
-}
-
-int
-rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id)
-{
- struct rte_cryptodev *dev;
-
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id);
- return -EINVAL;
- }
-
- dev = &rte_crypto_devices[dev_id];
- if (queue_pair_id >= dev->data->nb_queue_pairs) {
- CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id);
- return -EINVAL;
- }
-
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_stop, -ENOTSUP);
-
- return dev->dev_ops->queue_pair_stop(dev, queue_pair_id);
-
-}
-
-int
rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config)
{
struct rte_cryptodev *dev;
@@ -966,6 +1031,7 @@ rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info)
(*dev->dev_ops->dev_infos_get)(dev, dev_info);
dev_info->driver_name = dev->device->driver->name;
+ dev_info->device = dev->device;
}
@@ -1098,8 +1164,46 @@ rte_cryptodev_sym_session_init(uint8_t dev_id,
index = dev->driver_id;
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->sym_session_configure, -ENOTSUP);
+
if (sess->sess_private_data[index] == NULL) {
- ret = dev->dev_ops->session_configure(dev, xforms, sess, mp);
+ ret = dev->dev_ops->sym_session_configure(dev, xforms,
+ sess, mp);
+ if (ret < 0) {
+ CDEV_LOG_ERR(
+ "dev_id %d failed to configure session details",
+ dev_id);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int __rte_experimental
+rte_cryptodev_asym_session_init(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess,
+ struct rte_crypto_asym_xform *xforms,
+ struct rte_mempool *mp)
+{
+ struct rte_cryptodev *dev;
+ uint8_t index;
+ int ret;
+
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
+
+ if (sess == NULL || xforms == NULL || dev == NULL)
+ return -EINVAL;
+
+ index = dev->driver_id;
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->asym_session_configure,
+ -ENOTSUP);
+
+ if (sess->sess_private_data[index] == NULL) {
+ ret = dev->dev_ops->asym_session_configure(dev,
+ xforms,
+ sess, mp);
if (ret < 0) {
CDEV_LOG_ERR(
"dev_id %d failed to configure session details",
@@ -1123,70 +1227,53 @@ rte_cryptodev_sym_session_create(struct rte_mempool *mp)
}
/* Clear device session pointer.
- * Include the flag indicating presence of private data
+ * Include the flag indicating presence of user data
*/
memset(sess, 0, (sizeof(void *) * nb_drivers) + sizeof(uint8_t));
return sess;
}
-int
-rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id,
- struct rte_cryptodev_sym_session *sess)
+struct rte_cryptodev_asym_session * __rte_experimental
+rte_cryptodev_asym_session_create(struct rte_mempool *mp)
{
- struct rte_cryptodev *dev;
+ struct rte_cryptodev_asym_session *sess;
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
- return -EINVAL;
+ /* Allocate a session structure from the session pool */
+ if (rte_mempool_get(mp, (void **)&sess)) {
+ CDEV_LOG_ERR("couldn't get object from session mempool");
+ return NULL;
}
- dev = &rte_crypto_devices[dev_id];
-
- /* The API is optional, not returning error if driver do not suuport */
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_attach_session, 0);
-
- void *sess_priv = get_session_private_data(sess, dev->driver_id);
-
- if (dev->dev_ops->qp_attach_session(dev, qp_id, sess_priv)) {
- CDEV_LOG_ERR("dev_id %d failed to attach qp: %d with session",
- dev_id, qp_id);
- return -EPERM;
- }
+ /* Clear device session pointer.
+ * Include the flag indicating presence of private data
+ */
+ memset(sess, 0, (sizeof(void *) * nb_drivers) + sizeof(uint8_t));
- return 0;
+ return sess;
}
int
-rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id,
+rte_cryptodev_sym_session_clear(uint8_t dev_id,
struct rte_cryptodev_sym_session *sess)
{
struct rte_cryptodev *dev;
- if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) {
- CDEV_LOG_ERR("Invalid dev_id=%d", dev_id);
- return -EINVAL;
- }
-
- dev = &rte_crypto_devices[dev_id];
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
- /* The API is optional, not returning error if driver do not suuport */
- RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->qp_detach_session, 0);
+ if (dev == NULL || sess == NULL)
+ return -EINVAL;
- void *sess_priv = get_session_private_data(sess, dev->driver_id);
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->sym_session_clear, -ENOTSUP);
- if (dev->dev_ops->qp_detach_session(dev, qp_id, sess_priv)) {
- CDEV_LOG_ERR("dev_id %d failed to detach qp: %d from session",
- dev_id, qp_id);
- return -EPERM;
- }
+ dev->dev_ops->sym_session_clear(dev, sess);
return 0;
}
-int
-rte_cryptodev_sym_session_clear(uint8_t dev_id,
- struct rte_cryptodev_sym_session *sess)
+int __rte_experimental
+rte_cryptodev_asym_session_clear(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess)
{
struct rte_cryptodev *dev;
@@ -1195,7 +1282,9 @@ rte_cryptodev_sym_session_clear(uint8_t dev_id,
if (dev == NULL || sess == NULL)
return -EINVAL;
- dev->dev_ops->session_clear(dev, sess);
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->asym_session_clear, -ENOTSUP);
+
+ dev->dev_ops->asym_session_clear(dev, sess);
return 0;
}
@@ -1212,7 +1301,7 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess)
/* Check that all device private data has been freed */
for (i = 0; i < nb_drivers; i++) {
- sess_priv = get_session_private_data(sess, i);
+ sess_priv = get_sym_session_private_data(sess, i);
if (sess_priv != NULL)
return -EBUSY;
}
@@ -1224,27 +1313,51 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess)
return 0;
}
-unsigned int
-rte_cryptodev_get_header_session_size(void)
+int __rte_experimental
+rte_cryptodev_asym_session_free(struct rte_cryptodev_asym_session *sess)
{
- return rte_cryptodev_sym_get_header_session_size();
+ uint8_t i;
+ void *sess_priv;
+ struct rte_mempool *sess_mp;
+
+ if (sess == NULL)
+ return -EINVAL;
+
+ /* Check that all device private data has been freed */
+ for (i = 0; i < nb_drivers; i++) {
+ sess_priv = get_asym_session_private_data(sess, i);
+ if (sess_priv != NULL)
+ return -EBUSY;
+ }
+
+ /* Return session to mempool */
+ sess_mp = rte_mempool_from_obj(sess);
+ rte_mempool_put(sess_mp, sess);
+
+ return 0;
}
+
unsigned int
rte_cryptodev_sym_get_header_session_size(void)
{
/*
* Header contains pointers to the private data
* of all registered drivers, and a flag which
- * indicates presence of private data
+ * indicates presence of user data
*/
return ((sizeof(void *) * nb_drivers) + sizeof(uint8_t));
}
-unsigned int
-rte_cryptodev_get_private_session_size(uint8_t dev_id)
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_header_session_size(void)
{
- return rte_cryptodev_sym_get_private_session_size(dev_id);
+ /*
+ * Header contains pointers to the private data
+ * of all registered drivers, and a flag which
+ * indicates presence of private data
+ */
+ return ((sizeof(void *) * nb_drivers) + sizeof(uint8_t));
}
unsigned int
@@ -1259,10 +1372,10 @@ rte_cryptodev_sym_get_private_session_size(uint8_t dev_id)
dev = rte_cryptodev_pmd_get_dev(dev_id);
- if (*dev->dev_ops->session_get_size == NULL)
+ if (*dev->dev_ops->sym_session_get_size == NULL)
return 0;
- priv_sess_size = (*dev->dev_ops->session_get_size)(dev);
+ priv_sess_size = (*dev->dev_ops->sym_session_get_size)(dev);
/*
* If size is less than session header size,
@@ -1276,32 +1389,55 @@ rte_cryptodev_sym_get_private_session_size(uint8_t dev_id)
}
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_private_session_size(uint8_t dev_id)
+{
+ struct rte_cryptodev *dev;
+ unsigned int header_size = sizeof(void *) * nb_drivers;
+ unsigned int priv_sess_size;
+
+ if (!rte_cryptodev_pmd_is_valid_dev(dev_id))
+ return 0;
+
+ dev = rte_cryptodev_pmd_get_dev(dev_id);
+
+ if (*dev->dev_ops->asym_session_get_size == NULL)
+ return 0;
+
+ priv_sess_size = (*dev->dev_ops->asym_session_get_size)(dev);
+ if (priv_sess_size < header_size)
+ return header_size;
+
+ return priv_sess_size;
+
+}
+
int __rte_experimental
-rte_cryptodev_sym_session_set_private_data(
+rte_cryptodev_sym_session_set_user_data(
struct rte_cryptodev_sym_session *sess,
void *data,
uint16_t size)
{
uint16_t off_set = sizeof(void *) * nb_drivers;
- uint8_t *private_data_present = (uint8_t *)sess + off_set;
+ uint8_t *user_data_present = (uint8_t *)sess + off_set;
if (sess == NULL)
return -EINVAL;
- *private_data_present = 1;
+ *user_data_present = 1;
off_set += sizeof(uint8_t);
rte_memcpy((uint8_t *)sess + off_set, data, size);
return 0;
}
void * __rte_experimental
-rte_cryptodev_sym_session_get_private_data(
+rte_cryptodev_sym_session_get_user_data(
struct rte_cryptodev_sym_session *sess)
{
uint16_t off_set = sizeof(void *) * nb_drivers;
- uint8_t *private_data_present = (uint8_t *)sess + off_set;
+ uint8_t *user_data_present = (uint8_t *)sess + off_set;
- if (sess == NULL || !*private_data_present)
+ if (sess == NULL || !*user_data_present)
return NULL;
off_set += sizeof(uint8_t);
@@ -1335,9 +1471,17 @@ rte_crypto_op_pool_create(const char *name, enum rte_crypto_op_type type,
struct rte_crypto_op_pool_private *priv;
unsigned elt_size = sizeof(struct rte_crypto_op) +
- sizeof(struct rte_crypto_sym_op) +
priv_size;
+ if (type == RTE_CRYPTO_OP_TYPE_SYMMETRIC) {
+ elt_size += sizeof(struct rte_crypto_sym_op);
+ } else if (type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
+ elt_size += sizeof(struct rte_crypto_asym_op);
+ } else {
+ CDEV_LOG_ERR("Invalid op_type\n");
+ return NULL;
+ }
+
/* lookup mempool in case already allocated */
struct rte_mempool *mp = rte_mempool_lookup(name);
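With the element size now depending on the op type, an asymmetric op pool is created the same way as a symmetric one, only with the new type constant. Pool name and sizing below are illustrative values:

#include <rte_crypto.h>

static struct rte_mempool *
create_asym_op_pool(void)
{
	return rte_crypto_op_pool_create("asym_op_pool",
			RTE_CRYPTO_OP_TYPE_ASYMMETRIC,
			1024,		/* number of elements */
			128,		/* per-lcore cache size */
			0,		/* no private data area */
			SOCKET_ID_ANY);
}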
diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h
index 92ce6d49..4099823f 100644
--- a/lib/librte_cryptodev/rte_cryptodev.h
+++ b/lib/librte_cryptodev/rte_cryptodev.h
@@ -1,32 +1,5 @@
-/*-
- *
- * Copyright(c) 2015-2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015-2017 Intel Corporation.
*/
#ifndef _RTE_CRYPTODEV_H_
@@ -65,7 +38,6 @@ extern const char **rte_cyptodev_names;
RTE_FMT(RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
RTE_FMT_TAIL(__VA_ARGS__,)))
-#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG
#define CDEV_LOG_DEBUG(...) \
RTE_LOG(DEBUG, CRYPTODEV, \
RTE_FMT("%s() line %u: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
@@ -76,13 +48,6 @@ extern const char **rte_cyptodev_names;
RTE_FMT("[%s] %s: " RTE_FMT_HEAD(__VA_ARGS__,) "\n", \
dev, __func__, RTE_FMT_TAIL(__VA_ARGS__,)))
-#else
-#define CDEV_LOG_DEBUG(...) (void)0
-#define CDEV_PMD_TRACE(...) (void)0
-#endif
-
-
-
/**
* A macro that points to an offset from the start
* of the crypto operation structure (rte_crypto_op)
@@ -178,6 +143,35 @@ struct rte_cryptodev_symmetric_capability {
};
};
+/**
+ * Asymmetric Xform Crypto Capability
+ *
+ */
+struct rte_cryptodev_asymmetric_xform_capability {
+ enum rte_crypto_asym_xform_type xform_type;
+ /**< Transform type: RSA/MODEXP/DH/DSA/MODINV */
+
+ uint32_t op_types;
+ /**< bitmask for supported rte_crypto_asym_op_type */
+
+ __extension__
+ union {
+ struct rte_crypto_param_range modlen;
+ /**< Range of modulus length supported by modulus based xform.
+ * Value 0 means implementation default
+ */
+ };
+};
+
+/**
+ * Asymmetric Crypto Capability
+ *
+ */
+struct rte_cryptodev_asymmetric_capability {
+ struct rte_cryptodev_asymmetric_xform_capability xform_capa;
+};
+
+
/** Structure used to capture a capability of a crypto device */
struct rte_cryptodev_capabilities {
enum rte_crypto_op_type op;
@@ -187,6 +181,8 @@ struct rte_cryptodev_capabilities {
union {
struct rte_cryptodev_symmetric_capability sym;
/**< Symmetric operation capability parameters */
+ struct rte_cryptodev_asymmetric_capability asym;
+ /**< Asymmetric operation capability parameters */
};
};
@@ -201,7 +197,17 @@ struct rte_cryptodev_sym_capability_idx {
};
/**
- * Provide capabilities available for defined device and algorithm
+ * Structure used to describe asymmetric crypto xforms
+ * Each xform maps to one asym algorithm.
+ *
+ */
+struct rte_cryptodev_asym_capability_idx {
+ enum rte_crypto_asym_xform_type type;
+ /**< Asymmetric xform (algo) type */
+};
+
+/**
+ * Provide capabilities available for defined device and algorithm
*
* @param dev_id The identifier of the device.
* @param idx Description of crypto algorithms.
@@ -215,6 +221,20 @@ rte_cryptodev_sym_capability_get(uint8_t dev_id,
const struct rte_cryptodev_sym_capability_idx *idx);
/**
+ * Provide capabilities available for defined device and xform
+ *
+ * @param dev_id The identifier of the device.
+ * @param idx Description of asym crypto xform.
+ *
+ * @return
+ * - Return description of the asymmetric crypto capability if it exists.
+ * - Return NULL if the capability does not exist.
+ */
+const struct rte_cryptodev_asymmetric_xform_capability * __rte_experimental
+rte_cryptodev_asym_capability_get(uint8_t dev_id,
+ const struct rte_cryptodev_asym_capability_idx *idx);
+
+/**
* Check if key size and initial vector are supported
* in crypto cipher capability
*
@@ -270,6 +290,36 @@ rte_cryptodev_sym_capability_check_aead(
uint16_t iv_size);
/**
+ * Check if op type is supported
+ *
+ * @param capability Description of the asymmetric crypto capability.
+ * @param op_type op type
+ *
+ * @return
+ * - Return 1 if the op type is supported
+ * - Return 0 if unsupported
+ */
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_optype(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ enum rte_crypto_asym_op_type op_type);
+
+/**
+ * Check if modulus length is in supported range
+ *
+ * @param capability Description of the asymmetric crypto capability.
+ * @param modlen modulus length.
+ *
+ * @return
+ * - Return 0 if the parameters are in range of the capability.
+ * - Return -1 if the parameters are out of range of the capability.
+ */
+int __rte_experimental
+rte_cryptodev_asym_xform_capability_check_modlen(
+ const struct rte_cryptodev_asymmetric_xform_capability *capability,
+ uint16_t modlen);
+
+/**
* Provide the cipher algorithm enum, given an algorithm string
*
* @param algo_enum A pointer to the cipher algorithm
@@ -314,6 +364,22 @@ int
rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
const char *algo_string);
+/**
+ * Provide the Asymmetric xform enum, given an xform string
+ *
+ * @param xform_enum A pointer to the xform type
+ * enum to be filled
+ * @param xform_string xform string
+ *
+ * @return
+ * - Return -1 if string is not valid
+ * - Return 0 if the string is valid
+ */
+int __rte_experimental
+rte_cryptodev_asym_get_xform_enum(enum rte_crypto_asym_xform_type *xform_enum,
+ const char *xform_string);
+
+
/** Macro used at end of crypto PMD list */
#define RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST() \
{ RTE_CRYPTO_OP_TYPE_UNDEFINED }
@@ -327,31 +393,50 @@ rte_cryptodev_get_aead_algo_enum(enum rte_crypto_aead_algorithm *algo_enum,
*
* Keep these flags synchronised with rte_cryptodev_get_feature_name()
*/
-#define RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO (1ULL << 0)
+#define RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO (1ULL << 0)
/**< Symmetric crypto operations are supported */
-#define RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO (1ULL << 1)
+#define RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO (1ULL << 1)
/**< Asymmetric crypto operations are supported */
-#define RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING (1ULL << 2)
+#define RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING (1ULL << 2)
/**< Chaining symmetric crypto operations are supported */
-#define RTE_CRYPTODEV_FF_CPU_SSE (1ULL << 3)
+#define RTE_CRYPTODEV_FF_CPU_SSE (1ULL << 3)
/**< Utilises CPU SIMD SSE instructions */
-#define RTE_CRYPTODEV_FF_CPU_AVX (1ULL << 4)
+#define RTE_CRYPTODEV_FF_CPU_AVX (1ULL << 4)
/**< Utilises CPU SIMD AVX instructions */
-#define RTE_CRYPTODEV_FF_CPU_AVX2 (1ULL << 5)
+#define RTE_CRYPTODEV_FF_CPU_AVX2 (1ULL << 5)
/**< Utilises CPU SIMD AVX2 instructions */
-#define RTE_CRYPTODEV_FF_CPU_AESNI (1ULL << 6)
+#define RTE_CRYPTODEV_FF_CPU_AESNI (1ULL << 6)
/**< Utilises CPU AES-NI instructions */
-#define RTE_CRYPTODEV_FF_HW_ACCELERATED (1ULL << 7)
-/**< Operations are off-loaded to an external hardware accelerator */
-#define RTE_CRYPTODEV_FF_CPU_AVX512 (1ULL << 8)
+#define RTE_CRYPTODEV_FF_HW_ACCELERATED (1ULL << 7)
+/**< Operations are off-loaded to an
+ * external hardware accelerator
+ */
+#define RTE_CRYPTODEV_FF_CPU_AVX512 (1ULL << 8)
/**< Utilises CPU SIMD AVX512 instructions */
-#define RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER (1ULL << 9)
-/**< Scatter-gather mbufs are supported */
-#define RTE_CRYPTODEV_FF_CPU_NEON (1ULL << 10)
+#define RTE_CRYPTODEV_FF_IN_PLACE_SGL (1ULL << 9)
+/**< In-place Scatter-gather (SGL) buffers, with multiple segments,
+ * are supported
+ */
+#define RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT (1ULL << 10)
+/**< Out-of-place Scatter-gather (SGL) buffers are
+ * supported in input and output
+ */
+#define RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT (1ULL << 11)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in input, combined with linear buffers (LB), with a
+ * single segment in output
+ */
+#define RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT (1ULL << 12)
+/**< Out-of-place Scatter-gather (SGL) buffers are supported
+ * in output, combined with linear buffers (LB) in input
+ */
+#define RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT (1ULL << 13)
+/**< Out-of-place linear buffers (LB) are supported in input and output */
+#define RTE_CRYPTODEV_FF_CPU_NEON (1ULL << 14)
/**< Utilises CPU NEON instructions */
-#define RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 11)
+#define RTE_CRYPTODEV_FF_CPU_ARM_CE (1ULL << 15)
/**< Utilises ARM CPU Cryptographic Extensions */
-#define RTE_CRYPTODEV_FF_SECURITY (1ULL << 12)
+#define RTE_CRYPTODEV_FF_SECURITY (1ULL << 16)
/**< Support Security Protocol Processing */
@@ -369,9 +454,9 @@ rte_cryptodev_get_feature_name(uint64_t flag);
/** Crypto device information */
struct rte_cryptodev_info {
- const char *driver_name; /**< Driver name. */
- uint8_t driver_id; /**< Driver identifier */
- struct rte_pci_device *pci_dev; /**< PCI information. */
+ const char *driver_name; /**< Driver name. */
+ uint8_t driver_id; /**< Driver identifier */
+ struct rte_device *device; /**< Generic device information. */
uint64_t feature_flags;
/**< Feature flags exposes HW/SW features for the given device */
@@ -382,12 +467,17 @@ struct rte_cryptodev_info {
unsigned max_nb_queue_pairs;
/**< Maximum number of queues pairs supported by device. */
+ uint16_t min_mbuf_headroom_req;
+ /**< Minimum mbuf headroom required by device */
+
+ uint16_t min_mbuf_tailroom_req;
+ /**< Minimum mbuf tailroom required by device */
+
struct {
unsigned max_nb_sessions;
- /**< Maximum number of sessions supported by device. */
- unsigned int max_nb_sessions_per_qp;
- /**< Maximum number of sessions per queue pair.
- * Default 0 for infinite sessions
+ /**< Maximum number of sessions supported by device.
+ * If 0, the device does not have any limitation in
+ * number of sessions that can be used.
*/
} sym;
};
@@ -603,43 +693,6 @@ rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id,
struct rte_mempool *session_pool);
/**
- * @deprecated
- * Start a specified queue pair of a device. It is used
- * when deferred_start flag of the specified queue is true.
- *
- * @param dev_id The identifier of the device
- * @param queue_pair_id The index of the queue pair to start. The value
- * must be in the range [0, nb_queue_pair - 1]
- * previously supplied to
- * rte_crypto_dev_configure().
- * @return
- * - 0: Success, the transmit queue is correctly set up.
- * - -EINVAL: The dev_id or the queue_id out of range.
- * - -ENOTSUP: The function not supported in PMD driver.
- */
-__rte_deprecated
-extern int
-rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id);
-
-/**
- * @deprecated
- * Stop specified queue pair of a device
- *
- * @param dev_id The identifier of the device
- * @param queue_pair_id The index of the queue pair to stop. The value
- * must be in the range [0, nb_queue_pair - 1]
- * previously supplied to
- * rte_cryptodev_configure().
- * @return
- * - 0: Success, the transmit queue is correctly set up.
- * - -EINVAL: The dev_id or the queue_id out of range.
- * - -ENOTSUP: The function not supported in PMD driver.
- */
-__rte_deprecated
-extern int
-rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id);
-
-/**
* Get the number of queue pairs on a specific crypto device
*
* @param dev_id Crypto device identifier.
@@ -902,9 +955,14 @@ rte_cryptodev_enqueue_burst(uint8_t dev_id, uint16_t qp_id,
*/
struct rte_cryptodev_sym_session {
__extension__ void *sess_private_data[0];
- /**< Private session material */
+ /**< Private symmetric session material */
};
+/** Cryptodev asymmetric crypto session */
+struct rte_cryptodev_asym_session {
+ __extension__ void *sess_private_data[0];
+ /**< Private asymmetric session material */
+};
/**
* Create symmetric crypto session header (generic with no private data)
@@ -919,6 +977,18 @@ struct rte_cryptodev_sym_session *
rte_cryptodev_sym_session_create(struct rte_mempool *mempool);
/**
+ * Create asymmetric crypto session header (generic with no private data)
+ *
+ * @param mempool mempool to allocate asymmetric session
+ * objects from
+ * @return
+ * - On success return pointer to asym-session
+ * - On failure returns NULL
+ */
+struct rte_cryptodev_asym_session * __rte_experimental
+rte_cryptodev_asym_session_create(struct rte_mempool *mempool);
+
+/**
* Frees symmetric crypto session header, after checking that all
* the device private data has been freed, returning it
* to its original mempool.
@@ -934,6 +1004,21 @@ int
rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess);
/**
+ * Frees asymmetric crypto session header, after checking that all
+ * the device private data has been freed, returning it
+ * to its original mempool.
+ *
+ * @param sess Session header to be freed.
+ *
+ * @return
+ * - 0 if successful.
+ * - -EINVAL if session is NULL.
+ * - -EBUSY if not all device private data has been freed.
+ */
+int __rte_experimental
+rte_cryptodev_asym_session_free(struct rte_cryptodev_asym_session *sess);
+
+/**
* Fill out private data for the device id, based on its device type.
*
* @param dev_id ID of device that we want the session to be used on
@@ -945,7 +1030,8 @@ rte_cryptodev_sym_session_free(struct rte_cryptodev_sym_session *sess);
* @return
* - On success, zero.
* - -EINVAL if input parameters are invalid.
- * - -ENOTSUP if crypto device does not support the crypto transform.
+ * - -ENOTSUP if crypto device does not support the crypto transform or
+ * does not support symmetric operations.
* - -ENOMEM if the private session could not be allocated.
*/
int
@@ -955,6 +1041,27 @@ rte_cryptodev_sym_session_init(uint8_t dev_id,
struct rte_mempool *mempool);
/**
+ * Initialize asymmetric session on a device with specific asymmetric xform
+ *
+ * @param dev_id ID of device that we want the session to be used on
+ * @param sess Session to be set up on a device
+ * @param xforms Asymmetric crypto transform operations to apply on flow
+ * processed with this session
+ * @param mempool Mempool to be used for internal allocation.
+ *
+ * @return
+ * - On success, zero.
+ * - -EINVAL if input parameters are invalid.
+ * - -ENOTSUP if crypto device does not support the crypto transform.
+ * - -ENOMEM if the private session could not be allocated.
+ */
+int __rte_experimental
+rte_cryptodev_asym_session_init(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess,
+ struct rte_crypto_asym_xform *xforms,
+ struct rte_mempool *mempool);
+
+/**
* Frees private data for the device id, based on its device type,
* returning it to its mempool. It is the application's responsibility
* to ensure that private session data is not cleared while there are
@@ -966,44 +1073,43 @@ rte_cryptodev_sym_session_init(uint8_t dev_id,
* @return
* - 0 if successful.
* - -EINVAL if device is invalid or session is NULL.
+ * - -ENOTSUP if crypto device does not support symmetric operations.
*/
int
rte_cryptodev_sym_session_clear(uint8_t dev_id,
struct rte_cryptodev_sym_session *sess);
/**
- * @deprecated
- * Get the size of the header session, for all registered drivers.
+ * Frees the resources held by an asymmetric session that were allocated during rte_cryptodev_asym_session_init()
*
+ * @param dev_id ID of device that uses the asymmetric session.
+ * @param sess Asymmetric session set up on a device using
+ * rte_cryptodev_asym_session_init()
* @return
- * Size of the header session.
+ * - 0 if successful.
+ * - -EINVAL if device is invalid or session is NULL.
*/
-__rte_deprecated
-unsigned int
-rte_cryptodev_get_header_session_size(void);
+int __rte_experimental
+rte_cryptodev_asym_session_clear(uint8_t dev_id,
+ struct rte_cryptodev_asym_session *sess);
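Taken together, these entry points mirror the symmetric session lifecycle. A minimal usage sketch follows; it is not part of the patch, and dev_id, the asym_sess_mp mempool and the modexp buffers are placeholders assumed to be set up by the application:

	/* Sketch: asymmetric session lifecycle (placeholder device/mempool). */
	struct rte_crypto_asym_xform xform = {
		.next = NULL,
		.xform_type = RTE_CRYPTO_ASYM_XFORM_MODEX,
		.modex = {
			.modulus = { .data = mod_data, .length = mod_len },
			.exponent = { .data = exp_data, .length = exp_len },
		},
	};
	struct rte_cryptodev_asym_session *sess;

	sess = rte_cryptodev_asym_session_create(asym_sess_mp);
	if (sess == NULL)
		rte_exit(EXIT_FAILURE, "cannot create asym session\n");
	if (rte_cryptodev_asym_session_init(dev_id, sess, &xform,
			asym_sess_mp) < 0)
		rte_exit(EXIT_FAILURE, "cannot init asym session\n");

	/* build and enqueue asymmetric crypto ops referencing sess here */

	rte_cryptodev_asym_session_clear(dev_id, sess);
	rte_cryptodev_asym_session_free(sess);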
/**
- * @deprecated
- * Get the size of the private session data for a device.
- *
- * @param dev_id The device identifier.
+ * Get the size of the header session, for all registered drivers.
*
* @return
- * - Size of the private data, if successful
- * - 0 if device is invalid or does not have private session
+ * Size of the symmetric header session.
*/
-__rte_deprecated
unsigned int
-rte_cryptodev_get_private_session_size(uint8_t dev_id);
+rte_cryptodev_sym_get_header_session_size(void);
/**
- * Get the size of the header session, for all registered drivers.
+ * Get the size of the asymmetric session header, for all registered drivers.
*
* @return
- * Size of the symmetric header session.
+ * Size of the asymmetric header session.
*/
-unsigned int
-rte_cryptodev_sym_get_header_session_size(void);
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_header_session_size(void);
/**
* Get the size of the private symmetric session data
@@ -1020,40 +1126,17 @@ unsigned int
rte_cryptodev_sym_get_private_session_size(uint8_t dev_id);
/**
- * @deprecated
- * Attach queue pair with sym session.
- *
- * @param dev_id Device to which the session will be attached.
- * @param qp_id Queue pair to which the session will be attached.
- * @param session Session pointer previously allocated by
- * *rte_cryptodev_sym_session_create*.
- *
- * @return
- * - On success, zero.
- * - On failure, a negative value.
- */
-__rte_deprecated
-int
-rte_cryptodev_queue_pair_attach_sym_session(uint8_t dev_id, uint16_t qp_id,
- struct rte_cryptodev_sym_session *session);
-
-/**
- * @deprecated
- * Detach queue pair with sym session.
+ * Get the size of the private data for an asymmetric session
+ * on a device
*
- * @param dev_id Device to which the session is attached.
- * @param qp_id Queue pair to which the session is attached.
- * @param session Session pointer previously allocated by
- * *rte_cryptodev_sym_session_create*.
+ * @param dev_id The device identifier.
*
* @return
- * - On success, zero.
- * - On failure, a negative value.
+ * - Size of the asymmetric private data, if successful
+ * - 0 if device is invalid or does not have private session
*/
-__rte_deprecated
-int
-rte_cryptodev_queue_pair_detach_sym_session(uint8_t dev_id, uint16_t qp_id,
- struct rte_cryptodev_sym_session *session);
+unsigned int __rte_experimental
+rte_cryptodev_asym_get_private_session_size(uint8_t dev_id);
/**
* Provide driver identifier.
@@ -1076,35 +1159,35 @@ int rte_cryptodev_driver_id_get(const char *name);
const char *rte_cryptodev_driver_name_get(uint8_t driver_id);
/**
- * Set private data for a session.
+ * Store user data in a session.
*
* @param sess Session pointer allocated by
* *rte_cryptodev_sym_session_create*.
- * @param data Pointer to the private data.
- * @param size Size of the private data.
+ * @param data Pointer to the user data.
+ * @param size Size of the user data.
*
* @return
* - On success, zero.
* - On failure, a negative value.
*/
int __rte_experimental
-rte_cryptodev_sym_session_set_private_data(
+rte_cryptodev_sym_session_set_user_data(
struct rte_cryptodev_sym_session *sess,
void *data,
uint16_t size);
/**
- * Get private data of a session.
+ * Get user data stored in a session.
*
* @param sess Session pointer allocated by
* *rte_cryptodev_sym_session_create*.
*
* @return
- * - On success return pointer to private data.
+ * - On success return pointer to user data.
* - On failure returns NULL.
*/
void * __rte_experimental
-rte_cryptodev_sym_session_get_private_data(
+rte_cryptodev_sym_session_get_user_data(
struct rte_cryptodev_sym_session *sess);
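A short usage sketch of the renamed helpers; sess is assumed to be an existing symmetric session, and app_flow_ctx is a hypothetical application structure, not part of this patch:

	/* Sketch: attach an application context to a symmetric session. */
	struct app_flow_ctx {
		uint32_t flow_id; /* hypothetical per-flow identifier */
	} ctx = { .flow_id = 42 };
	struct app_flow_ctx *stored;

	if (rte_cryptodev_sym_session_set_user_data(sess, &ctx,
			sizeof(ctx)) != 0)
		printf("session has no room for user data\n");
	stored = rte_cryptodev_sym_session_get_user_data(sess);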
#ifdef __cplusplus
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.c b/lib/librte_cryptodev/rte_cryptodev_pmd.c
index f2aac24b..2088ac3f 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.c
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.c
@@ -66,13 +66,6 @@ rte_cryptodev_pmd_parse_input_args(
goto free_kvlist;
ret = rte_kvargs_process(kvlist,
- RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG,
- &rte_cryptodev_pmd_parse_uint_arg,
- &params->max_nb_sessions);
- if (ret < 0)
- goto free_kvlist;
-
- ret = rte_kvargs_process(kvlist,
RTE_CRYPTODEV_PMD_SOCKET_ID_ARG,
&rte_cryptodev_pmd_parse_uint_arg,
&params->socket_id);
@@ -109,10 +102,9 @@ rte_cryptodev_pmd_create(const char *name,
device->driver->name, name);
CDEV_LOG_INFO("[%s] - Initialisation parameters - name: %s,"
- "socket id: %d, max queue pairs: %u, max sessions: %u",
+ "socket id: %d, max queue pairs: %u",
device->driver->name, name,
- params->socket_id, params->max_nb_queue_pairs,
- params->max_nb_sessions);
+ params->socket_id, params->max_nb_queue_pairs);
/* allocate device structure */
cryptodev = rte_cryptodev_pmd_allocate(name, params->socket_id);
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h
index 69d77693..6ff49d64 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.h
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h
@@ -1,32 +1,5 @@
-/*-
- *
- * Copyright(c) 2015-2016 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015-2016 Intel Corporation.
*/
#ifndef _RTE_CRYPTODEV_PMD_H_
@@ -59,18 +32,15 @@ extern "C" {
#define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS 8
-#define RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_SESSIONS 2048
#define RTE_CRYPTODEV_PMD_NAME_ARG ("name")
#define RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG ("max_nb_queue_pairs")
-#define RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG ("max_nb_sessions")
#define RTE_CRYPTODEV_PMD_SOCKET_ID_ARG ("socket_id")
static const char * const cryptodev_pmd_valid_params[] = {
RTE_CRYPTODEV_PMD_NAME_ARG,
RTE_CRYPTODEV_PMD_MAX_NB_QP_ARG,
- RTE_CRYPTODEV_PMD_MAX_NB_SESS_ARG,
RTE_CRYPTODEV_PMD_SOCKET_ID_ARG
};
@@ -83,7 +53,6 @@ struct rte_cryptodev_pmd_init_params {
size_t private_data_size;
int socket_id;
unsigned int max_nb_queue_pairs;
- unsigned int max_nb_sessions;
};
/** Global structure used for maintaining state of allocated crypto devices */
@@ -216,28 +185,6 @@ typedef void (*cryptodev_info_get_t)(struct rte_cryptodev *dev,
struct rte_cryptodev_info *dev_info);
/**
- * Start queue pair of a device.
- *
- * @param dev Crypto device pointer
- * @param qp_id Queue Pair Index
- *
- * @return Returns 0 on success.
- */
-typedef int (*cryptodev_queue_pair_start_t)(struct rte_cryptodev *dev,
- uint16_t qp_id);
-
-/**
- * Stop queue pair of a device.
- *
- * @param dev Crypto device pointer
- * @param qp_id Queue Pair Index
- *
- * @return Returns 0 on success.
- */
-typedef int (*cryptodev_queue_pair_stop_t)(struct rte_cryptodev *dev,
- uint16_t qp_id);
-
-/**
* Setup a queue pair for a device.
*
* @param dev Crypto device pointer
@@ -302,6 +249,17 @@ typedef int (*cryptodev_sym_create_session_pool_t)(
*/
typedef unsigned (*cryptodev_sym_get_session_private_size_t)(
struct rte_cryptodev *dev);
+/**
+ * Get the size of an asymmetric cryptodev session
+ *
+ * @param dev Crypto device pointer
+ *
+ * @return
+ * - On success returns the size of the session structure for device
+ * - On failure returns 0
+ */
+typedef unsigned int (*cryptodev_asym_get_session_private_size_t)(
+ struct rte_cryptodev *dev);
/**
* Configure a Crypto session on a device.
@@ -321,7 +279,24 @@ typedef int (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev,
struct rte_crypto_sym_xform *xform,
struct rte_cryptodev_sym_session *session,
struct rte_mempool *mp);
-
+/**
+ * Configure a Crypto asymmetric session on a device.
+ *
+ * @param dev Crypto device pointer
+ * @param xform Single or chain of crypto xforms
+ * @param session Pointer to cryptodev's private session structure
+ * @param mp Mempool where the private session is allocated
+ *
+ * @return
+ * - Returns 0 if the private session structure has been created successfully.
+ * - Returns -EINVAL if input parameters are invalid.
+ * - Returns -ENOTSUP if crypto device does not support the crypto transform.
+ * - Returns -ENOMEM if the private session could not be allocated.
+ */
+typedef int (*cryptodev_asym_configure_session_t)(struct rte_cryptodev *dev,
+ struct rte_crypto_asym_xform *xform,
+ struct rte_cryptodev_asym_session *session,
+ struct rte_mempool *mp);
/**
* Free driver private session data.
*
@@ -330,32 +305,14 @@ typedef int (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev,
*/
typedef void (*cryptodev_sym_free_session_t)(struct rte_cryptodev *dev,
struct rte_cryptodev_sym_session *sess);
-
-/**
- * Optional API for drivers to attach sessions with queue pair.
- * @param dev Crypto device pointer
- * @param qp_id queue pair id for attaching session
- * @param priv_sess Pointer to cryptodev's private session structure
- * @return
- * - Return 0 on success
- */
-typedef int (*cryptodev_sym_queue_pair_attach_session_t)(
- struct rte_cryptodev *dev,
- uint16_t qp_id,
- void *session_private);
-
/**
- * Optional API for drivers to detach sessions from queue pair.
+ * Free asymmetric session private data.
+ *
* @param dev Crypto device pointer
- * @param qp_id queue pair id for detaching session
- * @param priv_sess Pointer to cryptodev's private session structure
- * @return
- * - Return 0 on success
+ * @param sess Cryptodev session structure
*/
-typedef int (*cryptodev_sym_queue_pair_detach_session_t)(
- struct rte_cryptodev *dev,
- uint16_t qp_id,
- void *session_private);
+typedef void (*cryptodev_asym_free_session_t)(struct rte_cryptodev *dev,
+ struct rte_cryptodev_asym_session *sess);
/** Crypto device operations function pointer table */
struct rte_cryptodev_ops {
@@ -375,23 +332,21 @@ struct rte_cryptodev_ops {
/**< Set up a device queue pair. */
cryptodev_queue_pair_release_t queue_pair_release;
/**< Release a queue pair. */
- cryptodev_queue_pair_start_t queue_pair_start;
- /**< Start a queue pair. */
- cryptodev_queue_pair_stop_t queue_pair_stop;
- /**< Stop a queue pair. */
cryptodev_queue_pair_count_t queue_pair_count;
/**< Get count of the queue pairs. */
- cryptodev_sym_get_session_private_size_t session_get_size;
+ cryptodev_sym_get_session_private_size_t sym_session_get_size;
/**< Return sym session private size. */
- cryptodev_sym_configure_session_t session_configure;
+ cryptodev_asym_get_session_private_size_t asym_session_get_size;
+ /**< Return asym session private size. */
+ cryptodev_sym_configure_session_t sym_session_configure;
/**< Configure a Crypto session. */
- cryptodev_sym_free_session_t session_clear;
+ cryptodev_asym_configure_session_t asym_session_configure;
+ /**< Configure asymmetric Crypto session. */
+ cryptodev_sym_free_session_t sym_session_clear;
+ /**< Clear a Crypto session's private data. */
+ cryptodev_asym_free_session_t asym_session_clear;
/**< Clear an asymmetric Crypto session's private data. */
- cryptodev_sym_queue_pair_attach_session_t qp_attach_session;
- /**< Attach session to queue pair. */
- cryptodev_sym_queue_pair_detach_session_t qp_detach_session;
- /**< Detach session from queue pair. */
};
@@ -516,20 +471,32 @@ uint8_t rte_cryptodev_allocate_driver(struct cryptodev_driver *crypto_drv,
#define RTE_PMD_REGISTER_CRYPTO_DRIVER(crypto_drv, drv, driver_id)\
-RTE_INIT(init_ ##driver_id);\
-static void init_ ##driver_id(void)\
+RTE_INIT(init_ ##driver_id)\
{\
driver_id = rte_cryptodev_allocate_driver(&crypto_drv, &(drv));\
}
static inline void *
-get_session_private_data(const struct rte_cryptodev_sym_session *sess,
+get_sym_session_private_data(const struct rte_cryptodev_sym_session *sess,
+ uint8_t driver_id) {
+ return sess->sess_private_data[driver_id];
+}
+
+static inline void
+set_sym_session_private_data(struct rte_cryptodev_sym_session *sess,
+ uint8_t driver_id, void *private_data)
+{
+ sess->sess_private_data[driver_id] = private_data;
+}
+
+static inline void *
+get_asym_session_private_data(const struct rte_cryptodev_asym_session *sess,
uint8_t driver_id) {
return sess->sess_private_data[driver_id];
}
static inline void
-set_session_private_data(struct rte_cryptodev_sym_session *sess,
+set_asym_session_private_data(struct rte_cryptodev_asym_session *sess,
uint8_t driver_id, void *private_data)
{
sess->sess_private_data[driver_id] = private_data;
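For PMD maintainers, the ops table now carries one callback set per session type. A hedged sketch of how a driver might populate the renamed fields; the example_* handlers are placeholders for a driver's own implementations:

	/* Sketch: ops table with per-session-type callbacks (placeholders). */
	static struct rte_cryptodev_ops example_crypto_ops = {
		/* dev_*, stats_* and queue_pair_* callbacks omitted */
		.sym_session_get_size = example_sym_session_get_size,
		.asym_session_get_size = example_asym_session_get_size,
		.sym_session_configure = example_sym_session_configure,
		.asym_session_configure = example_asym_session_configure,
		.sym_session_clear = example_sym_session_clear,
		.asym_session_clear = example_asym_session_clear,
	};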
diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map
index be8f4c1a..7ca00735 100644
--- a/lib/librte_cryptodev/rte_cryptodev_version.map
+++ b/lib/librte_cryptodev/rte_cryptodev_version.map
@@ -22,8 +22,6 @@ DPDK_16.04 {
rte_cryptodev_stop;
rte_cryptodev_queue_pair_count;
rte_cryptodev_queue_pair_setup;
- rte_cryptodev_queue_pair_start;
- rte_cryptodev_queue_pair_stop;
rte_crypto_op_pool_create;
local: *;
@@ -52,8 +50,6 @@ DPDK_17.05 {
rte_cryptodev_get_auth_algo_enum;
rte_cryptodev_get_cipher_algo_enum;
- rte_cryptodev_queue_pair_attach_sym_session;
- rte_cryptodev_queue_pair_detach_sym_session;
} DPDK_17.02;
@@ -65,8 +61,6 @@ DPDK_17.08 {
rte_cryptodev_driver_id_get;
rte_cryptodev_driver_name_get;
rte_cryptodev_get_aead_algo_enum;
- rte_cryptodev_get_header_session_size;
- rte_cryptodev_get_private_session_size;
rte_cryptodev_sym_capability_check_aead;
rte_cryptodev_sym_session_init;
rte_cryptodev_sym_session_clear;
@@ -97,6 +91,18 @@ DPDK_18.05 {
EXPERIMENTAL {
global:
- rte_cryptodev_sym_session_get_private_data;
- rte_cryptodev_sym_session_set_private_data;
+ rte_cryptodev_asym_capability_get;
+ rte_cryptodev_asym_get_header_session_size;
+ rte_cryptodev_asym_get_private_session_size;
+ rte_cryptodev_asym_get_xform_enum;
+ rte_cryptodev_asym_session_clear;
+ rte_cryptodev_asym_session_create;
+ rte_cryptodev_asym_session_free;
+ rte_cryptodev_asym_session_init;
+ rte_cryptodev_asym_xform_capability_check_modlen;
+ rte_cryptodev_asym_xform_capability_check_optype;
+ rte_cryptodev_sym_session_get_user_data;
+ rte_cryptodev_sym_session_set_user_data;
+ rte_crypto_asym_op_strings;
+ rte_crypto_asym_xform_strings;
};
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index 3fd33f1e..d27da3d1 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -18,10 +18,11 @@ CFLAGS += $(WERROR_FLAGS) -O3
LDLIBS += -lexecinfo
LDLIBS += -lpthread
LDLIBS += -lgcc_s
+LDLIBS += -lrte_kvargs
EXPORT_MAP := ../../rte_eal_version.map
-LIBABIVER := 7
+LIBABIVER := 8
# specific to bsdapp exec-env
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
@@ -52,12 +53,14 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hypervisor.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_class.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_bus.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_fbarray.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_uuid.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index dc279542..d7ae9d68 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -147,19 +147,9 @@ eal_get_runtime_dir(void)
}
/* Return user provided mbuf pool ops name */
-const char * __rte_experimental
-rte_eal_mbuf_user_pool_ops(void)
-{
- return internal_config.user_mbuf_pool_ops_name;
-}
-
-/* Return mbuf pool ops name */
const char *
-rte_eal_mbuf_default_mempool_ops(void)
+rte_eal_mbuf_user_pool_ops(void)
{
- if (internal_config.user_mbuf_pool_ops_name == NULL)
- return RTE_MBUF_DEFAULT_MEMPOOL_OPS;
-
return internal_config.user_mbuf_pool_ops_name;
}
@@ -286,12 +276,17 @@ eal_proc_type_detect(void)
enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
const char *pathname = eal_runtime_config_path();
- /* if we can open the file but not get a write-lock we are a secondary
- * process. NOTE: if we get a file handle back, we keep that open
- * and don't close it to prevent a race condition between multiple opens */
- if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
- (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
- ptype = RTE_PROC_SECONDARY;
+ /* if there is no shared config, there can be no secondary processes */
+ if (!internal_config.no_shconf) {
+ /* if we can open the file but not get a write-lock we are a
+ * secondary process. NOTE: if we get a file handle back, we
+ * keep that open and don't close it to prevent a race condition
+ * between multiple opens.
+ */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+ }
RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
@@ -468,6 +463,14 @@ eal_parse_args(int argc, char **argv)
}
}
+ /* create runtime data directory */
+ if (internal_config.no_shconf == 0 &&
+ eal_create_runtime_dir() < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
+ ret = -1;
+ goto out;
+ }
+
if (eal_adjust_config(&internal_config) != 0) {
ret = -1;
goto out;
@@ -600,13 +603,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- /* create runtime data directory */
- if (eal_create_runtime_dir() < 0) {
- rte_eal_init_alert("Cannot create runtime directory\n");
- rte_errno = EACCES;
- return -1;
- }
-
/* FreeBSD always uses legacy memory model */
internal_config.legacy_mem = true;
@@ -625,6 +621,11 @@ rte_eal_init(int argc, char **argv)
rte_config_init();
+ if (rte_eal_intr_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+ return -1;
+ }
+
/* Put mp channel init before bus scan so that we can init the vdev
* bus through mp channel in the secondary process before the bus scan.
*/
@@ -713,11 +714,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- if (rte_eal_intr_init() < 0) {
- rte_eal_init_alert("Cannot init interrupt-handling thread\n");
- return -1;
- }
-
if (rte_eal_timer_init() < 0) {
rte_eal_init_alert("Cannot init HPET or TSC timers\n");
rte_errno = ENOTSUP;
@@ -866,21 +862,21 @@ int rte_vfio_clear_group(__rte_unused int vfio_group_fd)
return 0;
}
-int __rte_experimental
+int
rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
__rte_unused uint64_t len)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
__rte_unused uint64_t len)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
__rte_unused const char *dev_addr,
__rte_unused int *iommu_group_num)
@@ -888,45 +884,45 @@ rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_container_fd(void)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_create(void)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_destroy(__rte_unused int container_fd)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_group_bind(__rte_unused int container_fd,
__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_group_unbind(__rte_unused int container_fd,
__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_dma_map(__rte_unused int container_fd,
__rte_unused uint64_t vaddr,
__rte_unused uint64_t iova,
@@ -935,7 +931,7 @@ rte_vfio_container_dma_map(__rte_unused int container_fd,
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_dma_unmap(__rte_unused int container_fd,
__rte_unused uint64_t vaddr,
__rte_unused uint64_t iova,
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c
index eb3913c9..51ea4b8c 100644
--- a/lib/librte_eal/bsdapp/eal/eal_alarm.c
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c
@@ -1,31 +1,314 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
+#include <time.h>
#include <errno.h>
#include <rte_alarm.h>
+#include <rte_cycles.h>
#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_interrupts.h>
+#include <rte_spinlock.h>
+
#include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define NS_PER_US 1000
+
+#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
+struct alarm_entry {
+ LIST_ENTRY(alarm_entry) next;
+ struct rte_intr_handle handle;
+ struct timespec time;
+ rte_eal_alarm_callback cb_fn;
+ void *cb_arg;
+ volatile uint8_t executing;
+ volatile pthread_t executing_id;
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static void eal_alarm_callback(void *arg);
int
rte_eal_alarm_init(void)
{
+ intr_handle.type = RTE_INTR_HANDLE_ALARM;
+
+ /* on FreeBSD, timers don't use fds, and their identifiers are stored
+ * in a separate namespace from fds, so using any value is OK. However,
+ * the EAL interrupt handler expects fds to be unique, so use an actual
+ * fd to guarantee a unique timer identifier.
+ */
+ intr_handle.fd = open("/dev/zero", O_RDONLY);
+
+ return 0;
+}
+
+static inline int
+timespec_cmp(const struct timespec *now, const struct timespec *at)
+{
+ if (now->tv_sec < at->tv_sec)
+ return -1;
+ if (now->tv_sec > at->tv_sec)
+ return 1;
+ if (now->tv_nsec < at->tv_nsec)
+ return -1;
+ if (now->tv_nsec > at->tv_nsec)
+ return 1;
return 0;
}
+static inline uint64_t
+diff_ns(struct timespec *now, struct timespec *at)
+{
+ uint64_t now_ns, at_ns;
+
+ if (timespec_cmp(now, at) >= 0)
+ return 0;
+
+ now_ns = now->tv_sec * NS_PER_S + now->tv_nsec;
+ at_ns = at->tv_sec * NS_PER_S + at->tv_nsec;
+
+ return at_ns - now_ns;
+}
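A quick worked example of diff_ns(), with assumed values:

	/* With now = {1 s, 900000000 ns} and at = {2 s, 100000000 ns}:
	 *   at_ns - now_ns = 2100000000 - 1900000000 = 200000000 ns (200 ms).
	 * If now is at or past at, the alarm is already due and 0 is returned.
	 */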
int
-rte_eal_alarm_set(uint64_t us __rte_unused,
- rte_eal_alarm_callback cb_fn __rte_unused,
- void *cb_arg __rte_unused)
+eal_alarm_get_timeout_ns(uint64_t *val)
{
- return -ENOTSUP;
+ struct alarm_entry *ap;
+ struct timespec now;
+
+ if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+ return -1;
+
+ if (LIST_EMPTY(&alarm_list))
+ return -1;
+
+ ap = LIST_FIRST(&alarm_list);
+
+ *val = diff_ns(&now, &ap->time);
+
+ return 0;
+}
+
+static int
+unregister_current_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ do {
+ ret = rte_intr_callback_unregister(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ } while (ret == -EAGAIN);
+ }
+
+ return ret;
}
+static int
+register_first_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ /* register a new callback */
+ ret = rte_intr_callback_register(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ }
+ return ret;
+}
+
+static void
+eal_alarm_callback(void *arg __rte_unused)
+{
+ struct timespec now;
+ struct alarm_entry *ap;
+
+ rte_spinlock_lock(&alarm_list_lk);
+ ap = LIST_FIRST(&alarm_list);
+
+ if (clock_gettime(CLOCK_TYPE_ID, &now) < 0) {
+ rte_spinlock_unlock(&alarm_list_lk);
+ return;
+ }
+
+ while (ap != NULL && timespec_cmp(&now, &ap->time) >= 0) {
+ ap->executing = 1;
+ ap->executing_id = pthread_self();
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ ap->cb_fn(ap->cb_arg);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ LIST_REMOVE(ap, next);
+ free(ap);
+
+ ap = LIST_FIRST(&alarm_list);
+ }
+
+ /* timer has been deleted from the kqueue, so recreate it if needed */
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+}
+
+
int
-rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused,
- void *cb_arg __rte_unused)
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
{
- return -ENOTSUP;
+ struct alarm_entry *ap, *new_alarm;
+ struct timespec now;
+ uint64_t ns;
+ int ret = 0;
+
+ /* check parameters, also ensure us won't cause a uint64_t overflow */
+ if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
+ return -EINVAL;
+
+ new_alarm = calloc(1, sizeof(*new_alarm));
+ if (new_alarm == NULL)
+ return -ENOMEM;
+
+ /* use current time to calculate absolute time of alarm */
+ clock_gettime(CLOCK_TYPE_ID, &now);
+
+ ns = us * NS_PER_US;
+
+ new_alarm->cb_fn = cb_fn;
+ new_alarm->cb_arg = cb_arg;
+ new_alarm->time.tv_nsec = (now.tv_nsec + ns) % NS_PER_S;
+ new_alarm->time.tv_sec = now.tv_sec + ((now.tv_nsec + ns) / NS_PER_S);
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ if (LIST_EMPTY(&alarm_list))
+ LIST_INSERT_HEAD(&alarm_list, new_alarm, next);
+ else {
+ LIST_FOREACH(ap, &alarm_list, next) {
+ if (timespec_cmp(&new_alarm->time, &ap->time) < 0) {
+ LIST_INSERT_BEFORE(ap, new_alarm, next);
+ break;
+ }
+ if (LIST_NEXT(ap, next) == NULL) {
+ LIST_INSERT_AFTER(ap, new_alarm, next);
+ break;
+ }
+ }
+ }
+
+ /* re-register first callback just in case */
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ return ret;
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct alarm_entry *ap, *ap_prev;
+ int count = 0;
+ int err = 0;
+ int executing;
+
+ if (!cb_fn) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ do {
+ executing = 0;
+ rte_spinlock_lock(&alarm_list_lk);
+ /* remove any matches at the start of the list */
+ while (1) {
+ ap = LIST_FIRST(&alarm_list);
+ if (ap == NULL)
+ break;
+ if (cb_fn != ap->cb_fn)
+ break;
+ if (cb_arg != ap->cb_arg && cb_arg != (void *) -1)
+ break;
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ } else {
+ /* If called from another context, mark that the
+ * alarm is executing so the loop can spin until it
+ * finishes. Otherwise we are trying to cancel
+ * ourselves - mark it with EINPROGRESS.
+ */
+ if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0)
+ executing++;
+ else
+ err = EINPROGRESS;
+
+ break;
+ }
+ }
+ ap_prev = ap;
+
+ /* now go through list, removing entries not at start */
+ LIST_FOREACH(ap, &alarm_list, next) {
+ /* this won't be true first time through */
+ if (cb_fn == ap->cb_fn &&
+ (cb_arg == (void *)-1 ||
+ cb_arg == ap->cb_arg)) {
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ ap = ap_prev;
+ } else if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0) {
+ executing++;
+ } else {
+ err = EINPROGRESS;
+ }
+ }
+ ap_prev = ap;
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+ } while (executing != 0);
+
+ if (count == 0 && err == 0)
+ rte_errno = ENOENT;
+ else if (err)
+ rte_errno = err;
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ /* unregister if no alarms left, otherwise re-register first */
+ if (LIST_EMPTY(&alarm_list))
+ unregister_current_callback();
+ else
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ return count;
}
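With kqueue timers wired into the interrupt thread, the generic alarm API is now functional on FreeBSD. A small usage sketch; the 500 ms period and the callback are illustrative, not part of the patch:

	/* Sketch: a self-re-arming 500 ms alarm (illustrative values). */
	static void
	heartbeat_cb(void *arg)
	{
		RTE_LOG(INFO, USER1, "alarm fired, arg=%p\n", arg);
		/* re-arm from the callback for a periodic effect */
		rte_eal_alarm_set(500 * 1000 /* us */, heartbeat_cb, arg);
	}

	static void
	start_heartbeat(void)
	{
		if (rte_eal_alarm_set(500 * 1000, heartbeat_cb, NULL) < 0)
			RTE_LOG(ERR, USER1, "cannot set alarm\n");
	}

	static void
	stop_heartbeat(void)
	{
		/* (void *)-1 matches any cb_arg, as in the code above */
		rte_eal_alarm_cancel(heartbeat_cb, (void *)-1);
	}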
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm_private.h b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
new file mode 100644
index 00000000..65c71151
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef EAL_ALARM_PRIVATE_H
+#define EAL_ALARM_PRIVATE_H
+
+#include <inttypes.h>
+
+/*
+ * FreeBSD needs a back-channel communication mechanism between the interrupt
+ * and alarm threads, because on FreeBSD the timer period is set up inside the
+ * interrupt API and not inside the alarm API as it is on Linux.
+ */
+
+int
+eal_alarm_get_timeout_ns(uint64_t *val);
+
+#endif // EAL_ALARM_PRIVATE_H
diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
index 836feb67..1e8f5df2 100644
--- a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
@@ -101,6 +101,10 @@ eal_hugepage_info_init(void)
hpi->num_pages[0] = num_buffers;
hpi->lock_descriptor = fd;
+ /* for no shared files mode, do not create shared memory config */
+ if (internal_config.no_shconf)
+ return 0;
+
tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
sizeof(internal_config.hugepage_info));
if (tmp_hpi == NULL ) {
diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
index 290d53ab..2feee2d5 100644
--- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
@@ -1,51 +1,479 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
+#include <string.h>
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/queue.h>
+#include <unistd.h>
+
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_spinlock.h>
#include <rte_common.h>
#include <rte_interrupts.h>
+
#include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define MAX_INTR_EVENTS 16
+
+/**
+ * union buffer for reading on different devices
+ */
+union rte_intr_read_buffer {
+ char charbuf[16]; /* for others */
+};
+
+TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
+TAILQ_HEAD(rte_intr_source_list, rte_intr_source);
+
+struct rte_intr_callback {
+ TAILQ_ENTRY(rte_intr_callback) next;
+ rte_intr_callback_fn cb_fn; /**< callback address */
+ void *cb_arg; /**< parameter for callback */
+};
+
+struct rte_intr_source {
+ TAILQ_ENTRY(rte_intr_source) next;
+ struct rte_intr_handle intr_handle; /**< interrupt handle */
+ struct rte_intr_cb_list callbacks; /**< user callbacks */
+ uint32_t active;
+};
+
+/* global spinlock for interrupt data operation */
+static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* interrupt sources list */
+static struct rte_intr_source_list intr_sources;
+
+/* interrupt handling thread */
+static pthread_t intr_thread;
+
+static volatile int kq = -1;
+
+static int
+intr_source_to_kevent(const struct rte_intr_handle *ih, struct kevent *ke)
+{
+ /* alarm callbacks are a special case */
+ if (ih->type == RTE_INTR_HANDLE_ALARM) {
+ uint64_t timeout_ns;
+
+ /* get soonest alarm timeout */
+ if (eal_alarm_get_timeout_ns(&timeout_ns) < 0)
+ return -1;
+
+ ke->filter = EVFILT_TIMER;
+ /* timers are one shot */
+ ke->flags |= EV_ONESHOT;
+ ke->fflags = NOTE_NSECONDS;
+ ke->data = timeout_ns;
+ } else {
+ ke->filter = EVFILT_READ;
+ }
+ ke->ident = ih->fd;
+
+ return 0;
+}
int
rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
- rte_intr_callback_fn cb,
- void *cb_arg)
+ rte_intr_callback_fn cb, void *cb_arg)
{
- RTE_SET_USED(intr_handle);
- RTE_SET_USED(cb);
- RTE_SET_USED(cb_arg);
+ struct rte_intr_callback *callback = NULL;
+ struct rte_intr_source *src = NULL;
+ int ret, add_event = 0;
- return -ENOTSUP;
+ /* first do parameter checking */
+ if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Registering with invalid input parameter\n");
+ return -EINVAL;
+ }
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Kqueue is not active: %d\n", kq);
+ return -ENODEV;
+ }
+
+ /* allocate a new interrupt callback entity */
+ callback = calloc(1, sizeof(*callback));
+ if (callback == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ return -ENOMEM;
+ }
+ callback->cb_fn = cb;
+ callback->cb_arg = cb_arg;
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if there is at least one callback registered for the fd */
+ TAILQ_FOREACH(src, &intr_sources, next) {
+ if (src->intr_handle.fd == intr_handle->fd) {
+ /* we had no interrupts for this */
+ if (TAILQ_EMPTY(&src->callbacks))
+ add_event = 1;
+
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ ret = 0;
+ break;
+ }
+ }
+
+ /* no existing callbacks for this - add new source */
+ if (src == NULL) {
+ src = calloc(1, sizeof(*src));
+ if (src == NULL) {
+ RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+ ret = -ENOMEM;
+ goto fail;
+ } else {
+ src->intr_handle = *intr_handle;
+ TAILQ_INIT(&src->callbacks);
+ TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+ TAILQ_INSERT_TAIL(&intr_sources, src, next);
+ add_event = 1;
+ ret = 0;
+ }
+ }
+
+ /* add events to the queue. timer events are special as we need to
+ * re-set the timer.
+ */
+ if (add_event || src->intr_handle.type == RTE_INTR_HANDLE_ALARM) {
+ struct kevent ke;
+
+ memset(&ke, 0, sizeof(ke));
+ ke.flags = EV_ADD; /* mark for addition to the queue */
+
+ if (intr_source_to_kevent(intr_handle, &ke) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot convert interrupt handle to kevent\n");
+ ret = -ENODEV;
+ goto fail;
+ }
+
+ /**
+ * add the intr file descriptor into wait list.
+ */
+ if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
+ /* currently, nic_uio does not support interrupts, so
+ * this error will always be triggered and output to the
+ * user; don't output it unless the debug log level is set.
+ */
+ if (errno == ENODEV)
+ RTE_LOG(DEBUG, EAL, "Interrupt handle %d not supported\n",
+ src->intr_handle.fd);
+ else
+ RTE_LOG(ERR, EAL, "Error adding fd %d "
+ "kevent, %s\n",
+ src->intr_handle.fd,
+ strerror(errno));
+ ret = -errno;
+ goto fail;
+ }
+ }
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
+fail:
+ /* clean up */
+ if (src != NULL) {
+ TAILQ_REMOVE(&(src->callbacks), callback, next);
+ if (TAILQ_EMPTY(&(src->callbacks))) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+ }
+ free(callback);
+ rte_spinlock_unlock(&intr_lock);
+ return ret;
}
int
rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
- rte_intr_callback_fn cb,
- void *cb_arg)
+ rte_intr_callback_fn cb_fn, void *cb_arg)
{
- RTE_SET_USED(intr_handle);
- RTE_SET_USED(cb);
- RTE_SET_USED(cb_arg);
+ int ret;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb, *next;
- return -ENOTSUP;
+ /* do parameter checking first */
+ if (intr_handle == NULL || intr_handle->fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Unregistering with invalid input parameter\n");
+ return -EINVAL;
+ }
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Kqueue is not active\n");
+ return -ENODEV;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if an interrupt source for the fd already exists */
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == intr_handle->fd)
+ break;
+
+ /* No interrupt source registered for the fd */
+ if (src == NULL) {
+ ret = -ENOENT;
+
+ /* interrupt source has some active callbacks right now. */
+ } else if (src->active != 0) {
+ ret = -EAGAIN;
+
+ /* ok to remove. */
+ } else {
+ struct kevent ke;
+
+ ret = 0;
+
+ /* remove it from the kqueue */
+ memset(&ke, 0, sizeof(ke));
+ ke.flags = EV_DELETE; /* mark for deletion from the queue */
+
+ if (intr_source_to_kevent(intr_handle, &ke) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot convert to kevent\n");
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /**
+ * remove intr file descriptor from wait list.
+ */
+ if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Error removing fd %d kevent, %s\n",
+ src->intr_handle.fd, strerror(errno));
+ /* removing a non-existent event is an expected condition
+ * in some circumstances (e.g. oneshot events).
+ */
+ }
+
+ /* walk through the callbacks and remove all that match. */
+ for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+ next = TAILQ_NEXT(cb, next);
+ if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
+ cb->cb_arg == cb_arg)) {
+ TAILQ_REMOVE(&src->callbacks, cb, next);
+ free(cb);
+ ret++;
+ }
+ }
+
+ /* all callbacks for that source are removed. */
+ if (TAILQ_EMPTY(&src->callbacks)) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+ }
+out:
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
}
int
-rte_intr_enable(const struct rte_intr_handle *intr_handle __rte_unused)
+rte_intr_enable(const struct rte_intr_handle *intr_handle)
{
- return -ENOTSUP;
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
+ if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+ return -1;
+
+ switch (intr_handle->type) {
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ return -1;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
}
int
-rte_intr_disable(const struct rte_intr_handle *intr_handle __rte_unused)
+rte_intr_disable(const struct rte_intr_handle *intr_handle)
{
- return -ENOTSUP;
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
+ if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+ return -1;
+
+ switch (intr_handle->type) {
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ return -1;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+eal_intr_process_interrupts(struct kevent *events, int nfds)
+{
+ struct rte_intr_callback active_cb;
+ union rte_intr_read_buffer buf;
+ struct rte_intr_callback *cb;
+ struct rte_intr_source *src;
+ bool call = false;
+ int n, bytes_read;
+
+ for (n = 0; n < nfds; n++) {
+ int event_fd = events[n].ident;
+
+ rte_spinlock_lock(&intr_lock);
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == event_fd)
+ break;
+ if (src == NULL) {
+ rte_spinlock_unlock(&intr_lock);
+ continue;
+ }
+
+ /* mark this interrupt source as active and release the lock. */
+ src->active = 1;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* set the length to be read for different handle types */
+ switch (src->intr_handle.type) {
+ case RTE_INTR_HANDLE_ALARM:
+ bytes_read = 0;
+ call = true;
+ break;
+ case RTE_INTR_HANDLE_VDEV:
+ case RTE_INTR_HANDLE_EXT:
+ bytes_read = 0;
+ call = true;
+ break;
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ bytes_read = 0;
+ call = true;
+ break;
+ default:
+ bytes_read = 1;
+ break;
+ }
+
+ if (bytes_read > 0) {
+ /**
+ * read out to clear the ready-to-be-read flag
+ * for the next kevent() wait.
+ */
+ bytes_read = read(event_fd, &buf, bytes_read);
+ if (bytes_read < 0) {
+ if (errno == EINTR || errno == EWOULDBLOCK)
+ continue;
+
+ RTE_LOG(ERR, EAL, "Error reading from file "
+ "descriptor %d: %s\n",
+ event_fd,
+ strerror(errno));
+ } else if (bytes_read == 0)
+ RTE_LOG(ERR, EAL, "Read nothing from file "
+ "descriptor %d\n", event_fd);
+ else
+ call = true;
+ }
+
+ /* grab the lock again to call callbacks and update status. */
+ rte_spinlock_lock(&intr_lock);
+
+ if (call) {
+ /* Finally, call all callbacks. */
+ TAILQ_FOREACH(cb, &src->callbacks, next) {
+
+ /* make a copy and unlock. */
+ active_cb = *cb;
+ rte_spinlock_unlock(&intr_lock);
+
+ /* call the actual callback */
+ active_cb.cb_fn(active_cb.cb_arg);
+
+ /* get the lock back. */
+ rte_spinlock_lock(&intr_lock);
+ }
+ }
+
+ /* we are done with that interrupt source, release it. */
+ src->active = 0;
+ rte_spinlock_unlock(&intr_lock);
+ }
+}
+
+static void *
+eal_intr_thread_main(void *arg __rte_unused)
+{
+ struct kevent events[MAX_INTR_EVENTS];
+ int nfds;
+
+ /* host thread, never break out */
+ for (;;) {
+ /* do not change anything, just wait */
+ nfds = kevent(kq, NULL, 0, events, MAX_INTR_EVENTS, NULL);
+
+ /* kevent fail */
+ if (nfds < 0) {
+ if (errno == EINTR)
+ continue;
+ RTE_LOG(ERR, EAL,
+ "kevent returns with fail\n");
+ break;
+ }
+ /* kevent timeout, will never happen here */
+ else if (nfds == 0)
+ continue;
+
+ /* kevent has at least one fd ready to read */
+ eal_intr_process_interrupts(events, nfds);
+ }
+ close(kq);
+ kq = -1;
+ return NULL;
}
int
rte_eal_intr_init(void)
{
- return 0;
+ int ret = 0;
+
+ /* init the global interrupt source head */
+ TAILQ_INIT(&intr_sources);
+
+ kq = kqueue();
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create kqueue instance\n");
+ return -1;
+ }
+
+ /* create the host thread to wait/handle the interrupt */
+ ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
+ eal_intr_thread_main, NULL);
+ if (ret != 0) {
+ rte_errno = -ret;
+ RTE_LOG(ERR, EAL,
+ "Failed to create thread for interrupt handling\n");
+ }
+
+ return ret;
}
int
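The FreeBSD implementation now accepts real registrations against kqueue-backed sources. A hedged sketch of the consumer side; the fd is assumed to be a readable descriptor owned by the caller, and attach_event is a placeholder name:

	/* Sketch: register a callback for a readable fd, then remove it. */
	static void
	on_event(void *cb_arg)
	{
		RTE_LOG(INFO, USER1, "event for device %p\n", cb_arg);
	}

	static int
	attach_event(int fd, void *dev)
	{
		struct rte_intr_handle ih;

		memset(&ih, 0, sizeof(ih));
		ih.fd = fd;
		ih.type = RTE_INTR_HANDLE_EXT;

		if (rte_intr_callback_register(&ih, on_event, dev) < 0)
			return -1;
		/* later: (void *)-1 removes every callback matching on_event */
		return rte_intr_callback_unregister(&ih, on_event, (void *)-1);
	}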
diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c
index a5e03478..16d2bc7c 100644
--- a/lib/librte_eal/bsdapp/eal/eal_memory.c
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -12,6 +12,7 @@
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include "eal_private.h"
@@ -104,6 +105,8 @@ rte_eal_hugepage_init(void)
/* map all hugepages and sort them */
for (i = 0; i < internal_config.num_hugepage_sizes; i ++){
struct hugepage_info *hpi;
+ rte_iova_t prev_end = 0;
+ int prev_ms_idx = -1;
uint64_t page_sz, mem_needed;
unsigned int n_pages, max_pages;
@@ -124,10 +127,27 @@ rte_eal_hugepage_init(void)
int error;
size_t sysctl_size = sizeof(physaddr);
char physaddr_str[64];
+ bool is_adjacent;
+
+ /* first, check if this segment is IOVA-adjacent to
+ * the previous one.
+ */
+ snprintf(physaddr_str, sizeof(physaddr_str),
+ "hw.contigmem.physaddr.%d", j);
+ error = sysctlbyname(physaddr_str, &physaddr,
+ &sysctl_size, NULL, 0);
+ if (error < 0) {
+ RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
+ "from %s\n", j, hpi->hugedir);
+ return -1;
+ }
+
+ is_adjacent = prev_end != 0 && physaddr == prev_end;
+ prev_end = physaddr + hpi->hugepage_sz;
for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
msl_idx++) {
- bool empty;
+ bool empty, need_hole;
msl = &mcfg->memsegs[msl_idx];
arr = &msl->memseg_arr;
@@ -136,20 +156,23 @@ rte_eal_hugepage_init(void)
empty = arr->count == 0;
- /* we need 1, plus hole if not empty */
+ /* we need a hole if this isn't an empty memseg
+ * list, and if the previous segment was not
+ * adjacent to the current one.
+ */
+ need_hole = !empty && !is_adjacent;
+
+ /* we need 1, plus hole if not adjacent */
ms_idx = rte_fbarray_find_next_n_free(arr,
- 0, 1 + (empty ? 1 : 0));
+ 0, 1 + (need_hole ? 1 : 0));
/* memseg list is full? */
if (ms_idx < 0)
continue;
- /* leave some space between memsegs, they are
- * not IOVA contiguous, so they shouldn't be VA
- * contiguous either.
- */
- if (!empty)
+ if (need_hole && prev_ms_idx == ms_idx - 1)
ms_idx++;
+ prev_ms_idx = ms_idx;
break;
}
@@ -178,16 +201,6 @@ rte_eal_hugepage_init(void)
return -1;
}
- snprintf(physaddr_str, sizeof(physaddr_str), "hw.contigmem"
- ".physaddr.%d", j);
- error = sysctlbyname(physaddr_str, &physaddr, &sysctl_size,
- NULL, 0);
- if (error < 0) {
- RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
- "from %s\n", j, hpi->hugedir);
- return -1;
- }
-
seg->addr = addr;
seg->iova = physaddr;
seg->hugepage_sz = page_sz;
@@ -200,7 +213,7 @@ rte_eal_hugepage_init(void)
RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
PRIx64", len %zu\n",
- seg_idx, addr, physaddr, page_sz);
+ seg_idx++, addr, physaddr, page_sz);
total_mem += seg->len;
}
@@ -288,3 +301,217 @@ rte_eal_using_phys_addrs(void)
{
return 0;
}
+
+static uint64_t
+get_mem_amount(uint64_t page_sz, uint64_t max_mem)
+{
+ uint64_t area_sz, max_pages;
+
+ /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
+ max_pages = RTE_MAX_MEMSEG_PER_LIST;
+ max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
+
+ area_sz = RTE_MIN(page_sz * max_pages, max_mem);
+
+ /* make sure the list isn't smaller than the page size */
+ area_sz = RTE_MAX(area_sz, page_sz);
+
+ return RTE_ALIGN(area_sz, page_sz);
+}
+
+#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+static int
+alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
+ int n_segs, int socket_id, int type_msl_idx)
+{
+ char name[RTE_FBARRAY_NAME_LEN];
+
+ snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
+ type_msl_idx);
+ if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
+ rte_strerror(rte_errno));
+ return -1;
+ }
+
+ msl->page_sz = page_sz;
+ msl->socket_id = socket_id;
+ msl->base_va = NULL;
+
+ RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
+ (size_t)page_sz >> 10, socket_id);
+
+ return 0;
+}
+
+static int
+alloc_va_space(struct rte_memseg_list *msl)
+{
+ uint64_t page_sz;
+ size_t mem_sz;
+ void *addr;
+ int flags = 0;
+
+#ifdef RTE_ARCH_PPC_64
+ flags |= MAP_HUGETLB;
+#endif
+
+ page_sz = msl->page_sz;
+ mem_sz = page_sz * msl->memseg_arr.len;
+
+ addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
+ if (addr == NULL) {
+ if (rte_errno == EADDRNOTAVAIL)
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
+ (unsigned long long)mem_sz, msl->base_va);
+ else
+ RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
+ return -1;
+ }
+ msl->base_va = addr;
+
+ return 0;
+}
+
+
+static int
+memseg_primary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int hpi_idx, msl_idx = 0;
+ struct rte_memseg_list *msl;
+ uint64_t max_mem, total_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ /* FreeBSD has an issue where a core dump will dump the entire memory
+ * contents, including anonymous zero-page memory. Therefore, while we
+ * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will
+ * also be further limiting total memory amount to whatever memory is
+ * available to us through contigmem driver (plus spacing blocks).
+ *
+ * so, at each stage, we will be checking how much memory we are
+ * preallocating and adjusting all the values accordingly.
+ */
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ total_mem = 0;
+
+ /* create memseg lists */
+ for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
+ hpi_idx++) {
+ uint64_t max_type_mem, total_type_mem = 0;
+ uint64_t avail_mem;
+ int type_msl_idx, max_segs, avail_segs, total_segs = 0;
+ struct hugepage_info *hpi;
+ uint64_t hugepage_sz;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ /* no NUMA support on FreeBSD */
+
+ /* check if we've already exceeded total memory amount */
+ if (total_mem >= max_mem)
+ break;
+
+ /* first, calculate theoretical limits according to config */
+ max_type_mem = RTE_MIN(max_mem - total_mem,
+ (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+
+ /* now, limit all of that to whatever will actually be
+ * available to us, because without dynamic allocation support,
+ * all of that extra memory will be sitting there being useless
+ * and slowing down core dumps in case of a crash.
+ *
+ * we need (N*2)-1 segments because we cannot guarantee that
+ * each segment will be IOVA-contiguous with the previous one,
+ * so we will allocate more and put spaces in between segments
+ * that are non-contiguous.
+ */
+ avail_segs = (hpi->num_pages[0] * 2) - 1;
+ avail_mem = avail_segs * hugepage_sz;
+
+ max_type_mem = RTE_MIN(avail_mem, max_type_mem);
+ max_segs = RTE_MIN(avail_segs, max_segs);
+
+ type_msl_idx = 0;
+ while (total_type_mem < max_type_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_max_mem, cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx++];
+
+ cur_max_mem = max_type_mem - total_type_mem;
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ cur_max_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ 0, type_msl_idx))
+ return -1;
+
+ total_segs += msl->memseg_arr.len;
+ total_type_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+ return -1;
+ }
+ }
+ total_mem += total_type_mem;
+ }
+ return 0;
+}
+
+static int
+memseg_secondary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx = 0;
+ struct rte_memseg_list *msl;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ /* skip empty memseg lists */
+ if (msl->memseg_arr.len == 0)
+ continue;
+
+ if (rte_fbarray_attach(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
+ return -1;
+ }
+
+ /* preallocate VA space */
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int
+rte_eal_memseg_init(void)
+{
+ return rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ memseg_primary_init() :
+ memseg_secondary_init();
+}
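Once rte_eal_memseg_init() has run, the preallocated lists are reachable through the generic memseg API. A sketch using the experimental rte_memseg_walk() helper (assumed available via rte_memory.h) to count mapped segments:

	/* Sketch: count mapped segments after EAL init. */
	static int
	count_seg(const struct rte_memseg_list *msl __rte_unused,
			const struct rte_memseg *ms __rte_unused, void *arg)
	{
		unsigned int *count = arg;

		(*count)++;
		return 0; /* returning non-zero would stop the walk */
	}

	static unsigned int
	n_memsegs(void)
	{
		unsigned int count = 0;

		rte_memseg_walk(count_seg, &count);
		return count;
	}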
diff --git a/lib/librte_eal/bsdapp/eal/meson.build b/lib/librte_eal/bsdapp/eal/meson.build
index 47e16a64..3945b529 100644
--- a/lib/librte_eal/bsdapp/eal/meson.build
+++ b/lib/librte_eal/bsdapp/eal/meson.build
@@ -16,3 +16,5 @@ env_sources = files('eal_alarm.c',
'eal_memory.c',
'eal_dev.c'
)
+
+deps += ['kvargs']
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 48f870f2..cca68826 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -11,12 +11,12 @@ INC += rte_per_lcore.h rte_random.h
INC += rte_tailq.h rte_interrupts.h rte_alarm.h
INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
-INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h
+INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_class.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
INC += rte_malloc.h rte_keepalive.h rte_time.h
INC += rte_service.h rte_service_component.h
INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
-INC += rte_reciprocal.h rte_fbarray.h
+INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
diff --git a/lib/librte_eal/common/eal_common_class.c b/lib/librte_eal/common/eal_common_class.c
new file mode 100644
index 00000000..404a9065
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_class.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_class.h>
+#include <rte_debug.h>
+
+struct rte_class_list rte_class_list =
+ TAILQ_HEAD_INITIALIZER(rte_class_list);
+
+__rte_experimental void
+rte_class_register(struct rte_class *class)
+{
+ RTE_VERIFY(class);
+ RTE_VERIFY(class->name && strlen(class->name));
+
+ TAILQ_INSERT_TAIL(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Registered [%s] device class.\n", class->name);
+}
+
+__rte_experimental void
+rte_class_unregister(struct rte_class *class)
+{
+ TAILQ_REMOVE(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Unregistered [%s] device class.\n", class->name);
+}
+
+__rte_experimental
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data)
+{
+ struct rte_class *cls;
+
+ if (start != NULL)
+ cls = TAILQ_NEXT(start, next);
+ else
+ cls = TAILQ_FIRST(&rte_class_list);
+ while (cls != NULL) {
+ if (cmp(cls, data) == 0)
+ break;
+ cls = TAILQ_NEXT(cls, next);
+ }
+ return cls;
+}
+
+static int
+cmp_class_name(const struct rte_class *class, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(class->name, name);
+}
+
+__rte_experimental
+struct rte_class *
+rte_class_find_by_name(const char *name)
+{
+ return rte_class_find(NULL, cmp_class_name, (const void *)name);
+}
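Classes are expected to register themselves from a constructor, typically via the RTE_REGISTER_CLASS helper declared alongside these symbols in rte_class.h. A minimal sketch; the class name and iterator callback are placeholders:

	/* Sketch: declare, register and look up a device class. */
	#include <rte_class.h>

	static void *
	example_dev_iterate(const void *start __rte_unused,
			const char *str __rte_unused,
			const struct rte_dev_iterator *it __rte_unused)
	{
		return NULL; /* placeholder: no devices to report */
	}

	static struct rte_class rte_class_example = {
		.dev_iterate = example_dev_iterate,
	};

	RTE_REGISTER_CLASS(example, rte_class_example);

	static struct rte_class *
	find_example(void)
	{
		return rte_class_find_by_name("example");
	}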
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 61cb3b16..678dbcac 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -10,9 +10,12 @@
#include <rte_compat.h>
#include <rte_bus.h>
+#include <rte_class.h>
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_debug.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
#include <rte_log.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
@@ -42,17 +45,27 @@ static struct dev_event_cb_list dev_event_cbs;
/* spinlock for device callbacks */
static rte_spinlock_t dev_event_lock = RTE_SPINLOCK_INITIALIZER;
-static int cmp_detached_dev_name(const struct rte_device *dev,
- const void *_name)
-{
- const char *name = _name;
+struct dev_next_ctx {
+ struct rte_dev_iterator *it;
+ const char *bus_str;
+ const char *cls_str;
+};
- /* skip attached devices */
- if (dev->driver != NULL)
- return 1;
+#define CTX(it, bus_str, cls_str) \
+ (&(const struct dev_next_ctx){ \
+ .it = it, \
+ .bus_str = bus_str, \
+ .cls_str = cls_str, \
+ })
- return strcmp(dev->name, name);
-}
+#define ITCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->it)
+
+#define BUSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->bus_str)
+
+#define CLSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->cls_str)
static int cmp_dev_name(const struct rte_device *dev, const void *_name)
{
@@ -138,8 +151,8 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (da == NULL)
return -ENOMEM;
- ret = rte_devargs_parse(da, "%s:%s,%s",
- busname, devname, devargs);
+ ret = rte_devargs_parsef(da, "%s:%s,%s",
+ busname, devname, devargs);
if (ret)
goto err_devarg;
@@ -151,14 +164,19 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (ret)
goto err_devarg;
- dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
+ dev = bus->find_device(NULL, cmp_dev_name, devname);
if (dev == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
+ RTE_LOG(ERR, EAL, "Cannot find device (%s)\n",
devname);
ret = -ENODEV;
goto err_devarg;
}
+ if (dev->driver != NULL) {
+ RTE_LOG(ERR, EAL, "Device is already plugged\n");
+ return -EEXIST;
+ }
+
ret = bus->plug(dev);
if (ret) {
RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
@@ -200,6 +218,11 @@ rte_eal_hotplug_remove(const char *busname, const char *devname)
return -EINVAL;
}
+ if (dev->driver == NULL) {
+ RTE_LOG(ERR, EAL, "Device is already unplugged\n");
+ return -ENOENT;
+ }
+
ret = bus->unplug(dev);
if (ret)
RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
@@ -343,3 +366,201 @@ dev_callback_process(char *device_name, enum rte_dev_event_type event)
}
rte_spinlock_unlock(&dev_event_lock);
}
+
+__rte_experimental
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it,
+ const char *dev_str)
+{
+ struct rte_devargs devargs;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+
+ /* Having both bus_str and cls_str NULL is illegal;
+ * mark this iterator as invalid unless
+ * everything below goes well.
+ */
+ it->bus_str = NULL;
+ it->cls_str = NULL;
+
+ devargs.data = dev_str;
+ if (rte_devargs_layers_parse(&devargs, dev_str))
+ goto get_out;
+
+ bus = devargs.bus;
+ cls = devargs.cls;
+ /* The string should have at least
+ * one layer specified.
+ */
+ if (bus == NULL && cls == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Either bus or class must be specified.\n");
+ rte_errno = EINVAL;
+ goto get_out;
+ }
+ if (bus != NULL && bus->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Bus %s not supported\n", bus->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ if (cls != NULL && cls->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Class %s not supported\n", cls->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ it->bus_str = devargs.bus_str;
+ it->cls_str = devargs.cls_str;
+ it->dev_str = dev_str;
+ it->bus = bus;
+ it->cls = cls;
+ it->device = NULL;
+ it->class_device = NULL;
+get_out:
+ return -rte_errno;
+}
+
+static char *
+dev_str_sane_copy(const char *str)
+{
+ size_t end;
+ char *copy;
+
+ end = strcspn(str, ",/");
+ if (str[end] == ',') {
+ copy = strdup(&str[end + 1]);
+ } else {
+ /* '/' or '\0' */
+ copy = strdup("");
+ }
+ if (copy == NULL) {
+ rte_errno = ENOMEM;
+ } else {
+ char *slash;
+
+ slash = strchr(copy, '/');
+ if (slash != NULL)
+ slash[0] = '\0';
+ }
+ return copy;
+}
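A worked example of what dev_str_sane_copy() produces (input strings hypothetical), derived from the code above:

/* dev_str_sane_copy("bus=pci,addr=00:02.0/class=eth")
 *     -> "addr=00:02.0"
 * dev_str_sane_copy("bus=pci/class=eth")
 *     -> ""  (no ',' in the first layer)
 *
 * i.e. the leading key up to and including the first ',' is skipped,
 * and the copy is truncated at the '/' layer separator; the caller
 * gets a heap copy of the first layer's properties only.
 */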
+
+static int
+class_next_dev_cmp(const struct rte_class *cls,
+ const void *ctx)
+{
+ struct rte_dev_iterator *it;
+ const char *cls_str = NULL;
+ void *dev;
+
+ if (cls->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ cls_str = CLSCTX(ctx);
+ dev = it->class_device;
+ /* it->cls_str != NULL means a class
+ * was specified in the devstr.
+ */
+ if (it->cls_str != NULL && cls != it->cls)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ dev = cls->dev_iterate(dev, cls_str, it);
+ it->class_device = dev;
+ return dev == NULL;
+}
+
+static int
+bus_next_dev_cmp(const struct rte_bus *bus,
+ const void *ctx)
+{
+ struct rte_device *dev = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_dev_iterator *it;
+ const char *bus_str = NULL;
+
+ if (bus->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ bus_str = BUSCTX(ctx);
+ dev = it->device;
+ /* it->bus_str != NULL means a bus
+ * was specified in the devstr.
+ */
+ if (it->bus_str != NULL && bus != it->bus)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ if (it->cls_str == NULL) {
+ dev = bus->dev_iterate(dev, bus_str, it);
+ goto end;
+ }
+ /* cls_str != NULL */
+ if (dev == NULL) {
+next_dev_on_bus:
+ dev = bus->dev_iterate(dev, bus_str, it);
+ it->device = dev;
+ }
+ if (dev == NULL)
+ return 1;
+ if (it->cls != NULL)
+ cls = TAILQ_PREV(it->cls, rte_class_list, next);
+ cls = rte_class_find(cls, class_next_dev_cmp, ctx);
+ if (cls != NULL) {
+ it->cls = cls;
+ goto end;
+ }
+ goto next_dev_on_bus;
+end:
+ it->device = dev;
+ return dev == NULL;
+}
+__rte_experimental
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it)
+{
+ struct rte_bus *bus = NULL;
+ int old_errno = rte_errno;
+ char *bus_str = NULL;
+ char *cls_str = NULL;
+
+ rte_errno = 0;
+ if (it->bus_str == NULL && it->cls_str == NULL) {
+ /* Invalid iterator. */
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ if (it->bus != NULL)
+ bus = TAILQ_PREV(it->bus, rte_bus_list, next);
+ if (it->bus_str != NULL) {
+ bus_str = dev_str_sane_copy(it->bus_str);
+ if (bus_str == NULL)
+ goto out;
+ }
+ if (it->cls_str != NULL) {
+ cls_str = dev_str_sane_copy(it->cls_str);
+ if (cls_str == NULL)
+ goto out;
+ }
+ while ((bus = rte_bus_find(bus, bus_next_dev_cmp,
+ CTX(it, bus_str, cls_str)))) {
+ if (it->device != NULL) {
+ it->bus = bus;
+ goto out;
+ }
+ if (it->bus_str != NULL ||
+ rte_errno != 0)
+ break;
+ }
+ if (rte_errno == 0)
+ rte_errno = old_errno;
+out:
+ free(bus_str);
+ free(cls_str);
+ return it->device;
+}
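A minimal usage sketch for the new iterator pair (not part of the patch; it assumes the named bus and class both implement dev_iterate):

#include <stdio.h>
#include <rte_dev.h>

static void
list_matching_devices(void)
{
	struct rte_dev_iterator it;
	struct rte_device *dev;

	if (rte_dev_iterator_init(&it, "bus=vdev/class=eth") != 0)
		return; /* rte_errno describes the failure */
	while ((dev = rte_dev_iterator_next(&it)) != NULL)
		printf("matched device %s\n", dev->name);
}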
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index b0434158..dac2402a 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -13,9 +13,14 @@
#include <string.h>
#include <stdarg.h>
+#include <rte_bus.h>
+#include <rte_class.h>
#include <rte_compat.h>
#include <rte_dev.h>
#include <rte_devargs.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
#include <rte_tailq.h>
#include "eal_private.h"
@@ -56,30 +61,164 @@ rte_eal_parse_devargs_str(const char *devargs_str,
return 0;
}
+static size_t
+devargs_layer_count(const char *s)
+{
+ size_t i = s ? 1 : 0;
+
+ while (s != NULL && s[0] != '\0') {
+ i += s[0] == '/';
+ s++;
+ }
+ return i;
+}
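Worked examples (inputs hypothetical): layers are separated by '/', so the count is one more than the number of separators, and zero for a NULL string.

/* devargs_layer_count(NULL)                        -> 0
 * devargs_layer_count("bus=pci")                   -> 1
 * devargs_layer_count("bus=pci/class=eth")         -> 2
 * devargs_layer_count("bus=pci/class=eth/driver=x")-> 3
 */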
+
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr)
+{
+ struct {
+ const char *key;
+ const char *str;
+ struct rte_kvargs *kvlist;
+ } layers[] = {
+ { "bus=", NULL, NULL, },
+ { "class=", NULL, NULL, },
+ { "driver=", NULL, NULL, },
+ };
+ struct rte_kvargs_pair *kv = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+ const char *s = devstr;
+ size_t nblayer;
+ size_t i = 0;
+ int ret = 0;
+
+ /* Split the string into layer sub-lists. */
+ nblayer = devargs_layer_count(devstr);
+ if (nblayer > RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Invalid format: too many layers (%zu)\n",
+ nblayer);
+ ret = -E2BIG;
+ goto get_out;
+ }
+
+ /* If the devargs already points to the devstr
+ * as its source data, it should not allocate
+ * anything and must keep referring only to it.
+ */
+ if (devargs->data != devstr) {
+ devargs->data = strdup(devstr);
+ if (devargs->data == NULL) {
+ RTE_LOG(ERR, EAL, "OOM\n");
+ ret = -ENOMEM;
+ goto get_out;
+ }
+ s = devargs->data;
+ }
+
+ while (s != NULL) {
+ if (i >= RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Unrecognized layer %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ /*
+ * The last layer is free-form.
+ * The "driver" key is not required (but accepted).
+ */
+ if (strncmp(layers[i].key, s, strlen(layers[i].key)) &&
+ i != RTE_DIM(layers) - 1)
+ goto next_layer;
+ layers[i].str = s;
+ layers[i].kvlist = rte_kvargs_parse_delim(s, NULL, "/");
+ if (layers[i].kvlist == NULL) {
+ RTE_LOG(ERR, EAL, "Could not parse %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ s = strchr(s, '/');
+ if (s != NULL)
+ s++;
+next_layer:
+ i++;
+ }
+
+ /* Parse each sub-list. */
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist == NULL)
+ continue;
+ kv = &layers[i].kvlist->pairs[0];
+ if (strcmp(kv->key, "bus") == 0) {
+ bus = rte_bus_find_by_name(kv->value);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find bus \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "class") == 0) {
+ cls = rte_class_find_by_name(kv->value);
+ if (cls == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find class \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "driver") == 0) {
+ /* Ignore */
+ continue;
+ }
+ }
+
+ /* Fill devargs fields. */
+ devargs->bus_str = layers[0].str;
+ devargs->cls_str = layers[1].str;
+ devargs->drv_str = layers[2].str;
+ devargs->bus = bus;
+ devargs->cls = cls;
+
+ /* If we own the data, split the layer
+ * strings with NUL terminators, to ease
+ * parsing them afterward.
+ */
+ if (devargs->data != devstr) {
+ char *s = (void *)(intptr_t)(devargs->data);
+
+ while ((s = strchr(s, '/'))) {
+ *s = '\0';
+ s++;
+ }
+ }
+
+get_out:
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist)
+ rte_kvargs_free(layers[i].kvlist);
+ }
+ if (ret != 0)
+ rte_errno = -ret;
+ return ret;
+}
+
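To make the accepted format concrete, here is how a full three-layer string (all names hypothetical) would be split by the parser above:

/* "bus=pci,addr=00:02.0/class=eth/driver=net_foo,key=val"
 *
 *   bus_str -> "bus=pci,addr=00:02.0"    (devargs->bus set from "pci")
 *   cls_str -> "class=eth"               (devargs->cls set from "eth")
 *   drv_str -> "driver=net_foo,key=val"  (free-form driver layer)
 *
 * When the parser owns the data, each '/' is then replaced with '\0'
 * so the three sub-strings can be consumed independently later.
 */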
static int
bus_name_cmp(const struct rte_bus *bus, const void *name)
{
return strncmp(bus->name, name, strlen(bus->name));
}
-int __rte_experimental
-rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
+__rte_experimental
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev)
{
struct rte_bus *bus = NULL;
- va_list ap;
- va_start(ap, format);
- char dev[vsnprintf(NULL, 0, format, ap) + 1];
const char *devname;
const size_t maxlen = sizeof(da->name);
size_t i;
- va_end(ap);
if (da == NULL)
return -EINVAL;
- va_start(ap, format);
- vsnprintf(dev, sizeof(dev), format, ap);
- va_end(ap);
/* Retrieve eventual bus info */
do {
devname = dev;
@@ -96,7 +235,7 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
da->name[i] = devname[i];
i++;
if (i == maxlen) {
- fprintf(stderr, "WARNING: Parsing \"%s\": device name should be shorter than %zu\n",
+ RTE_LOG(WARNING, EAL, "Parsing \"%s\": device name should be shorter than %zu\n",
dev, maxlen);
da->name[i - 1] = '\0';
return -EINVAL;
@@ -106,7 +245,7 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
if (bus == NULL) {
bus = rte_bus_find_by_device_name(da->name);
if (bus == NULL) {
- fprintf(stderr, "ERROR: failed to parse device \"%s\"\n",
+ RTE_LOG(ERR, EAL, "failed to parse device \"%s\"\n",
da->name);
return -EFAULT;
}
@@ -118,12 +257,40 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
else
da->args = strdup("");
if (da->args == NULL) {
- fprintf(stderr, "ERROR: not enough memory to parse arguments\n");
+ RTE_LOG(ERR, EAL, "not enough memory to parse arguments\n");
return -ENOMEM;
}
return 0;
}
+__rte_experimental
+int
+rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
+{
+ va_list ap;
+ size_t len;
+ char *dev;
+ int ret;
+
+ if (da == NULL)
+ return -EINVAL;
+
+ va_start(ap, format);
+ len = vsnprintf(NULL, 0, format, ap);
+ va_end(ap);
+
+ dev = calloc(1, len + 1);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory to parse device\n");
+ return -ENOMEM;
+ }
+
+ va_start(ap, format);
+ vsnprintf(dev, len + 1, format, ap);
+ va_end(ap);
+
+ ret = rte_devargs_parse(da, dev);
+
+ free(dev);
+ return ret;
+}
+
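Usage sketch (bus/device names are assumptions): the formatted variant builds the device string on the heap, then delegates to rte_devargs_parse().

#include <string.h>
#include <rte_devargs.h>

static void
parsef_demo(void)
{
	struct rte_devargs da;

	memset(&da, 0, sizeof(da));
	if (rte_devargs_parsef(&da, "%s:%s,%s",
			"pci", "0000:00:02.0", "key=value") == 0) {
		/* da.name holds the device name, da.args the trailing
		 * arguments (heap-allocated), and da.bus the matched bus. */
	}
}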
int __rte_experimental
rte_devargs_insert(struct rte_devargs *da)
{
@@ -150,7 +317,7 @@ rte_devargs_add(enum rte_devtype devtype, const char *devargs_str)
if (devargs == NULL)
goto fail;
- if (rte_devargs_parse(devargs, "%s", dev))
+ if (rte_devargs_parse(devargs, dev))
goto fail;
devargs->type = devtype;
bus = devargs->bus;
diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c
index 019f84c1..43caf3ce 100644
--- a/lib/librte_eal/common/eal_common_fbarray.c
+++ b/lib/librte_eal/common/eal_common_fbarray.c
@@ -231,7 +231,7 @@ find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
return MASK_GET_IDX(msk_idx, run_start);
}
/* we didn't find anything */
- rte_errno = used ? -ENOENT : -ENOSPC;
+ rte_errno = used ? ENOENT : ENOSPC;
return -1;
}
@@ -287,7 +287,7 @@ find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
return MASK_GET_IDX(idx, found);
}
/* we didn't find anything */
- rte_errno = used ? -ENOENT : -ENOSPC;
+ rte_errno = used ? ENOENT : ENOSPC;
return -1;
}
@@ -353,6 +353,277 @@ find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
}
static int
+find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int msk_idx, lookbehind_idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ msk_idx = first;
+ do {
+ uint64_t cur_msk, lookbehind_msk;
+ unsigned int run_start, run_end, ctz, left;
+ bool found = false;
+ /*
+ * The process of getting n consecutive bits from the top for
+ * arbitrary n is a bit involved, but here it is in a nutshell:
+ *
+ * 1. let n be the number of consecutive bits we're looking for
+ * 2. check if n can fit in one mask, and if so, do n-1
+ * lshift-ands to see if there is an appropriate run inside
+ * our current mask
+ * 2a. if we found a run, bail out early
+ * 2b. if we didn't find a run, proceed
+ * 3. invert the mask and count trailing zeroes (that is, count
+ * how many consecutive set bits we had starting from the
+ * start of current mask) as k
+ * 3a. if k is 0, continue to next mask
+ * 3b. if k is not 0, we have a potential run
+ * 4. to satisfy our requirements, next mask must have n-k
+ * consecutive set bits at the end, so we will do (n-k-1)
+ * lshift-ands and check if last bit is set.
+ *
+ * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+ * we either run out of masks, lose the run, or find what we
+ * were looking for.
+ */
+ cur_msk = msk->data[msk_idx];
+ left = n;
+
+ /* if we're looking for free spaces, invert the mask */
+ if (!used)
+ cur_msk = ~cur_msk;
+
+ /* if we have an ignore mask, ignore once */
+ if (ignore_msk) {
+ cur_msk &= ignore_msk;
+ ignore_msk = 0;
+ }
+
+ /* if n can fit within a single mask, do a search */
+ if (n <= MASK_ALIGN) {
+ uint64_t tmp_msk = cur_msk;
+ unsigned int s_idx;
+ for (s_idx = 0; s_idx < n - 1; s_idx++)
+ tmp_msk &= tmp_msk << 1ULL;
+ /* we found what we were looking for */
+ if (tmp_msk != 0) {
+ /* clz will give us offset from end of mask, and
+ * we only get the end of our run, not start,
+ * so adjust result to point to where start
+ * would have been.
+ */
+ run_start = MASK_ALIGN -
+ __builtin_clzll(tmp_msk) - n;
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ }
+
+ /*
+ * we didn't find our run within the mask, or n > MASK_ALIGN,
+ * so we're going for plan B.
+ */
+
+ /* count trailing zeroes on inverted mask */
+ if (~cur_msk == 0)
+ ctz = sizeof(cur_msk) * 8;
+ else
+ ctz = __builtin_ctzll(~cur_msk);
+
+ /* if there aren't any runs at the start either, just
+ * continue
+ */
+ if (ctz == 0)
+ continue;
+
+ /* we have a partial run at the start, so try looking behind */
+ run_end = MASK_GET_IDX(msk_idx, ctz);
+ left -= ctz;
+
+ /* go backwards, include zero */
+ lookbehind_idx = msk_idx - 1;
+
+ /* we can't lookbehind as we've run out of masks, so stop */
+ if (msk_idx == 0)
+ break;
+
+ do {
+ const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1);
+ unsigned int s_idx, need;
+
+ lookbehind_msk = msk->data[lookbehind_idx];
+
+ /* if we're looking for free space, invert the mask */
+ if (!used)
+ lookbehind_msk = ~lookbehind_msk;
+
+ /* figure out how many consecutive bits we need here */
+ need = RTE_MIN(left, MASK_ALIGN);
+
+ for (s_idx = 0; s_idx < need - 1; s_idx++)
+ lookbehind_msk &= lookbehind_msk << 1ULL;
+
+ /* if last bit is not set, we've lost the run */
+ if ((lookbehind_msk & last_bit) == 0) {
+ /*
+ * we've scanned this far, so we know there are
+ * no runs in the space we've lookbehind-scanned
+ * as well, so skip that on next iteration.
+ */
+ ignore_msk = -1ULL << need;
+ msk_idx = lookbehind_idx;
+ break;
+ }
+
+ left -= need;
+
+ /* check if we've found what we were looking for */
+ if (left == 0) {
+ found = true;
+ break;
+ }
+ } while ((lookbehind_idx--) != 0); /* decrement after check to
+ * include zero
+ */
+
+ /* we didn't find anything, so continue */
+ if (!found)
+ continue;
+
+ /* we've found what we were looking for, but we only know where
+ * the run ended, so calculate start position.
+ */
+ return run_end - n;
+ } while (msk_idx-- != 0); /* decrement after check to include zero */
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
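A quick worked example of the ignore-mask arithmetic used above, assuming MASK_ALIGN == 64:

/* start = 70:
 *   first      = 70 / 64 = 1
 *   first_mod  = 70 % 64 = 6
 *   ignore_msk = ~(-1ULL << (6 + 1)) = 0x7f
 *
 * Only bits 0..6 of mask word 1 (overall indices 64..70) survive the
 * first iteration, so no run beyond the starting index can be
 * reported by the clz/ctz-based scan.
 */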
+
+static int
+find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing clz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ int found;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first)
+ cur &= ignore_msk;
+
+ /* check if we have any entries */
+ if (cur == 0)
+ continue;
+
+ /*
+ * find last set bit - that will correspond to whatever it is
+ * that we're looking for. we're counting trailing zeroes, thus
+ * the value we get is counted from end of mask, so calculate
+ * position from start of mask.
+ */
+ found = MASK_ALIGN - __builtin_clzll(cur) - 1;
+
+ return MASK_GET_IDX(idx, found);
+ } while (idx-- != 0); /* decrement after check to include zero */
+
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int need_len, result = 0;
+
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ unsigned int run_len;
+
+ need_len = MASK_ALIGN;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything after start on first iteration */
+ if (idx == first) {
+ unsigned int end_len = MASK_ALIGN - first_mod - 1;
+ cur <<= end_len;
+ /* at the start, we don't need the full mask len */
+ need_len -= end_len;
+ }
+
+ /* we will be looking for zeroes, so invert the mask */
+ cur = ~cur;
+
+ /* if mask is zero, we have a complete run */
+ if (cur == 0)
+ goto endloop;
+
+ /*
+ * see where run ends, starting from the end.
+ */
+ run_len = __builtin_clzll(cur);
+
+ /* add however many zeroes we've had in the last run and quit */
+ if (run_len < need_len) {
+ result += run_len;
+ break;
+ }
+endloop:
+ result += need_len;
+ } while (idx-- != 0); /* decrement after check to include zero */
+ return result;
+}
+
+static int
set_used(struct rte_fbarray *arr, unsigned int idx, bool used)
{
struct used_mask *msk;
@@ -434,39 +705,52 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
if (data == NULL)
goto fail;
- eal_get_fbarray_path(path, sizeof(path), name);
+ if (internal_config.no_shconf) {
+ /* remap virtual area as writable */
+ void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (new_data == MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
+ __func__, strerror(errno));
+ goto fail;
+ }
+ } else {
+ eal_get_fbarray_path(path, sizeof(path), name);
- /*
- * Each fbarray is unique to process namespace, i.e. the filename
- * depends on process prefix. Try to take out a lock and see if we
- * succeed. If we don't, someone else is using it already.
- */
- fd = open(path, O_CREAT | O_RDWR, 0600);
- if (fd < 0) {
- RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n", __func__,
- path, strerror(errno));
- rte_errno = errno;
- goto fail;
- } else if (flock(fd, LOCK_EX | LOCK_NB)) {
- RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n", __func__,
- path, strerror(errno));
- rte_errno = EBUSY;
- goto fail;
- }
+ /*
+ * Each fbarray is unique to process namespace, i.e. the
+ * filename depends on process prefix. Try to take out a lock
+ * and see if we succeed. If we don't, someone else is using it
+ * already.
+ */
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = errno;
+ goto fail;
+ } else if (flock(fd, LOCK_EX | LOCK_NB)) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = EBUSY;
+ goto fail;
+ }
- /* take out a non-exclusive lock, so that other processes could still
- * attach to it, but no other process could reinitialize it.
- */
- if (flock(fd, LOCK_SH | LOCK_NB)) {
- rte_errno = errno;
- goto fail;
- }
+ /* take out a non-exclusive lock, so that other processes could
+ * still attach to it, but no other process could reinitialize
+ * it.
+ */
+ if (flock(fd, LOCK_SH | LOCK_NB)) {
+ rte_errno = errno;
+ goto fail;
+ }
- if (resize_and_map(fd, data, mmap_len))
- goto fail;
+ if (resize_and_map(fd, data, mmap_len))
+ goto fail;
- /* we've mmap'ed the file, we can now close the fd */
- close(fd);
+ /* we've mmap'ed the file, we can now close the fd */
+ close(fd);
+ }
/* initialize the data */
memset(data, 0, mmap_len);
@@ -675,8 +959,8 @@ rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx)
return ret;
}
-int __rte_experimental
-rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
+static int
+fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used)
{
int ret = -1;
@@ -688,36 +972,106 @@ rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
/* prevent array from changing under us */
rte_rwlock_read_lock(&arr->rwlock);
- if (arr->len == arr->count) {
- rte_errno = ENOSPC;
- goto out;
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = start;
+ goto out;
+ }
+ } else {
+ if (arr->count == 0) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->len == arr->count) {
+ ret = start;
+ goto out;
+ }
}
-
- ret = find_next(arr, start, false);
+ if (next)
+ ret = find_next(arr, start, used);
+ else
+ ret = find_prev(arr, start, used);
out:
rte_rwlock_read_unlock(&arr->rwlock);
return ret;
}
int __rte_experimental
+rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, true, false);
+}
+
+int __rte_experimental
rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start)
{
+ return fbarray_find(arr, start, true, true);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, true);
+}
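An illustrative (made-up) use of the new forward/backward lookups:

#include <stdint.h>
#include <rte_fbarray.h>

static void
fbarray_lookup_demo(void)
{
	struct rte_fbarray arr;

	if (rte_fbarray_init(&arr, "demo", 256, sizeof(uint64_t)) != 0)
		return;
	rte_fbarray_set_used(&arr, 42);

	rte_fbarray_find_next_used(&arr, 0);   /* returns 42 */
	rte_fbarray_find_prev_used(&arr, 255); /* returns 42 */
	rte_fbarray_find_prev_free(&arr, 41);  /* returns 41 */

	rte_fbarray_destroy(&arr);
}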
+
+static int
+fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool next, bool used)
+{
int ret = -1;
- if (arr == NULL || start >= arr->len) {
+ if (arr == NULL || start >= arr->len || n > arr->len || n == 0) {
rte_errno = EINVAL;
return -1;
}
+ if (next && (arr->len - start) < n) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
+ if (!next && start < (n - 1)) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
/* prevent array from changing under us */
rte_rwlock_read_lock(&arr->rwlock);
- if (arr->count == 0) {
- rte_errno = ENOENT;
- goto out;
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count || arr->len - arr->count < n) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
+ } else {
+ if (arr->count < n) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->count == arr->len) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
}
- ret = find_next(arr, start, true);
+ if (next)
+ ret = find_next_n(arr, start, n, used);
+ else
+ ret = find_prev_n(arr, start, n, used);
out:
rte_rwlock_read_unlock(&arr->rwlock);
return ret;
@@ -727,54 +1081,33 @@ int __rte_experimental
rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
unsigned int n)
{
- int ret = -1;
-
- if (arr == NULL || start >= arr->len || n > arr->len) {
- rte_errno = EINVAL;
- return -1;
- }
-
- /* prevent array from changing under us */
- rte_rwlock_read_lock(&arr->rwlock);
-
- if (arr->len == arr->count || arr->len - arr->count < n) {
- rte_errno = ENOSPC;
- goto out;
- }
-
- ret = find_next_n(arr, start, n, false);
-out:
- rte_rwlock_read_unlock(&arr->rwlock);
- return ret;
+ return fbarray_find_n(arr, start, n, true, false);
}
int __rte_experimental
rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
unsigned int n)
{
- int ret = -1;
-
- if (arr == NULL || start >= arr->len || n > arr->len) {
- rte_errno = EINVAL;
- return -1;
- }
-
- /* prevent array from changing under us */
- rte_rwlock_read_lock(&arr->rwlock);
-
- if (arr->count < n) {
- rte_errno = ENOENT;
- goto out;
- }
+ return fbarray_find_n(arr, start, n, true, true);
+}
- ret = find_next_n(arr, start, n, true);
-out:
- rte_rwlock_read_unlock(&arr->rwlock);
- return ret;
+int __rte_experimental
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, false);
}
int __rte_experimental
-rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, true);
+}
+
+static int
+fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next,
+ bool used)
{
int ret = -1;
@@ -786,39 +1119,66 @@ rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
/* prevent array from changing under us */
rte_rwlock_read_lock(&arr->rwlock);
- if (arr->len == arr->count) {
- rte_errno = ENOSPC;
- goto out;
- }
-
- if (arr->count == 0) {
- ret = arr->len - start;
- goto out;
+ /* cheap checks to prevent doing useless work */
+ if (used) {
+ if (arr->count == 0) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == arr->len) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == arr->len) {
+ ret = start + 1;
+ goto out;
+ }
+ } else {
+ if (arr->len == arr->count) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == 0) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == 0) {
+ ret = start + 1;
+ goto out;
+ }
}
- ret = find_contig(arr, start, false);
+ if (next)
+ ret = find_contig(arr, start, used);
+ else
+ ret = find_rev_contig(arr, start, used);
out:
rte_rwlock_read_unlock(&arr->rwlock);
return ret;
}
int __rte_experimental
-rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
+rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
{
- int ret = -1;
-
- if (arr == NULL || start >= arr->len) {
- rte_errno = EINVAL;
- return -1;
- }
+ return fbarray_find_contig(arr, start, true, false);
+}
- /* prevent array from changing under us */
- rte_rwlock_read_lock(&arr->rwlock);
+int __rte_experimental
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, true, true);
+}
- ret = find_contig(arr, start, true);
+int __rte_experimental
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, false);
+}
- rte_rwlock_read_unlock(&arr->rwlock);
- return ret;
+int __rte_experimental
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, true);
}
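The contiguous-run variants behave analogously; the rev_contig flavor counts backwards from, and including, the start index. For example (hypothetical array state), with entries 10..19 marked used:

/* rte_fbarray_find_contig_used(&arr, 10)     -> 10  (run 10..19)
 * rte_fbarray_find_rev_contig_used(&arr, 19) -> 10  (run 19..10)
 * rte_fbarray_find_rev_contig_used(&arr, 14) ->  5  (run 14..10)
 */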
int __rte_experimental
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
index 81811894..c714a4bd 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -335,9 +335,7 @@ static const struct logtype logtype_strings[] = {
};
/* Logging should be first initializer (before drivers and bus) */
-RTE_INIT_PRIO(rte_log_init, LOG);
-static void
-rte_log_init(void)
+RTE_INIT_PRIO(rte_log_init, LOG)
{
uint32_t i;
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 4f0688f9..fbfb1b05 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -34,7 +34,7 @@
#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
-static uint64_t baseaddr_offset;
+static void *next_baseaddr;
static uint64_t system_page_sz;
void *
@@ -56,21 +56,27 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;
- if (requested_addr == NULL && internal_config.base_virtaddr != 0) {
- requested_addr = (void *) (internal_config.base_virtaddr +
- (size_t)baseaddr_offset);
+ if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY)
+ next_baseaddr = (void *) internal_config.base_virtaddr;
+
+ if (requested_addr == NULL && next_baseaddr != NULL) {
+ requested_addr = next_baseaddr;
requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
addr_is_hint = true;
}
- /* if requested address is not aligned by page size, or if requested
- * address is NULL, add page size to requested length as we may get an
- * address that's aligned by system page size, which can be smaller than
- * our requested page size. additionally, we shouldn't try to align if
- * system page size is the same as requested page size.
+ /* we don't need alignment of resulting pointer in the following cases:
+ *
+ * 1. page size is equal to system page size
+ * 2. we have a requested address, and it is page-aligned, and we will
+ * be discarding the address if we get a different one.
+ *
+ * for all other cases, alignment is potentially necessary.
*/
no_align = (requested_addr != NULL &&
- ((uintptr_t)requested_addr & (page_sz - 1)) == 0) ||
+ requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
+ !addr_is_hint) ||
page_sz == system_page_sz;
do {
@@ -116,6 +122,8 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
requested_addr, aligned_addr);
RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n");
+ } else if (next_baseaddr != NULL) {
+ next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
}
RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
@@ -148,387 +156,9 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
munmap(aligned_end, after_len);
}
- baseaddr_offset += *size;
-
return aligned_addr;
}
-static uint64_t
-get_mem_amount(uint64_t page_sz, uint64_t max_mem)
-{
- uint64_t area_sz, max_pages;
-
- /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
- max_pages = RTE_MAX_MEMSEG_PER_LIST;
- max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
-
- area_sz = RTE_MIN(page_sz * max_pages, max_mem);
-
- /* make sure the list isn't smaller than the page size */
- area_sz = RTE_MAX(area_sz, page_sz);
-
- return RTE_ALIGN(area_sz, page_sz);
-}
-
-static int
-free_memseg_list(struct rte_memseg_list *msl)
-{
- if (rte_fbarray_destroy(&msl->memseg_arr)) {
- RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n");
- return -1;
- }
- memset(msl, 0, sizeof(*msl));
- return 0;
-}
-
-static int
-alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
- uint64_t max_mem, int socket_id, int type_msl_idx)
-{
- char name[RTE_FBARRAY_NAME_LEN];
- uint64_t mem_amount;
- int max_segs;
-
- mem_amount = get_mem_amount(page_sz, max_mem);
- max_segs = mem_amount / page_sz;
-
- snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
- type_msl_idx);
- if (rte_fbarray_init(&msl->memseg_arr, name, max_segs,
- sizeof(struct rte_memseg))) {
- RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
- rte_strerror(rte_errno));
- return -1;
- }
-
- msl->page_sz = page_sz;
- msl->socket_id = socket_id;
- msl->base_va = NULL;
-
- RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
- (size_t)page_sz >> 10, socket_id);
-
- return 0;
-}
-
-static int
-alloc_va_space(struct rte_memseg_list *msl)
-{
- uint64_t page_sz;
- size_t mem_sz;
- void *addr;
- int flags = 0;
-
-#ifdef RTE_ARCH_PPC_64
- flags |= MAP_HUGETLB;
-#endif
-
- page_sz = msl->page_sz;
- mem_sz = page_sz * msl->memseg_arr.len;
-
- addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
- if (addr == NULL) {
- if (rte_errno == EADDRNOTAVAIL)
- RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
- (unsigned long long)mem_sz, msl->base_va);
- else
- RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
- return -1;
- }
- msl->base_va = addr;
-
- return 0;
-}
-
-static int __rte_unused
-memseg_primary_init_32(void)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int active_sockets, hpi_idx, msl_idx = 0;
- unsigned int socket_id, i;
- struct rte_memseg_list *msl;
- uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem;
- uint64_t max_mem;
-
- /* no-huge does not need this at all */
- if (internal_config.no_hugetlbfs)
- return 0;
-
- /* this is a giant hack, but desperate times call for desperate
- * measures. in legacy 32-bit mode, we cannot preallocate VA space,
- * because having upwards of 2 gigabytes of VA space already mapped will
- * interfere with our ability to map and sort hugepages.
- *
- * therefore, in legacy 32-bit mode, we will be initializing memseg
- * lists much later - in eal_memory.c, right after we unmap all the
- * unneeded pages. this will not affect secondary processes, as those
- * should be able to mmap the space without (too many) problems.
- */
- if (internal_config.legacy_mem)
- return 0;
-
- /* 32-bit mode is a very special case. we cannot know in advance where
- * the user will want to allocate their memory, so we have to do some
- * heuristics.
- */
- active_sockets = 0;
- total_requested_mem = 0;
- if (internal_config.force_sockets)
- for (i = 0; i < rte_socket_count(); i++) {
- uint64_t mem;
-
- socket_id = rte_socket_id_by_idx(i);
- mem = internal_config.socket_mem[socket_id];
-
- if (mem == 0)
- continue;
-
- active_sockets++;
- total_requested_mem += mem;
- }
- else
- total_requested_mem = internal_config.memory;
-
- max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
- if (total_requested_mem > max_mem) {
- RTE_LOG(ERR, EAL, "Invalid parameters: 32-bit process can at most use %uM of memory\n",
- (unsigned int)(max_mem >> 20));
- return -1;
- }
- total_extra_mem = max_mem - total_requested_mem;
- extra_mem_per_socket = active_sockets == 0 ? total_extra_mem :
- total_extra_mem / active_sockets;
-
- /* the allocation logic is a little bit convoluted, but here's how it
- * works, in a nutshell:
- * - if user hasn't specified on which sockets to allocate memory via
- * --socket-mem, we allocate all of our memory on master core socket.
- * - if user has specified sockets to allocate memory on, there may be
- * some "unused" memory left (e.g. if user has specified --socket-mem
- * such that not all memory adds up to 2 gigabytes), so add it to all
- * sockets that are in use equally.
- *
- * page sizes are sorted by size in descending order, so we can safely
- * assume that we dispense with bigger page sizes first.
- */
-
- /* create memseg lists */
- for (i = 0; i < rte_socket_count(); i++) {
- int hp_sizes = (int) internal_config.num_hugepage_sizes;
- uint64_t max_socket_mem, cur_socket_mem;
- unsigned int master_lcore_socket;
- struct rte_config *cfg = rte_eal_get_configuration();
- bool skip;
-
- socket_id = rte_socket_id_by_idx(i);
-
-#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
- if (socket_id > 0)
- break;
-#endif
-
- /* if we didn't specifically request memory on this socket */
- skip = active_sockets != 0 &&
- internal_config.socket_mem[socket_id] == 0;
- /* ...or if we didn't specifically request memory on *any*
- * socket, and this is not master lcore
- */
- master_lcore_socket = rte_lcore_to_socket_id(cfg->master_lcore);
- skip |= active_sockets == 0 && socket_id != master_lcore_socket;
-
- if (skip) {
- RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n",
- socket_id);
- continue;
- }
-
- /* max amount of memory on this socket */
- max_socket_mem = (active_sockets != 0 ?
- internal_config.socket_mem[socket_id] :
- internal_config.memory) +
- extra_mem_per_socket;
- cur_socket_mem = 0;
-
- for (hpi_idx = 0; hpi_idx < hp_sizes; hpi_idx++) {
- uint64_t max_pagesz_mem, cur_pagesz_mem = 0;
- uint64_t hugepage_sz;
- struct hugepage_info *hpi;
- int type_msl_idx, max_segs, total_segs = 0;
-
- hpi = &internal_config.hugepage_info[hpi_idx];
- hugepage_sz = hpi->hugepage_sz;
-
- /* check if pages are actually available */
- if (hpi->num_pages[socket_id] == 0)
- continue;
-
- max_segs = RTE_MAX_MEMSEG_PER_TYPE;
- max_pagesz_mem = max_socket_mem - cur_socket_mem;
-
- /* make it multiple of page size */
- max_pagesz_mem = RTE_ALIGN_FLOOR(max_pagesz_mem,
- hugepage_sz);
-
- RTE_LOG(DEBUG, EAL, "Attempting to preallocate "
- "%" PRIu64 "M on socket %i\n",
- max_pagesz_mem >> 20, socket_id);
-
- type_msl_idx = 0;
- while (cur_pagesz_mem < max_pagesz_mem &&
- total_segs < max_segs) {
- if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
- RTE_LOG(ERR, EAL,
- "No more space in memseg lists, please increase %s\n",
- RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
- return -1;
- }
-
- msl = &mcfg->memsegs[msl_idx];
-
- if (alloc_memseg_list(msl, hugepage_sz,
- max_pagesz_mem, socket_id,
- type_msl_idx)) {
- /* failing to allocate a memseg list is
- * a serious error.
- */
- RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
- return -1;
- }
-
- if (alloc_va_space(msl)) {
- /* if we couldn't allocate VA space, we
- * can try with smaller page sizes.
- */
- RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n");
- /* deallocate memseg list */
- if (free_memseg_list(msl))
- return -1;
- break;
- }
-
- total_segs += msl->memseg_arr.len;
- cur_pagesz_mem = total_segs * hugepage_sz;
- type_msl_idx++;
- msl_idx++;
- }
- cur_socket_mem += cur_pagesz_mem;
- }
- if (cur_socket_mem == 0) {
- RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n",
- socket_id);
- return -1;
- }
- }
-
- return 0;
-}
-
-static int __rte_unused
-memseg_primary_init(void)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, socket_id, hpi_idx, msl_idx = 0;
- struct rte_memseg_list *msl;
- uint64_t max_mem, total_mem;
-
- /* no-huge does not need this at all */
- if (internal_config.no_hugetlbfs)
- return 0;
-
- max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
- total_mem = 0;
-
- /* create memseg lists */
- for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
- hpi_idx++) {
- struct hugepage_info *hpi;
- uint64_t hugepage_sz;
-
- hpi = &internal_config.hugepage_info[hpi_idx];
- hugepage_sz = hpi->hugepage_sz;
-
- for (i = 0; i < (int) rte_socket_count(); i++) {
- uint64_t max_type_mem, total_type_mem = 0;
- int type_msl_idx, max_segs, total_segs = 0;
-
- socket_id = rte_socket_id_by_idx(i);
-
-#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
- if (socket_id > 0)
- break;
-#endif
-
- if (total_mem >= max_mem)
- break;
-
- max_type_mem = RTE_MIN(max_mem - total_mem,
- (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
- max_segs = RTE_MAX_MEMSEG_PER_TYPE;
-
- type_msl_idx = 0;
- while (total_type_mem < max_type_mem &&
- total_segs < max_segs) {
- uint64_t cur_max_mem;
- if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
- RTE_LOG(ERR, EAL,
- "No more space in memseg lists, please increase %s\n",
- RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
- return -1;
- }
-
- msl = &mcfg->memsegs[msl_idx++];
-
- cur_max_mem = max_type_mem - total_type_mem;
- if (alloc_memseg_list(msl, hugepage_sz,
- cur_max_mem, socket_id,
- type_msl_idx))
- return -1;
-
- total_segs += msl->memseg_arr.len;
- total_type_mem = total_segs * hugepage_sz;
- type_msl_idx++;
-
- if (alloc_va_space(msl)) {
- RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
- return -1;
- }
- }
- total_mem += total_type_mem;
- }
- }
- return 0;
-}
-
-static int
-memseg_secondary_init(void)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int msl_idx = 0;
- struct rte_memseg_list *msl;
-
- for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
-
- msl = &mcfg->memsegs[msl_idx];
-
- /* skip empty memseg lists */
- if (msl->memseg_arr.len == 0)
- continue;
-
- if (rte_fbarray_attach(&msl->memseg_arr)) {
- RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
- return -1;
- }
-
- /* preallocate VA space */
- if (alloc_va_space(msl)) {
- RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
- return -1;
- }
- }
-
- return 0;
-}
-
static struct rte_memseg *
virt2memseg(const void *addr, const struct rte_memseg_list *msl)
{
@@ -536,6 +166,9 @@ virt2memseg(const void *addr, const struct rte_memseg_list *msl)
void *start, *end;
int ms_idx;
+ if (msl == NULL)
+ return NULL;
+
/* a memseg list was specified, check if it's the right one */
start = msl->base_va;
end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len);
@@ -788,14 +421,11 @@ rte_mem_lock_page(const void *virt)
}
int __rte_experimental
-rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
int i, ms_idx, ret = 0;
- /* do not allow allocations/frees/init while we iterate */
- rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
-
for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
struct rte_memseg_list *msl = &mcfg->memsegs[i];
const struct rte_memseg *ms;
@@ -820,30 +450,34 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
len = n_segs * msl->page_sz;
ret = func(msl, ms, len, arg);
- if (ret < 0) {
- ret = -1;
- goto out;
- } else if (ret > 0) {
- ret = 1;
- goto out;
- }
+ if (ret)
+ return ret;
ms_idx = rte_fbarray_find_next_used(arr,
ms_idx + n_segs);
}
}
-out:
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
- return ret;
+ return 0;
}
int __rte_experimental
-rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, ms_idx, ret = 0;
+ int ret = 0;
/* do not allow allocations/frees/init while we iterate */
rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ms_idx, ret = 0;
for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
struct rte_memseg_list *msl = &mcfg->memsegs[i];
@@ -859,29 +493,33 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg)
while (ms_idx >= 0) {
ms = rte_fbarray_get(arr, ms_idx);
ret = func(msl, ms, arg);
- if (ret < 0) {
- ret = -1;
- goto out;
- } else if (ret > 0) {
- ret = 1;
- goto out;
- }
+ if (ret)
+ return ret;
ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
}
}
-out:
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
- return ret;
+ return 0;
}
int __rte_experimental
-rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+rte_memseg_walk(rte_memseg_walk_t func, void *arg)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, ret = 0;
+ int ret = 0;
/* do not allow allocations/frees/init while we iterate */
rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ret = 0;
for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
struct rte_memseg_list *msl = &mcfg->memsegs[i];
@@ -890,17 +528,23 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
continue;
ret = func(msl, arg);
- if (ret < 0) {
- ret = -1;
- goto out;
- }
- if (ret > 0) {
- ret = 1;
- goto out;
- }
+ if (ret)
+ return ret;
}
-out:
+ return 0;
+}
+
+int __rte_experimental
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_list_walk_thread_unsafe(func, arg);
rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
return ret;
}
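A usage sketch of the walk API (not from the patch): a non-zero return from the callback stops the walk and is propagated to the caller, which is exactly what the simplified `if (ret) return ret;` above preserves.

static int
count_segs(const struct rte_memseg_list *msl __rte_unused,
		const struct rte_memseg *ms __rte_unused, void *arg)
{
	int *count = arg;

	(*count)++;
	return 0; /* 0: keep walking; >0: stop; <0: stop with error */
}

static int
total_seg_count(void)
{
	int n = 0;

	rte_memseg_walk(count_segs, &n);
	return n;
}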
@@ -918,15 +562,7 @@ rte_eal_memory_init(void)
/* lock mem hotplug here, to prevent races while we init */
rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
- retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
-#ifndef RTE_ARCH_64
- memseg_primary_init_32() :
-#else
- memseg_primary_init() :
-#endif
- memseg_secondary_init();
-
- if (retval < 0)
+ if (rte_eal_memseg_init() < 0)
goto fail;
if (eal_memalloc_init() < 0)
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index faa3b061..7300fe05 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -52,38 +52,6 @@ memzone_lookup_thread_unsafe(const char *name)
return NULL;
}
-
-/* This function will return the greatest free block if a heap has been
- * specified. If no heap has been specified, it will return the heap and
- * length of the greatest free block available in all heaps */
-static size_t
-find_heap_max_free_elem(int *s, unsigned align)
-{
- struct rte_mem_config *mcfg;
- struct rte_malloc_socket_stats stats;
- int i, socket = *s;
- size_t len = 0;
-
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
-
- for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
- if ((socket != SOCKET_ID_ANY) && (socket != i))
- continue;
-
- malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
- if (stats.greatest_free_size > len) {
- len = stats.greatest_free_size;
- *s = i;
- }
- }
-
- if (len < MALLOC_ELEM_OVERHEAD + align)
- return 0;
-
- return len - MALLOC_ELEM_OVERHEAD - align;
-}
-
static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
int socket_id, unsigned int flags, unsigned int align,
@@ -92,6 +60,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
struct rte_memzone *mz;
struct rte_mem_config *mcfg;
struct rte_fbarray *arr;
+ void *mz_addr;
size_t requested_len;
int mz_idx;
bool contig;
@@ -140,8 +109,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
return NULL;
}
- len += RTE_CACHE_LINE_MASK;
- len &= ~((size_t) RTE_CACHE_LINE_MASK);
+ len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE);
/* save minimal requested length */
requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
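For reference, RTE_ALIGN_CEIL rounds up to the next multiple of its alignment, e.g. RTE_ALIGN_CEIL(100, 64) == 128 and RTE_ALIGN_CEIL(128, 64) == 128; the result is identical to the mask arithmetic it replaces, only more readable.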
@@ -165,27 +133,18 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
/* malloc only cares about size flags, remove contig flag from flags */
flags &= ~RTE_MEMZONE_IOVA_CONTIG;
- if (len == 0) {
- /* len == 0 is only allowed for non-contiguous zones */
- if (contig) {
- RTE_LOG(DEBUG, EAL, "Reserving zero-length contiguous memzones is not supported\n");
- rte_errno = EINVAL;
- return NULL;
- }
- if (bound != 0)
+ if (len == 0 && bound == 0) {
+ /* no size constraints were placed, so use malloc elem len */
+ requested_len = 0;
+ mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
+ align, contig);
+ } else {
+ if (len == 0)
requested_len = bound;
- else {
- requested_len = find_heap_max_free_elem(&socket_id, align);
- if (requested_len == 0) {
- rte_errno = ENOMEM;
- return NULL;
- }
- }
+ /* allocate memory on heap */
+ mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
+ flags, align, bound, contig);
}
-
- /* allocate memory on heap */
- void *mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id, flags,
- align, bound, contig);
if (mz_addr == NULL) {
rte_errno = ENOMEM;
return NULL;
@@ -213,8 +172,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
snprintf(mz->name, sizeof(mz->name), "%s", name);
mz->iova = rte_malloc_virt2iova(mz_addr);
mz->addr = mz_addr;
- mz->len = (requested_len == 0 ?
- (elem->size - MALLOC_ELEM_OVERHEAD) : requested_len);
+ mz->len = requested_len == 0 ?
+ elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
+ requested_len;
mz->hugepage_sz = elem->msl->page_sz;
mz->socket_id = elem->msl->socket_id;
mz->flags = 0;
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index ecebb292..dd5f9740 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -66,10 +66,12 @@ eal_long_options[] = {
{OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM },
{OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM },
{OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM },
+ {OPT_IN_MEMORY, 0, NULL, OPT_IN_MEMORY_NUM },
{OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM },
{OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM },
{OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM },
{OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM },
+ {OPT_SOCKET_LIMIT, 1, NULL, OPT_SOCKET_LIMIT_NUM },
{OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM },
{OPT_VDEV, 1, NULL, OPT_VDEV_NUM },
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
@@ -179,6 +181,10 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
/* zero out the NUMA config */
for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
internal_cfg->socket_mem[i] = 0;
+ internal_cfg->force_socket_limits = 0;
+ /* zero out the NUMA limits config */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->socket_limit[i] = 0;
/* zero out hugedir descriptors */
for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) {
memset(&internal_cfg->hugepage_info[i], 0,
@@ -315,6 +321,7 @@ eal_parse_service_coremask(const char *coremask)
unsigned int count = 0;
char c;
int val;
+ uint32_t taken_lcore_count = 0;
if (coremask == NULL)
return -1;
@@ -348,7 +355,7 @@ eal_parse_service_coremask(const char *coremask)
if (master_lcore_parsed &&
cfg->master_lcore == lcore) {
RTE_LOG(ERR, EAL,
- "Error: lcore %u is master lcore, cannot use as service core\n",
+ "lcore %u is master lcore, cannot use as service core\n",
idx);
return -1;
}
@@ -358,6 +365,10 @@ eal_parse_service_coremask(const char *coremask)
"lcore %u unavailable\n", idx);
return -1;
}
+
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
lcore_config[idx].core_role = ROLE_SERVICE;
count++;
}
@@ -374,11 +385,28 @@ eal_parse_service_coremask(const char *coremask)
if (count == 0)
return -1;
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores are in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
cfg->service_lcore_count = count;
return 0;
}
static int
+eal_service_cores_parsed(void)
+{
+ int idx;
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (lcore_config[idx].core_role == ROLE_SERVICE)
+ return 1;
+ }
+ return 0;
+}
+
+static int
eal_parse_coremask(const char *coremask)
{
struct rte_config *cfg = rte_eal_get_configuration();
@@ -387,6 +415,11 @@ eal_parse_coremask(const char *coremask)
char c;
int val;
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. "
+ "Please ensure -c is before -s or -S\n");
+
if (coremask == NULL)
return -1;
/* Remove all blank characters ahead and after .
@@ -418,6 +451,7 @@ eal_parse_coremask(const char *coremask)
"unavailable\n", idx);
return -1;
}
+
cfg->lcore_role[idx] = ROLE_RTE;
lcore_config[idx].core_index = count;
count++;
@@ -449,6 +483,7 @@ eal_parse_service_corelist(const char *corelist)
unsigned count = 0;
char *end = NULL;
int min, max;
+ uint32_t taken_lcore_count = 0;
if (corelist == NULL)
return -1;
@@ -490,6 +525,9 @@ eal_parse_service_corelist(const char *corelist)
idx);
return -1;
}
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
lcore_config[idx].core_role =
ROLE_SERVICE;
count++;
@@ -504,6 +542,12 @@ eal_parse_service_corelist(const char *corelist)
if (count == 0)
return -1;
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores were in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
return 0;
}
@@ -516,6 +560,11 @@ eal_parse_corelist(const char *corelist)
char *end = NULL;
int min, max;
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. "
+ "Please ensure -l is before -s or -S\n");
+
if (corelist == NULL)
return -1;
@@ -590,7 +639,8 @@ eal_parse_master_lcore(const char *arg)
/* ensure master core is not used as service core */
if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) {
- RTE_LOG(ERR, EAL, "Error: Master lcore is used as a service core.\n");
+ RTE_LOG(ERR, EAL,
+ "Error: Master lcore is used as a service core\n");
return -1;
}
@@ -1165,6 +1215,13 @@ eal_parse_common_option(int opt, const char *optarg,
conf->no_shconf = 1;
break;
+ case OPT_IN_MEMORY_NUM:
+ conf->in_memory = 1;
+ /* in-memory is a superset of noshconf and huge-unlink */
+ conf->no_shconf = 1;
+ conf->hugepage_unlink = 1;
+ break;
+
case OPT_PROC_TYPE_NUM:
conf->process_type = eal_parse_proc_type(optarg);
break;
@@ -1316,12 +1373,23 @@ eal_check_common_options(struct internal_config *internal_cfg)
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
-
- if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) {
+ if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink &&
+ !internal_cfg->in_memory) {
RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
+ if (internal_cfg->force_socket_limits && internal_cfg->legacy_mem) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT
+ " is only supported in non-legacy memory mode\n");
+ }
+ if (internal_cfg->single_file_segments &&
+ internal_cfg->hugepage_unlink) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is "
+ "not compatible with neither --"OPT_IN_MEMORY" nor "
+ "--"OPT_HUGE_UNLINK"\n");
+ return -1;
+ }
return 0;
}
@@ -1370,6 +1438,8 @@ eal_common_usage(void)
" Set specific log level\n"
" -v Display version information on startup\n"
" -h, --help This help\n"
+ " --"OPT_IN_MEMORY" Operate entirely in memory. This will\n"
+ " disable secondary process support\n"
"\nEAL options for DEBUG use only:\n"
" --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n"
" --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n"
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 707d8ab3..9fcb9121 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -20,6 +20,7 @@
#include <sys/un.h>
#include <unistd.h>
+#include <rte_alarm.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal.h>
@@ -94,11 +95,9 @@ TAILQ_HEAD(pending_request_list, pending_request);
static struct {
struct pending_request_list requests;
pthread_mutex_t lock;
- pthread_cond_t async_cond;
} pending_requests = {
.requests = TAILQ_HEAD_INITIALIZER(pending_requests.requests),
.lock = PTHREAD_MUTEX_INITIALIZER,
- .async_cond = PTHREAD_COND_INITIALIZER
/**< used in async requests only */
};
@@ -106,6 +105,16 @@ static struct {
static int
mp_send(struct rte_mp_msg *msg, const char *peer, int type);
+/* for use with alarm callback */
+static void
+async_reply_handle(void *arg);
+
+/* for use with process_msg */
+static struct pending_request *
+async_reply_handle_thread_unsafe(void *arg);
+
+static void
+trigger_async_action(struct pending_request *req);
static struct pending_request *
find_pending_request(const char *dst, const char *act_name)
@@ -290,6 +299,8 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name);
if (m->type == MP_REP || m->type == MP_IGN) {
+ struct pending_request *req = NULL;
+
pthread_mutex_lock(&pending_requests.lock);
pending_req = find_pending_request(s->sun_path, msg->name);
if (pending_req) {
@@ -301,11 +312,14 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
if (pending_req->type == REQUEST_TYPE_SYNC)
pthread_cond_signal(&pending_req->sync.cond);
else if (pending_req->type == REQUEST_TYPE_ASYNC)
- pthread_cond_signal(
- &pending_requests.async_cond);
+ req = async_reply_handle_thread_unsafe(
+ pending_req);
} else
RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name);
pthread_mutex_unlock(&pending_requests.lock);
+
+ if (req != NULL)
+ trigger_async_action(req);
return;
}
@@ -365,7 +379,6 @@ timespec_cmp(const struct timespec *a, const struct timespec *b)
}
enum async_action {
- ACTION_NONE, /**< don't do anything */
ACTION_FREE, /**< free the action entry, but don't trigger callback */
ACTION_TRIGGER /**< trigger callback, then free action entry */
};
@@ -375,7 +388,7 @@ process_async_request(struct pending_request *sr, const struct timespec *now)
{
struct async_request_param *param;
struct rte_mp_reply *reply;
- bool timeout, received, last_msg;
+ bool timeout, last_msg;
param = sr->async.param;
reply = &param->user_reply;
@@ -383,13 +396,6 @@ process_async_request(struct pending_request *sr, const struct timespec *now)
/* did we timeout? */
timeout = timespec_cmp(&param->end, now) <= 0;
- /* did we receive a response? */
- received = sr->reply_received != 0;
-
- /* if we didn't time out, and we didn't receive a response, ignore */
- if (!timeout && !received)
- return ACTION_NONE;
-
/* if we received a response, adjust relevant data and copy message. */
if (sr->reply_received == 1 && sr->reply) {
struct rte_mp_msg *msg, *user_msgs, *tmp;
@@ -448,118 +454,58 @@ trigger_async_action(struct pending_request *sr)
free(sr->async.param->user_reply.msgs);
free(sr->async.param);
free(sr->request);
+ free(sr);
}
static struct pending_request *
-check_trigger(struct timespec *ts)
+async_reply_handle_thread_unsafe(void *arg)
{
- struct pending_request *next, *cur, *trigger = NULL;
-
- TAILQ_FOREACH_SAFE(cur, &pending_requests.requests, next, next) {
- enum async_action action;
- if (cur->type != REQUEST_TYPE_ASYNC)
- continue;
+ struct pending_request *req = (struct pending_request *)arg;
+ enum async_action action;
+ struct timespec ts_now;
+ struct timeval now;
- action = process_async_request(cur, ts);
- if (action == ACTION_FREE) {
- TAILQ_REMOVE(&pending_requests.requests, cur, next);
- free(cur);
- } else if (action == ACTION_TRIGGER) {
- TAILQ_REMOVE(&pending_requests.requests, cur, next);
- trigger = cur;
- break;
- }
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot get current time\n");
+ goto no_trigger;
}
- return trigger;
-}
+ ts_now.tv_nsec = now.tv_usec * 1000;
+ ts_now.tv_sec = now.tv_sec;
-static void
-wait_for_async_messages(void)
-{
- struct pending_request *sr;
- struct timespec timeout;
- bool timedwait = false;
- bool nowait = false;
- int ret;
+ action = process_async_request(req, &ts_now);
- /* scan through the list and see if there are any timeouts that
- * are earlier than our current timeout.
- */
- TAILQ_FOREACH(sr, &pending_requests.requests, next) {
- if (sr->type != REQUEST_TYPE_ASYNC)
- continue;
- if (!timedwait || timespec_cmp(&sr->async.param->end,
- &timeout) < 0) {
- memcpy(&timeout, &sr->async.param->end,
- sizeof(timeout));
- timedwait = true;
- }
+ TAILQ_REMOVE(&pending_requests.requests, req, next);
- /* sometimes, we don't even wait */
- if (sr->reply_received) {
- nowait = true;
- break;
+ if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) {
+ /* if we failed to cancel the alarm because it's already in
+ * progress, don't proceed because otherwise we will end up
+ * handling the same message twice.
+ */
+ if (rte_errno == EINPROGRESS) {
+ RTE_LOG(DEBUG, EAL, "Request handling is already in progress\n");
+ goto no_trigger;
}
+ RTE_LOG(ERR, EAL, "Failed to cancel alarm\n");
}
- if (nowait)
- return;
-
- do {
- ret = timedwait ?
- pthread_cond_timedwait(
- &pending_requests.async_cond,
- &pending_requests.lock,
- &timeout) :
- pthread_cond_wait(
- &pending_requests.async_cond,
- &pending_requests.lock);
- } while (ret != 0 && ret != ETIMEDOUT);
-
- /* we've been woken up or timed out */
+ if (action == ACTION_TRIGGER)
+ return req;
+no_trigger:
+ free(req);
+ return NULL;
}
-static void *
-async_reply_handle(void *arg __rte_unused)
+static void
+async_reply_handle(void *arg)
{
- struct timeval now;
- struct timespec ts_now;
- while (1) {
- struct pending_request *trigger = NULL;
-
- pthread_mutex_lock(&pending_requests.lock);
+ struct pending_request *req;
- /* we exit this function holding the lock */
- wait_for_async_messages();
-
- if (gettimeofday(&now, NULL) < 0) {
- pthread_mutex_unlock(&pending_requests.lock);
- RTE_LOG(ERR, EAL, "Cannot get current time\n");
- break;
- }
- ts_now.tv_nsec = now.tv_usec * 1000;
- ts_now.tv_sec = now.tv_sec;
-
- do {
- trigger = check_trigger(&ts_now);
- /* unlock request list */
- pthread_mutex_unlock(&pending_requests.lock);
-
- if (trigger) {
- trigger_async_action(trigger);
- free(trigger);
-
- /* we've triggered a callback, but there may be
- * more, so lock the list and check again.
- */
- pthread_mutex_lock(&pending_requests.lock);
- }
- } while (trigger);
- }
-
- RTE_LOG(ERR, EAL, "ERROR: asynchronous requests disabled\n");
+ pthread_mutex_lock(&pending_requests.lock);
+ req = async_reply_handle_thread_unsafe(arg);
+ pthread_mutex_unlock(&pending_requests.lock);
- return NULL;
+ if (req != NULL)
+ trigger_async_action(req);
}
static int
@@ -624,7 +570,15 @@ rte_mp_channel_init(void)
{
char path[PATH_MAX];
int dir_fd;
- pthread_t mp_handle_tid, async_reply_handle_tid;
+ pthread_t mp_handle_tid;
+
+ /* in no shared files mode, we do not have secondary process support,
+ * so no need to initialize IPC.
+ */
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC will be disabled\n");
+ return 0;
+ }
/* create filter path */
create_socket_path("*", path, sizeof(path));
@@ -671,17 +625,6 @@ rte_mp_channel_init(void)
return -1;
}
- if (rte_ctrl_thread_create(&async_reply_handle_tid,
- "rte_mp_async", NULL,
- async_reply_handle, NULL) < 0) {
- RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
- strerror(errno));
- close(mp_fd);
- close(dir_fd);
- mp_fd = -1;
- return -1;
- }
-
/* unlock the directory */
flock(dir_fd, LOCK_UN);
close(dir_fd);
@@ -786,7 +729,7 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
- if (flock(dir_fd, LOCK_EX)) {
+ if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
rte_errno = errno;
@@ -853,7 +796,7 @@ rte_mp_sendmsg(struct rte_mp_msg *msg)
static int
mp_request_async(const char *dst, struct rte_mp_msg *req,
- struct async_request_param *param)
+ struct async_request_param *param, const struct timespec *ts)
{
struct rte_mp_msg *reply_msg;
struct pending_request *pending_req, *exist;
@@ -898,6 +841,13 @@ mp_request_async(const char *dst, struct rte_mp_msg *req,
param->user_reply.nb_sent++;
+ if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000,
+ async_reply_handle, pending_req) < 0) {
+ RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n",
+ dst, req->name);
+ rte_panic("Fix the above shit to properly free all memory\n");
+ }
+
return 0;
fail:
free(pending_req);
@@ -988,6 +938,12 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
if (check_input(req) == false)
return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
if (gettimeofday(&now, NULL) < 0) {
RTE_LOG(ERR, EAL, "Faile to get current time\n");
rte_errno = errno;
@@ -1020,7 +976,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
- if (flock(dir_fd, LOCK_EX)) {
+ if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
closedir(mp_dir);
@@ -1072,6 +1028,12 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
if (check_input(req) == false)
return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
if (gettimeofday(&now, NULL) < 0) {
RTE_LOG(ERR, EAL, "Faile to get current time\n");
rte_errno = errno;
@@ -1119,7 +1081,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
/* for secondary process, send request to the primary process only */
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
- ret = mp_request_async(eal_mp_socket_path(), copy, param);
+ ret = mp_request_async(eal_mp_socket_path(), copy, param, ts);
/* if we didn't send anything, put dummy request on the queue */
if (ret == 0 && reply->nb_sent == 0) {
@@ -1146,7 +1108,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
- if (flock(dir_fd, LOCK_EX)) {
+ if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
rte_errno = errno;
@@ -1162,7 +1124,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
ent->d_name);
- if (mp_request_async(path, copy, param))
+ if (mp_request_async(path, copy, param, ts))
ret = -1;
}
/* if we didn't send anything, put dummy request on the queue */
@@ -1171,9 +1133,6 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
dummy_used = true;
}
- /* trigger async request thread wake up */
- pthread_cond_signal(&pending_requests.async_cond);
-
/* finally, unlock the queue */
pthread_mutex_unlock(&pending_requests.lock);
@@ -1213,5 +1172,10 @@ rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
return -1;
}
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
return mp_send(msg, peer, MP_REP);
}
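The async reply path now runs from an EAL alarm instead of a dedicated thread, but the user-facing flow is unchanged; a minimal sketch of an asynchronous request (the action name is hypothetical):

#include <stdio.h>
#include <string.h>
#include <time.h>

#include <rte_eal.h>

static int
on_reply(const struct rte_mp_msg *request, const struct rte_mp_reply *reply)
{
	/* invoked from the alarm callback once all peers replied or the
	 * timeout expired
	 */
	printf("request %s: %d of %d replies\n", request->name,
		reply->nb_received, reply->nb_sent);
	return 0;
}

static int
send_async_request(void)
{
	struct rte_mp_msg msg;
	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};

	memset(&msg, 0, sizeof(msg));
	snprintf(msg.name, sizeof(msg.name), "my_action"); /* hypothetical */

	return rte_mp_request_async(&msg, &ts, on_reply);
}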
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 42398630..48ef4d6d 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -175,7 +175,7 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
params = malloc(sizeof(*params));
if (!params)
- return -1;
+ return -ENOMEM;
params->start_routine = start_routine;
params->arg = arg;
@@ -185,13 +185,14 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
if (ret != 0) {
free(params);
- return ret;
+ return -ret;
}
if (name != NULL) {
ret = rte_thread_setname(*thread, name);
if (ret < 0)
- goto fail;
+ RTE_LOG(DEBUG, EAL,
+ "Cannot set name for ctrl thread\n");
}
cpu_found = 0;
@@ -227,5 +228,5 @@ fail:
}
pthread_cancel(*thread);
pthread_join(*thread, NULL);
- return ret;
+ return -ret;
}
diff --git a/lib/librte_eal/common/eal_common_uuid.c b/lib/librte_eal/common/eal_common_uuid.c
new file mode 100644
index 00000000..1b93c5b3
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_uuid.c
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include <rte_uuid.h>
+
+/* UUID packed form */
+struct uuid {
+ uint32_t time_low;
+ uint16_t time_mid;
+ uint16_t time_hi_and_version;
+ uint16_t clock_seq;
+ uint8_t node[6];
+};
+
+static void uuid_pack(const struct uuid *uu, rte_uuid_t ptr)
+{
+ uint32_t tmp;
+ uint8_t *out = ptr;
+
+ tmp = uu->time_low;
+ out[3] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[2] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[1] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[0] = (uint8_t) tmp;
+
+ tmp = uu->time_mid;
+ out[5] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[4] = (uint8_t) tmp;
+
+ tmp = uu->time_hi_and_version;
+ out[7] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[6] = (uint8_t) tmp;
+
+ tmp = uu->clock_seq;
+ out[9] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[8] = (uint8_t) tmp;
+
+ memcpy(out+10, uu->node, 6);
+}
+
+static void uuid_unpack(const rte_uuid_t in, struct uuid *uu)
+{
+ const uint8_t *ptr = in;
+ uint32_t tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_low = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_mid = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_hi_and_version = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->clock_seq = tmp;
+
+ memcpy(uu->node, ptr, 6);
+}
+
+bool rte_uuid_is_null(const rte_uuid_t uu)
+{
+ const uint8_t *cp = uu;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ if (*cp++)
+ return false;
+ return true;
+}
+
+/*
+ * rte_uuid_compare() - compare two UUIDs.
+ */
+int rte_uuid_compare(const rte_uuid_t uu1, const rte_uuid_t uu2)
+{
+ struct uuid uuid1, uuid2;
+
+ uuid_unpack(uu1, &uuid1);
+ uuid_unpack(uu2, &uuid2);
+
+#define UUCMP(u1, u2) \
+ do { if (u1 != u2) return (u1 < u2) ? -1 : 1; } while (0)
+
+ UUCMP(uuid1.time_low, uuid2.time_low);
+ UUCMP(uuid1.time_mid, uuid2.time_mid);
+ UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version);
+ UUCMP(uuid1.clock_seq, uuid2.clock_seq);
+#undef UUCMP
+
+ return memcmp(uuid1.node, uuid2.node, 6);
+}
+
+int rte_uuid_parse(const char *in, rte_uuid_t uu)
+{
+ struct uuid uuid;
+ int i;
+ const char *cp;
+ char buf[3];
+
+ if (strlen(in) != 36)
+ return -1;
+
+ for (i = 0, cp = in; i <= 36; i++, cp++) {
+ if ((i == 8) || (i == 13) || (i == 18) ||
+ (i == 23)) {
+ if (*cp == '-')
+ continue;
+ else
+ return -1;
+ }
+ if (i == 36)
+ if (*cp == 0)
+ continue;
+ if (!isxdigit(*cp))
+ return -1;
+ }
+
+ uuid.time_low = strtoul(in, NULL, 16);
+ uuid.time_mid = strtoul(in+9, NULL, 16);
+ uuid.time_hi_and_version = strtoul(in+14, NULL, 16);
+ uuid.clock_seq = strtoul(in+19, NULL, 16);
+ cp = in+24;
+ buf[2] = 0;
+
+ for (i = 0; i < 6; i++) {
+ buf[0] = *cp++;
+ buf[1] = *cp++;
+ uuid.node[i] = strtoul(buf, NULL, 16);
+ }
+
+ uuid_pack(&uuid, uu);
+ return 0;
+}
+
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len)
+{
+ struct uuid uuid;
+
+ uuid_unpack(uu, &uuid);
+
+ snprintf(out, len,
+ "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
+ uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
+ uuid.node[0], uuid.node[1], uuid.node[2],
+ uuid.node[3], uuid.node[4], uuid.node[5]);
+}
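A round-trip sketch of the new helpers; the UUID string is arbitrary:

#include <stdio.h>
#include <rte_uuid.h>

int
main(void)
{
	rte_uuid_t uu;
	char out[RTE_UUID_STRLEN];

	/* parse a canonical 36-character UUID string */
	if (rte_uuid_parse("12345678-9abc-def0-1234-56789abcdef0", uu) != 0)
		return 1;

	/* format it back; out must hold RTE_UUID_STRLEN bytes */
	rte_uuid_unparse(uu, out, sizeof(out));
	printf("%s\n", out);
	return 0;
}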
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
index 364f38d1..de05febf 100644
--- a/lib/librte_eal/common/eal_filesystem.h
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -12,7 +12,6 @@
#define EAL_FILESYSTEM_H
/** Path of rte config file. */
-#define RUNTIME_CONFIG_FMT "%s/.%s_config"
#include <stdint.h>
#include <limits.h>
@@ -30,17 +29,14 @@ eal_create_runtime_dir(void);
const char *
eal_get_runtime_dir(void);
+#define RUNTIME_CONFIG_FNAME "config"
static inline const char *
eal_runtime_config_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- const char *directory = "/var/run";
- const char *home_dir = getenv("HOME");
- if (getuid() != 0 && home_dir != NULL)
- directory = home_dir;
- snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
- internal_config.hugefile_prefix);
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ RUNTIME_CONFIG_FNAME);
return buffer;
}
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index c4cbf3ac..00ee6e06 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -41,11 +41,17 @@ struct internal_config {
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
* instead of native TSC */
volatile unsigned no_shconf; /**< true if there is no shared config */
+ volatile unsigned in_memory;
+ /**< true if DPDK should operate entirely in-memory and not create any
+ * shared files or runtime data.
+ */
volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
/** true to try allocating memory on specific sockets */
volatile unsigned force_sockets;
volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
+ volatile unsigned force_socket_limits;
+ volatile uint64_t socket_limit[RTE_MAX_NUMA_NODES]; /**< limit amount of memory per socket */
uintptr_t base_virtaddr; /**< base address to try and reserve memory from */
volatile unsigned legacy_mem;
/**< true to enable legacy memory behavior (no dynamic allocation,
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 211ae06a..96e16678 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -45,8 +45,12 @@ enum {
OPT_NO_PCI_NUM,
#define OPT_NO_SHCONF "no-shconf"
OPT_NO_SHCONF_NUM,
+#define OPT_IN_MEMORY "in-memory"
+ OPT_IN_MEMORY_NUM,
#define OPT_SOCKET_MEM "socket-mem"
OPT_SOCKET_MEM_NUM,
+#define OPT_SOCKET_LIMIT "socket-limit"
+ OPT_SOCKET_LIMIT_NUM,
#define OPT_SYSLOG "syslog"
OPT_SYSLOG_NUM,
#define OPT_VDEV "vdev"
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index bdadc4d5..4f809a83 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -47,6 +47,18 @@ void eal_log_set_default(FILE *default_log);
int rte_eal_cpu_init(void);
/**
+ * Create memseg lists
+ *
+ * This function is private to EAL.
+ *
+ * Preallocate virtual memory.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_memseg_init(void);
+
+/**
* Map memory
*
* This function is private to EAL.
@@ -258,4 +270,38 @@ int rte_mp_channel_init(void);
*/
void dev_callback_process(char *device_name, enum rte_dev_event_type event);
+/**
+ * @internal
+ * Parse a device string and store its information in an
+ * rte_devargs structure.
+ *
+ * A device description is split by layers of abstraction of the device:
+ * bus, class and driver. Each layer will offer a set of properties that
+ * can be applied either to configure or recognize a device.
+ *
+ * This function will parse those properties and prepare the rte_devargs
+ * to be given to each layer for processing.
+ *
+ * Note: if the "data" field of the devargs points to devstr,
+ * then no dynamic allocation is performed and the rte_devargs
+ * can be safely discarded.
+ *
+ * Otherwise ``data`` will hold a workable copy of devstr, which will be
+ * used by layer descriptors within rte_devargs. In this case,
+ * any rte_devargs should be cleaned-up before being freed.
+ *
+ * @param da
+ * rte_devargs structure to fill.
+ *
+ * @param devstr
+ * Device string.
+ *
+ * @return
+ * 0 on success.
+ * Negative errno values on error (rte_errno is set).
+ */
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr);
+
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h
index 7d4935fc..d9facc64 100644
--- a/lib/librte_eal/common/include/rte_bitmap.h
+++ b/lib/librte_eal/common/include/rte_bitmap.h
@@ -198,12 +198,12 @@ rte_bitmap_get_memory_footprint(uint32_t n_bits) {
/**
* Bitmap initialization
*
- * @param mem_size
- * Minimum expected size of bitmap.
+ * @param n_bits
+ * Number of pre-allocated bits in array2.
* @param mem
* Base address of array1 and array2.
- * @param n_bits
- * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @param mem_size
+ * Minimum expected size of bitmap.
* @return
* Handle to bitmap instance.
*/
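With the corrected parameter order (n_bits, mem, mem_size), initialization looks like this sketch; n_bits must be a non-zero multiple of 512:

#include <rte_bitmap.h>
#include <rte_malloc.h>

static struct rte_bitmap *
make_bitmap(uint32_t n_bits)
{
	uint32_t sz = rte_bitmap_get_memory_footprint(n_bits);
	/* bitmap memory must be zeroed and cache-line aligned */
	uint8_t *mem = rte_zmalloc("bmp", sz, RTE_CACHE_LINE_SIZE);

	if (mem == NULL)
		return NULL;
	return rte_bitmap_init(n_bits, mem, sz);
}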
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index eb9eded4..b7b5b084 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -211,6 +211,7 @@ struct rte_bus {
rte_bus_parse_t parse; /**< Parse a device name */
struct rte_bus_conf conf; /**< Bus configuration */
rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
};
/**
@@ -325,8 +326,7 @@ enum rte_iova_mode rte_bus_get_iommu_class(void);
* The constructor has higher priority than PMD constructors.
*/
#define RTE_REGISTER_BUS(nm, bus) \
-RTE_INIT_PRIO(businitfn_ ##nm, BUS); \
-static void businitfn_ ##nm(void) \
+RTE_INIT_PRIO(businitfn_ ##nm, BUS) \
{\
(bus).name = RTE_STR(nm);\
rte_bus_register(&bus); \
diff --git a/lib/librte_eal/common/include/rte_class.h b/lib/librte_eal/common/include/rte_class.h
new file mode 100644
index 00000000..276c91e9
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_class.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#ifndef _RTE_CLASS_H_
+#define _RTE_CLASS_H_
+
+/**
+ * @file
+ *
+ * DPDK device class interface.
+ *
+ * This file describes the interface of the device class
+ * abstraction layer.
+ *
+ * A device class defines the type of function a device
+ * will be used for e.g.: Ethernet adapter (eth),
+ * cryptographic coprocessor (crypto), etc.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+
+#include <rte_dev.h>
+
+/** Double linked list of classes */
+TAILQ_HEAD(rte_class_list, rte_class);
+
+/**
+ * A structure describing a generic device class.
+ */
+struct rte_class {
+ TAILQ_ENTRY(rte_class) next; /**< Next device class in linked list */
+ const char *name; /**< Name of the class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
+};
+
+/**
+ * Class comparison function.
+ *
+ * @param cls
+ * Class under test.
+ *
+ * @param data
+ * Data to compare against.
+ *
+ * @return
+ * 0 if the class matches the data.
+ * !0 if the class does not match.
+ * <0 if ordering is possible and the class is lower than the data.
+ * >0 if ordering is possible and the class is greater than the data.
+ */
+typedef int (*rte_class_cmp_t)(const struct rte_class *cls, const void *data);
+
+/**
+ * Class iterator to find a particular class.
+ *
+ * This function compares each registered class to find one that matches
+ * the data passed as parameter.
+ *
+ * If the comparison function returns zero, this function stops iterating
+ * over further classes. To continue a search, the class returned by a
+ * previous search can be passed via the start parameter.
+ *
+ * @param start
+ * Starting point for the iteration.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to pass to comparison function.
+ *
+ * @return
+ * A pointer to a rte_class structure or NULL in case no class matches
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data);
+
+/**
+ * Find the registered class for a given name.
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find_by_name(const char *name);
+
+/**
+ * Register a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be registered.
+ */
+__rte_experimental
+void rte_class_register(struct rte_class *cls);
+
+/**
+ * Unregister a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be unregistered.
+ */
+__rte_experimental
+void rte_class_unregister(struct rte_class *cls);
+
+/**
+ * Helper for Class registration.
+ * The constructor has lower priority than Bus constructors.
+ * The constructor has higher priority than PMD constructors.
+ */
+#define RTE_REGISTER_CLASS(nm, cls) \
+RTE_INIT_PRIO(classinitfn_ ##nm, CLASS) \
+{\
+ (cls).name = RTE_STR(nm); \
+ rte_class_register(&cls); \
+}
+
+#define RTE_UNREGISTER_CLASS(nm, cls) \
+RTE_FINI_PRIO(classfinifn_ ##nm, CLASS) \
+{ \
+ rte_class_unregister(&cls); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CLASS_H_ */
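A hypothetical class registration using the constructor helper; the iterator is left as a stub:

#include <rte_class.h>

static void *
my_dev_iterate(const void *start, const char *str,
		const struct rte_dev_iterator *it)
{
	(void)start; (void)str; (void)it;
	/* walk this class's device list here; NULL ends the iteration */
	return NULL;
}

static struct rte_class my_class = {
	.dev_iterate = my_dev_iterate,
};

/* runs as a constructor: after bus registration, before PMDs */
RTE_REGISTER_CLASS(myclass, my_class);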
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index 434adfd4..069c13ec 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -83,6 +83,7 @@ typedef uint16_t unaligned_uint16_t;
#define RTE_PRIORITY_LOG 101
#define RTE_PRIORITY_BUS 110
+#define RTE_PRIORITY_CLASS 120
#define RTE_PRIORITY_LAST 65535
#define RTE_PRIO(prio) \
@@ -112,6 +113,29 @@ static void __attribute__((constructor(RTE_PRIO(prio)), used)) func(void)
RTE_INIT_PRIO(func, LAST)
/**
+ * Run after main() with low priority.
+ *
+ * @param func
+ * Destructor function name.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the last to run.
+ */
+#define RTE_FINI_PRIO(func, prio) \
+static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
+
+/**
+ * Run after main() with high priority.
+ *
+ * The destructor will be run *before* prioritized destructors.
+ *
+ * @param func
+ * Destructor function name.
+ */
+#define RTE_FINI(func) \
+ RTE_FINI_PRIO(func, LAST)
+
+/**
* Force a function to be inlined
*/
#define __rte_always_inline inline __attribute__((always_inline))
@@ -294,6 +318,11 @@ rte_combine64ms1b(register uint64_t v)
/*********** Macros to work with powers of 2 ********/
/**
+ * Macro to return 1 if n is a power of 2, 0 otherwise
+ */
+#define RTE_IS_POWER_OF_2(n) ((n) && !(((n) - 1) & (n)))
+
+/**
* Returns true if n is a power of 2
* @param n
* Number to check
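A small sketch pairing the new destructor helper with its constructor counterpart:

#include <stdio.h>
#include <rte_common.h>

/* runs before main(), at the default (last) constructor priority */
RTE_INIT(example_setup)
{
	printf("setup\n");
}

/* runs after main(); plain RTE_FINI runs before prioritized destructors */
RTE_FINI(example_teardown)
{
	printf("teardown\n");
}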
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 3879ff3c..b80a8059 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -69,9 +69,7 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
* Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the
* RTE_*_RET() macros defined below is compiled in debug mode.
*/
-#if defined(RTE_LIBRTE_ETHDEV_DEBUG) || \
- defined(RTE_LIBRTE_CRYPTODEV_DEBUG) || \
- defined(RTE_LIBRTE_EVENTDEV_DEBUG)
+#if defined(RTE_LIBRTE_EVENTDEV_DEBUG)
#define RTE_PMD_DEBUG_TRACE(...) \
rte_pmd_debug_trace(__func__, __VA_ARGS__)
#else
@@ -176,6 +174,7 @@ struct rte_device {
* @return
* 0 on success, negative on error.
*/
+__rte_deprecated
int rte_eal_dev_attach(const char *name, const char *devargs);
/**
@@ -186,6 +185,7 @@ int rte_eal_dev_attach(const char *name, const char *devargs);
* @return
* 0 on success, negative on error.
*/
+__rte_deprecated
int rte_eal_dev_detach(struct rte_device *dev);
/**
@@ -285,6 +285,103 @@ __attribute__((used)) = str
static const char DRV_EXP_TAG(name, kmod_dep_export)[] \
__attribute__((used)) = str
+/**
+ * Iteration context.
+ *
+ * This context carries over the current iteration state.
+ */
+struct rte_dev_iterator {
+ const char *dev_str; /**< device string. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ struct rte_device *device; /**< current position. */
+ void *class_device; /**< additional specialized context. */
+};
+
+/**
+ * Device iteration function.
+ *
+ * Find the next device matching the properties passed as parameters.
+ * The function takes an additional ``start`` parameter, that is
+ * used as starting context when relevant.
+ *
+ * The function returns the current element in the iteration.
+ * This return value will potentially be used as a start parameter
+ * in subsequent calls to the function.
+ *
+ * The additional iterator parameter is only there if a specific
+ * implementation needs additional context. It must not be modified by
+ * the iteration function itself.
+ *
+ * @param start
+ * Starting iteration context.
+ *
+ * @param devstr
+ * Device description string.
+ *
+ * @param it
+ * Device iterator.
+ *
+ * @return
+ * The address of the current element matching the device description
+ * string.
+ */
+typedef void *(*rte_dev_iterate_t)(const void *start,
+ const char *devstr,
+ const struct rte_dev_iterator *it);
+
+/**
+ * Initializes a device iterator.
+ *
+ * This iterator allows accessing a list of devices matching a criteria.
+ * The device matching is made among all buses and classes currently registered,
+ * filtered by the device description given as parameter.
+ *
+ * This function will not allocate any memory. It is safe to stop the
+ * iteration at any moment and let the iterator go out of context.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @param str
+ * Device description string.
+ *
+ * @return
+ * 0 on successful initialization.
+ * <0 on error.
+ */
+__rte_experimental
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it, const char *str);
+
+/**
+ * Iterates on a device iterator.
+ *
+ * Generates a new rte_device handle corresponding to the next element
+ * in the list described by the iterator.
+ *
+ * The next object is returned, and the iterator is updated.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @return
+ * An rte_device handle if found.
+ * NULL if an error occurred (rte_errno is set).
+ * NULL if no device could be found (rte_errno is not set).
+ */
+__rte_experimental
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it);
+
+#define RTE_DEV_FOREACH(dev, devstr, it) \
+ for (rte_dev_iterator_init(it, devstr), \
+ dev = rte_dev_iterator_next(it); \
+ dev != NULL; \
+ dev = rte_dev_iterator_next(it))
+
#ifdef __cplusplus
}
#endif
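A sketch of walking matching devices with the new iterator; the filter string is hypothetical:

#include <stdio.h>
#include <rte_dev.h>

static void
list_devices(void)
{
	struct rte_dev_iterator it;
	struct rte_device *dev;

	/* iterate every device on the vdev bus (hypothetical filter) */
	RTE_DEV_FOREACH(dev, "bus=vdev", &it)
		printf("found %s\n", dev->name);
}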
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index 58fbd90a..097a4ce7 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -51,12 +51,19 @@ struct rte_devargs {
enum rte_devtype type;
/** Device policy. */
enum rte_dev_policy policy;
- /** Bus handle for the device. */
- struct rte_bus *bus;
/** Name of the device. */
char name[RTE_DEV_NAME_MAX_LEN];
+ RTE_STD_C11
+ union {
/** Arguments string as given by user or "" for no argument. */
- char *args;
+ char *args;
+ const char *drv_str;
+ };
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ const char *data; /**< Device string storage. */
};
/**
@@ -96,6 +103,42 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
* in argument. Store which bus will handle the device, its name
* and the eventual device parameters.
*
+ * The syntax is:
+ *
+ * bus:device_identifier,arg1=val1,arg2=val2
+ *
+ * where "bus:" is the bus name followed by any character separator.
+ * The bus name is optional. If no bus name is specified, each bus
+ * will attempt to recognize the device identifier. The first one
+ * to succeed will be used.
+ *
+ * Examples:
+ *
+ * pci:0000:05.00.0,arg=val
+ * 05.00.0,arg=val
+ * vdev:net_ring0
+ *
+ * @param da
+ * The devargs structure holding the device information.
+ *
+ * @param dev
+ * String describing a device.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative errno on error.
+ */
+__rte_experimental
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev);
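A sketch parsing one of the device strings listed above; cleanup of any allocated fields is omitted:

#include <stdio.h>
#include <string.h>

#include <rte_devargs.h>

static int
parse_example(void)
{
	struct rte_devargs da;

	memset(&da, 0, sizeof(da));
	/* the bus prefix is optional; without it, each bus tries to
	 * recognize the identifier
	 */
	if (rte_devargs_parse(&da, "vdev:net_ring0") != 0) {
		fprintf(stderr, "cannot parse device string\n");
		return -1;
	}
	printf("bus %s, device %s\n", da.bus->name, da.name);
	return 0;
}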
+
+/**
+ * Parse a device string.
+ *
+ * Verify that a bus is capable of handling the device passed
+ * in argument. Store which bus will handle the device, its name
+ * and the eventual device parameters.
+ *
* The device string is built with a printf-like syntax.
*
* The syntax is:
@@ -124,8 +167,8 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
*/
__rte_experimental
int
-rte_devargs_parse(struct rte_devargs *da,
- const char *format, ...)
+rte_devargs_parsef(struct rte_devargs *da,
+ const char *format, ...)
__attribute__((format(printf, 2, 0)));
/**
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index 8de5d69e..e114dcbd 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -490,27 +490,13 @@ static inline int rte_gettid(void)
enum rte_iova_mode rte_eal_iova_mode(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Get user provided pool ops name for mbuf
*
* @return
* returns user provided pool ops name.
*/
-const char * __rte_experimental
-rte_eal_mbuf_user_pool_ops(void);
-
-/**
- * @deprecated
- * Get default pool ops name for mbuf
- *
- * @return
- * returns default pool ops name.
- */
-__rte_deprecated
const char *
-rte_eal_mbuf_default_mempool_ops(void);
+rte_eal_mbuf_user_pool_ops(void);
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
index 3e61fffe..5d880551 100644
--- a/lib/librte_eal/common/include/rte_fbarray.h
+++ b/lib/librte_eal/common/include/rte_fbarray.h
@@ -336,6 +336,120 @@ rte_fbarray_find_contig_free(struct rte_fbarray *arr,
int __rte_experimental
rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start);
+/**
+ * Find index of previous free element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of previous used element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` free elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of free elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` used elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of used elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find how many more free entries there are before specified index (like
+ * ``rte_fbarray_find_contig_free`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr,
+ unsigned int start);
+
+
+/**
+ * Find how many more used entries there are before specified index (like
+ * ``rte_fbarray_find_contig_used`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start);
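A sketch using one of the new reverse lookups; assumes an fbarray set up elsewhere:

#include <stdio.h>
#include <rte_fbarray.h>

/* walk used slots from high to low, starting at idx */
static void
dump_used_backwards(struct rte_fbarray *arr, unsigned int idx)
{
	int i = rte_fbarray_find_prev_used(arr, idx);

	while (i >= 0) {
		printf("slot %d in use\n", i);
		if (i == 0)
			break;
		i = rte_fbarray_find_prev_used(arr, (unsigned int)i - 1);
	}
}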
+
/**
* Dump ``rte_fbarray`` metadata.
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index aab9f6fe..c4b7f4cf 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -264,6 +264,60 @@ int __rte_experimental
rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
/**
+ * Walk list of all memsegs without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
+
+/**
+ * Walk each VA-contiguous area without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
+
+/**
+ * Walk each allocated memseg list without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
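A sketch of the intended use: calling the unlocked walk from inside a memory event callback, where the hotplug lock is already held (callback registration not shown):

#include <rte_memory.h>

static int
count_seg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
		void *arg)
{
	size_t *total = arg;

	(void)msl;
	*total += ms->len;
	return 0; /* returning 0 continues the walk */
}

/* only safe where the memory hotplug lock is already held */
static size_t
total_mem_in_callback(void)
{
	size_t total = 0;

	rte_memseg_walk_thread_unsafe(count_seg, &total);
	return total;
}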
+
+/**
* Dump the physical memory layout to a file.
*
* @note This function read-locks the memory hotplug subsystem, and thus cannot
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index ef370fa6..f478fa9e 100644
--- a/lib/librte_eal/common/include/rte_memzone.h
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -81,8 +81,12 @@ struct rte_memzone {
* memzones from memory that is already available. It will not trigger any
* new allocations.
*
- * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
- * supported.
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather the
+ * biggest memzone available on the socket corresponding to the lcore from
+ * which the reservation was called.
*
* @param name
* The name of the memzone. If it already exists, the function will
@@ -141,8 +145,12 @@ const struct rte_memzone *rte_memzone_reserve(const char *name,
* memzones from memory that is already available. It will not trigger any
* new allocations.
*
- * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
- * supported.
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather the
+ * biggest memzone available on the socket corresponding to the lcore from
+ * which the reservation was called.
*
* @param name
* The name of the memzone. If it already exists, the function will
@@ -206,8 +214,12 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name,
* memzones from memory that is already available. It will not trigger any
* new allocations.
*
- * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
- * supported.
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather the
+ * biggest memzone available on the socket corresponding to the lcore from
+ * which the reservation was called.
*
* @param name
* The name of the memzone. If it already exists, the function will
diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h
index aea4d91b..34b41aff 100644
--- a/lib/librte_eal/common/include/rte_service.h
+++ b/lib/librte_eal/common/include/rte_service.h
@@ -162,6 +162,26 @@ int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate);
int32_t rte_service_runstate_get(uint32_t id);
/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * This function returns whether the service may be currently executing on
+ * at least one lcore, or definitely is not. This function can be used to
+ * determine if, after setting the service runstate to stopped, the service
+ * is still executing on a service lcore.
+ *
+ * Care must be taken if calling this function when the service runstate is
+ * running, since the result of this function may be incorrect by the time the
+ * function returns due to service cores running in parallel.
+ *
+ * @retval 1 Service may be running on one or more lcores
+ * @retval 0 Service is not running on any lcore
+ * @retval -EINVAL Invalid service id
+ */
+int32_t __rte_experimental
+rte_service_may_be_active(uint32_t id);
+
+/**
* Enable or disable the check for a service-core being mapped to the service.
* An application can disable the check when it takes the responsibility to run a
* service itself using *rte_service_run_iter_on_app_lcore*.
@@ -363,6 +383,42 @@ int32_t rte_service_attr_get(uint32_t id, uint32_t attr_id,
*/
int32_t rte_service_attr_reset_all(uint32_t id);
+/**
+ * Returns the number of times the service runner has looped.
+ */
+#define RTE_SERVICE_LCORE_ATTR_LOOPS 0
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get an attribute from a service core.
+ *
+ * @param lcore Id of the service core.
+ * @param attr_id Id of the attribute to be retrieved.
+ * @param [out] attr_value Pointer to storage in which to write retrieved value.
+ * @retval 0 Success, the attribute value has been written to *attr_value*.
+ * -EINVAL Invalid lcore, attr_id or attr_value was NULL.
+ * -ENOTSUP lcore is not a service core.
+ */
+int32_t __rte_experimental
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reset all attribute values of a service core.
+ *
+ * @param lcore The service core to reset all the statistics of
+ * @retval 0 Successfully reset attributes
+ * -EINVAL Invalid lcore provided
+ * -ENOTSUP lcore is not a service core.
+ */
+int32_t __rte_experimental
+rte_service_lcore_attr_reset_all(uint32_t lcore);
+
#ifdef __cplusplus
}
#endif
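A sketch combining the two new service APIs: stop a service, wait until it can no longer be running anywhere, then read the per-core loop counter (ids are hypothetical):

#include <inttypes.h>
#include <stdio.h>

#include <rte_pause.h>
#include <rte_service.h>

static void
stop_and_drain(uint32_t service_id, uint32_t lcore_id)
{
	uint64_t loops;

	rte_service_runstate_set(service_id, 0);

	/* wait until the service cannot be executing on any lcore */
	while (rte_service_may_be_active(service_id) == 1)
		rte_pause();

	/* read how many times this service core has looped */
	if (rte_service_lcore_attr_get(lcore_id,
			RTE_SERVICE_LCORE_ATTR_LOOPS, &loops) == 0)
		printf("lcore %u: %" PRIu64 " loops\n", lcore_id, loops);
}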
diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h
index 8dccaefc..9b01abb2 100644
--- a/lib/librte_eal/common/include/rte_tailq.h
+++ b/lib/librte_eal/common/include/rte_tailq.h
@@ -119,8 +119,7 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
int rte_eal_tailq_register(struct rte_tailq_elem *t);
#define EAL_REGISTER_TAILQ(t) \
-RTE_INIT(tailqinitfn_ ##t); \
-static void tailqinitfn_ ##t(void) \
+RTE_INIT(tailqinitfn_ ##t) \
{ \
if (rte_eal_tailq_register(&t) < 0) \
rte_panic("Cannot initialize tailq: %s\n", t.name); \
diff --git a/lib/librte_eal/common/include/rte_uuid.h b/lib/librte_eal/common/include/rte_uuid.h
new file mode 100644
index 00000000..2c846b5f
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_uuid.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+/**
+ * @file
+ *
+ * UUID related functions originally from libuuid
+ */
+
+#ifndef _RTE_UUID_H_
+#define _RTE_UUID_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+
+/**
+ * Struct describing a Universally Unique Identifier (UUID)
+ */
+typedef unsigned char rte_uuid_t[16];
+
+/**
+ * Helper for defining UUID values for id tables.
+ */
+#define RTE_UUID_INIT(a, b, c, d, e) { \
+ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, \
+ ((a) >> 8) & 0xff, (a) & 0xff, \
+ ((b) >> 8) & 0xff, (b) & 0xff, \
+ ((c) >> 8) & 0xff, (c) & 0xff, \
+ ((d) >> 8) & 0xff, (d) & 0xff, \
+ ((e) >> 40) & 0xff, ((e) >> 32) & 0xff, \
+ ((e) >> 24) & 0xff, ((e) >> 16) & 0xff, \
+ ((e) >> 8) & 0xff, (e) & 0xff \
+}
+
+/**
+ * Test if UUID is all zeros.
+ *
+ * @param uu
+ * The uuid to check.
+ * @return
+ * true if the UUID is the all-zeros value, false otherwise
+ */
+bool rte_uuid_is_null(const rte_uuid_t uu);
+
+/**
+ * Copy uuid.
+ *
+ * @param dst
+ * Destination uuid
+ * @param src
+ * Source uuid
+ */
+static inline void rte_uuid_copy(rte_uuid_t dst, const rte_uuid_t src)
+{
+ memcpy(dst, src, sizeof(rte_uuid_t));
+}
+
+/**
+ * Compare two UUID's
+ *
+ * @param a
+ * A UUID to compare
+ * @param b
+ * A UUID to compare
+ * @return
+ * returns an integer less than, equal to, or greater than zero if UUID a
+ * is less than, equal to, or greater than UUID b.
+ */
+int rte_uuid_compare(const rte_uuid_t a, const rte_uuid_t b);
+
+/**
+ * Extract UUID from string
+ *
+ * @param in
+ * Pointer to string of characters to convert
+ * @param uu
+ * Destination UUID
+ * @return
+ * Returns 0 on success, and -1 if the string is not a valid UUID.
+ */
+int rte_uuid_parse(const char *in, rte_uuid_t uu);
+
+/**
+ * Convert UUID to string
+ *
+ * @param uu
+ * UUID to format
+ * @param out
+ * Resulting string buffer
+ * @param len
+ * Size of the available string buffer
+ */
+#define RTE_UUID_STRLEN (36 + 1)
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_UUID_H */
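The header also supports static UUID tables; a sketch with an arbitrary value:

#include <rte_uuid.h>

/* arbitrary example value, split into the five canonical fields */
static const rte_uuid_t dev_uuid =
	RTE_UUID_INIT(0x12345678, 0x9abc, 0xdef0, 0x1234, 0x56789abcdef0ULL);

static int
is_dev_uuid(const rte_uuid_t other)
{
	return rte_uuid_compare(dev_uuid, other) == 0;
}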
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 6e2a2362..7c6714a2 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -32,7 +32,7 @@ extern "C" {
/**
* Minor version/month number i.e. the mm in yy.mm.z
*/
-#define RTE_VER_MONTH 05
+#define RTE_VER_MONTH 8
/**
* Patch level number i.e. the z in yy.mm.z
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
index f90972fa..5ca13fcc 100644
--- a/lib/librte_eal/common/include/rte_vfio.h
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -179,7 +179,7 @@ rte_vfio_clear_group(int vfio_group_fd);
* 0 if success.
* -1 on error.
*/
-int __rte_experimental
+int
rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
@@ -200,7 +200,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
* -1 on error.
*/
-int __rte_experimental
+int
rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
/**
* Parse IOMMU group number for a device
@@ -222,7 +222,7 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
* 0 for non-existent group or VFIO
* <0 for errors
*/
-int __rte_experimental
+int
rte_vfio_get_group_num(const char *sysfs_base,
const char *dev_addr, int *iommu_group_num);
@@ -236,7 +236,7 @@ rte_vfio_get_group_num(const char *sysfs_base,
* > 0 container fd
* < 0 for errors
*/
-int __rte_experimental
+int
rte_vfio_get_container_fd(void);
/**
@@ -252,13 +252,10 @@ rte_vfio_get_container_fd(void);
* > 0 group fd
* < 0 for errors
*/
-int __rte_experimental
+int
rte_vfio_get_group_fd(int iommu_group_num);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Create a new container for device binding.
*
* @note Any newly allocated DPDK memory will not be mapped into these
@@ -269,13 +266,10 @@ rte_vfio_get_group_fd(int iommu_group_num);
* the container fd if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_create(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Destroy the container, unbind all vfio groups within it.
*
* @param container_fd
@@ -285,13 +279,10 @@ rte_vfio_container_create(void);
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_destroy(int container_fd);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Bind a IOMMU group to a container.
*
* @param container_fd
@@ -304,13 +295,10 @@ rte_vfio_container_destroy(int container_fd);
* group fd if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_group_bind(int container_fd, int iommu_group_num);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Unbind a IOMMU group from a container.
*
* @param container_fd
@@ -323,13 +311,10 @@ rte_vfio_container_group_bind(int container_fd, int iommu_group_num);
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_group_unbind(int container_fd, int iommu_group_num);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Perform DMA mapping for devices in a container.
*
* @param container_fd
@@ -348,14 +333,11 @@ rte_vfio_container_group_unbind(int container_fd, int iommu_group_num);
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
uint64_t iova, uint64_t len);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Perform DMA unmapping for devices in a container.
*
* @param container_fd
@@ -374,7 +356,7 @@ rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr,
uint64_t iova, uint64_t len);
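With these container APIs now stable, a typical multi-container flow looks like this sketch (values hypothetical):

#include <stdint.h>
#include <rte_vfio.h>

static int
map_in_new_container(int iommu_group, uint64_t vaddr, uint64_t iova,
		uint64_t len)
{
	int container_fd = rte_vfio_container_create();

	if (container_fd < 0)
		return -1;

	/* bind the device's IOMMU group to the new container */
	if (rte_vfio_container_group_bind(container_fd, iommu_group) < 0)
		goto err;

	/* establish a DMA mapping for devices in this container */
	if (rte_vfio_container_dma_map(container_fd, vaddr, iova, len) < 0)
		goto err;

	return container_fd;
err:
	rte_vfio_container_destroy(container_fd);
	return -1;
}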
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 9bfe9b9b..e0a8ed15 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -18,10 +18,89 @@
#include <rte_common.h>
#include <rte_spinlock.h>
+#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
+{
+ void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
+ void *data_start, *data_end;
+ rte_iova_t expected_iova;
+ struct rte_memseg *ms;
+ size_t page_sz, cur, max;
+
+ page_sz = (size_t)elem->msl->page_sz;
+ data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
+ data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
+ /* segment must start after header and with specified alignment */
+ contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
+
+ /* if we're in IOVA as VA mode, or if we're in legacy mode with
+ * hugepages, all elements are IOVA-contiguous.
+ */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA ||
+ (internal_config.legacy_mem && rte_eal_has_hugepages()))
+ return RTE_PTR_DIFF(data_end, contig_seg_start);
+
+ cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
+ ms = rte_mem_virt2memseg(cur_page, elem->msl);
+
+ /* do first iteration outside the loop */
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
+ MALLOC_ELEM_TRAILER_LEN;
+ max = cur;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+
+ cur_page = RTE_PTR_ADD(cur_page, page_sz);
+
+ while (cur_page < data_end) {
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+
+ /* reset start of contiguous segment if unexpected iova */
+ if (ms->iova != expected_iova) {
+ /* next contiguous segment must start at specified
+ * alignment.
+ */
+ contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
+ /* new segment start may be on a different page, so find
+ * the page and skip to next iteration to make sure
+ * we're not blowing past data end.
+ */
+ ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
+ cur_page = ms->addr;
+ /* don't trigger another recalculation */
+ expected_iova = ms->iova;
+ continue;
+ }
+ /* cur_seg_end ends on a page boundary or on data end. if we're
+ * looking at data end, then malloc trailer is already included
+ * in the calculations. if we're looking at page end, then we
+ * know there's more data past this page and thus there's space
+ * for malloc element trailer, so don't count it here.
+ */
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
+ /* update max if cur value is bigger */
+ if (cur > max)
+ max = cur;
+
+ /* move to next page */
+ cur_page = page_end;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+ }
+
+ return max;
+}
+
/*
* Initialize a general malloc_elem header structure
*/
@@ -386,16 +465,18 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
if (elem->next != NULL && elem->next->state == ELEM_FREE &&
next_elem_is_adjacent(elem)) {
void *erase;
+ size_t erase_len;
/* we will want to erase the trailer and header */
erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;
/* remove from free list, join to this one */
malloc_elem_free_list_remove(elem->next);
join_elem(elem, elem->next);
- /* erase header and trailer */
- memset(erase, 0, MALLOC_ELEM_OVERHEAD);
+ /* erase header, trailer and pad */
+ memset(erase, 0, erase_len);
}
/*
@@ -406,9 +487,11 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
prev_elem_is_adjacent(elem)) {
struct malloc_elem *new_elem;
void *erase;
+ size_t erase_len;
/* we will want to erase trailer and header */
erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;
/* remove from free list, join to this one */
malloc_elem_free_list_remove(elem->prev);
@@ -416,8 +499,8 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
new_elem = elem->prev;
join_elem(new_elem, elem);
- /* erase header and trailer */
- memset(erase, 0, MALLOC_ELEM_OVERHEAD);
+ /* erase header, trailer and pad */
+ memset(erase, 0, erase_len);
elem = new_elem;
}
@@ -436,7 +519,7 @@ malloc_elem_free(struct malloc_elem *elem)
void *ptr;
size_t data_len;
- ptr = RTE_PTR_ADD(elem, sizeof(*elem));
+ ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
data_len = elem->size - MALLOC_ELEM_OVERHEAD;
elem = malloc_elem_join_adjacent_free(elem);
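
The page walk in malloc_elem_find_max_iova_contig() is easier to see in isolation. Below is a minimal standalone sketch of the same idea, assuming a toy array of per-page IOVAs in place of DPDK's memseg lists; all names here are illustrative, not DPDK API.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* iova[i] is the IOVA of the i-th virtually-contiguous page. Return the
 * largest number of consecutive pages whose IOVAs are also contiguous,
 * mirroring the expected_iova check in the hunk above.
 */
static size_t
max_iova_contig_pages(const uint64_t *iova, size_t n_pages, uint64_t page_sz)
{
	size_t cur = 1, max = 1;

	if (n_pages == 0)
		return 0;
	for (size_t i = 1; i < n_pages; i++) {
		if (iova[i] == iova[i - 1] + page_sz)
			cur++;	/* IOVA matched expectation, run continues */
		else
			cur = 1;	/* unexpected IOVA, restart the run */
		if (cur > max)
			max = cur;
	}
	return max;
}

int main(void)
{
	/* pages 0-2 are IOVA-contiguous, page 3 jumps, pages 3-4 contiguous */
	const uint64_t iova[] = { 0x1000, 0x2000, 0x3000, 0x9000, 0xa000 };

	printf("longest run: %zu pages\n",
			max_iova_contig_pages(iova, 5, 0x1000)); /* 3 */
	return 0;
}
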
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index 7331af9c..e2bda4c0 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -179,4 +179,10 @@ malloc_elem_free_list_index(size_t size);
void
malloc_elem_free_list_insert(struct malloc_elem *elem);
+/*
+ * Find biggest IOVA-contiguous zone within an element with specified alignment.
+ */
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align);
+
#endif /* MALLOC_ELEM_H_ */
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index d6cf3af8..12aaf2d7 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -149,6 +149,52 @@ find_suitable_element(struct malloc_heap *heap, size_t size,
}
/*
+ * Iterates through a heap's free lists to find the free element with the
+ * biggest size that satisfies the requested alignment. Also sets *size to
+ * the size of the element that was found.
+ * Returns NULL on failure, or a pointer to the element on success.
+ */
+static struct malloc_elem *
+find_biggest_element(struct malloc_heap *heap, size_t *size,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem, *max_elem = NULL;
+ size_t idx, max_size = 0;
+
+ for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list)) {
+ size_t cur_size;
+ if (!check_hugepage_sz(flags, elem->msl->page_sz))
+ continue;
+ if (contig) {
+ cur_size =
+ malloc_elem_find_max_iova_contig(elem,
+ align);
+ } else {
+ void *data_start = RTE_PTR_ADD(elem,
+ MALLOC_ELEM_HEADER_LEN);
+ void *data_end = RTE_PTR_ADD(elem, elem->size -
+ MALLOC_ELEM_TRAILER_LEN);
+ void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
+ align);
+ /* check if aligned data start is beyond end */
+ if (aligned >= data_end)
+ continue;
+ cur_size = RTE_PTR_DIFF(data_end, aligned);
+ }
+ if (cur_size > max_size) {
+ max_size = cur_size;
+ max_elem = elem;
+ }
+ }
+ }
+
+ *size = max_size;
+ return max_elem;
+}
+
+/*
* Main function to allocate a block of memory from the heap.
* It locks the free list, scans it, and adds a new memseg if the
* scan fails. Once the new memseg is added, it re-scans and should return
@@ -174,6 +220,26 @@ heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size,
return elem == NULL ? NULL : (void *)(&elem[1]);
}
+static void *
+heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem;
+ size_t size;
+
+ align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ elem = find_biggest_element(heap, &size, flags, align, contig);
+ if (elem != NULL) {
+ elem = malloc_elem_alloc(elem, size, align, 0, contig);
+
+ /* increase heap's count of allocated elements */
+ heap->alloc_count++;
+ }
+
+ return elem == NULL ? NULL : (void *)(&elem[1]);
+}
+
/* this function is exposed in malloc_mp.h */
void
rollback_expand_heap(struct rte_memseg **ms, int n_segs,
@@ -575,6 +641,66 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
return NULL;
}
+static void *
+heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+ void *ret;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ align = align == 0 ? 1 : align;
+
+ ret = heap_alloc_biggest(heap, type, flags, align, contig);
+
+ rte_spinlock_unlock(&(heap->lock));
+
+ return ret;
+}
+
+void *
+malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
+ size_t align, bool contig)
+{
+ int socket, i, cur_socket;
+ void *ret;
+
+ /* return NULL if align is not power-of-2 */
+ if ((align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* Check socket parameter */
+ if (socket >= RTE_MAX_NUMA_NODES)
+ return NULL;
+
+ ret = heap_alloc_biggest_on_socket(type, socket, flags, align,
+ contig);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps */
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ cur_socket = rte_socket_id_by_idx(i);
+ if (cur_socket == socket)
+ continue;
+ ret = heap_alloc_biggest_on_socket(type, cur_socket, flags,
+ align, contig);
+ if (ret != NULL)
+ return ret;
+ }
+ return NULL;
+}
+
/* this function is exposed in malloc_mp.h */
int
malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
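
find_biggest_element() above is a plain max-scan over the free lists. A self-contained sketch of its non-IOVA-contiguous branch follows; the HDR/TRL constants and the free_elem type are hypothetical stand-ins for MALLOC_ELEM_HEADER_LEN/MALLOC_ELEM_TRAILER_LEN and struct malloc_elem.

#include <stddef.h>
#include <stdint.h>

struct free_elem {
	struct free_elem *next;
	size_t size;	/* total element size, including header and trailer */
	uintptr_t addr;	/* start address of the element */
};

#define HDR 64	/* assumed header length */
#define TRL 64	/* assumed trailer length */

/* round x up to a power-of-two boundary a */
static uintptr_t
align_up(uintptr_t x, uintptr_t a)
{
	return (x + a - 1) & ~(a - 1);
}

/* usable size is the gap between the aligned data start and the data end;
 * elements where alignment pushes the start past the end are skipped
 */
static struct free_elem *
biggest(struct free_elem *head, size_t align, size_t *out_size)
{
	struct free_elem *e, *best = NULL;
	size_t best_sz = 0;

	for (e = head; e != NULL; e = e->next) {
		uintptr_t start = align_up(e->addr + HDR, align);
		uintptr_t end = e->addr + e->size - TRL;

		if (start >= end)
			continue;
		if ((size_t)(end - start) > best_sz) {
			best_sz = end - start;
			best = e;
		}
	}
	*out_size = best_sz;
	return best;
}

int main(void)
{
	static char arena[4096];
	struct free_elem a = { NULL, 1024, (uintptr_t)arena };
	struct free_elem b = { &a, 2048, (uintptr_t)arena + 1024 };
	size_t sz;

	return biggest(&b, 64, &sz) != NULL && sz > 0 ? 0 : 1;
}
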
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index 03b80141..f52cb555 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -29,6 +29,10 @@ void *
malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
size_t align, size_t bound, bool contig);
+void *
+malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig);
+
int
malloc_heap_free(struct malloc_elem *elem);
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 8a3dcfee..56005bea 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -8,6 +8,7 @@ common_objs = []
common_sources = files(
'eal_common_bus.c',
'eal_common_cpuflags.c',
+ 'eal_common_class.c',
'eal_common_devargs.c',
'eal_common_dev.c',
'eal_common_errno.c',
@@ -25,6 +26,7 @@ common_sources = files(
'eal_common_tailqs.c',
'eal_common_thread.c',
'eal_common_timer.c',
+ 'eal_common_uuid.c',
'malloc_elem.c',
'malloc_heap.c',
'malloc_mp.c',
@@ -46,6 +48,7 @@ common_headers = files(
'include/rte_branch_prediction.h',
'include/rte_bus.h',
'include/rte_bitmap.h',
+ 'include/rte_class.h',
'include/rte_common.h',
'include/rte_debug.h',
'include/rte_devargs.h',
@@ -75,6 +78,7 @@ common_headers = files(
'include/rte_string_fns.h',
'include/rte_tailq.h',
'include/rte_time.h',
+ 'include/rte_uuid.h',
'include/rte_version.h')
# special case install the generic headers, since they go in a subdir
diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 73507aac..8767c722 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -52,6 +52,7 @@ struct rte_service_spec_impl {
rte_atomic32_t num_mapped_cores;
uint64_t calls;
uint64_t cycles_spent;
+ uint8_t active_on_lcore[RTE_MAX_LCORE];
} __rte_cache_aligned;
/* the internal values of a service core */
@@ -61,7 +62,7 @@ struct core_state {
uint8_t runstate; /* running or stopped */
uint8_t is_service_core; /* set if core is currently a service core */
- /* extreme statistics */
+ uint64_t loops;
uint64_t calls_per_service[RTE_SERVICE_NUM_MAX];
} __rte_cache_aligned;
@@ -347,15 +348,19 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s,
static inline int32_t
-service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
+service_run(uint32_t i, int lcore, struct core_state *cs, uint64_t service_mask)
{
if (!service_valid(i))
return -EINVAL;
struct rte_service_spec_impl *s = &rte_services[i];
if (s->comp_runstate != RUNSTATE_RUNNING ||
s->app_runstate != RUNSTATE_RUNNING ||
- !(service_mask & (UINT64_C(1) << i)))
+ !(service_mask & (UINT64_C(1) << i))) {
+ s->active_on_lcore[lcore] = 0;
return -ENOEXEC;
+ }
+
+ s->active_on_lcore[lcore] = 1;
/* check do we need cmpset, if MT safe or <= 1 core
* mapped, atomic ops are not required.
@@ -374,6 +379,25 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
return 0;
}
+int32_t __rte_experimental
+rte_service_may_be_active(uint32_t id)
+{
+ uint32_t ids[RTE_MAX_LCORE] = {0};
+ struct rte_service_spec_impl *s = &rte_services[id];
+ int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
+ int i;
+
+ if (!service_valid(id))
+ return -EINVAL;
+
+ for (i = 0; i < lcore_count; i++) {
+ if (s->active_on_lcore[ids[i]])
+ return 1;
+ }
+
+ return 0;
+}
+
int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
uint32_t serialize_mt_unsafe)
{
@@ -398,7 +422,7 @@ int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
return -EBUSY;
}
- int ret = service_run(id, cs, UINT64_MAX);
+ int ret = service_run(id, rte_lcore_id(), cs, UINT64_MAX);
if (serialize_mt_unsafe)
rte_atomic32_dec(&s->num_mapped_cores);
@@ -419,9 +443,11 @@ rte_service_runner_func(void *arg)
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
/* return value ignored as no change to code flow */
- service_run(i, cs, service_mask);
+ service_run(i, lcore, cs, service_mask);
}
+ cs->loops++;
+
rte_smp_rmb();
}
@@ -729,6 +755,28 @@ rte_service_attr_get(uint32_t id, uint32_t attr_id, uint32_t *attr_value)
}
}
+int32_t __rte_experimental
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE || !attr_value)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ switch (attr_id) {
+ case RTE_SERVICE_LCORE_ATTR_LOOPS:
+ *attr_value = cs->loops;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static void
rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
uint64_t all_cycles, uint32_t reset)
@@ -764,6 +812,23 @@ rte_service_attr_reset_all(uint32_t id)
return 0;
}
+int32_t __rte_experimental
+rte_service_lcore_attr_reset_all(uint32_t lcore)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ cs->loops = 0;
+
+ return 0;
+}
+
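
Taken together, rte_service_may_be_active() and the new lcore attributes let an application tell when a stopped service has actually drained. A usage sketch, assuming EAL is initialized, sid is a registered service that was already stopped via rte_service_runstate_set(), and lcore is one of its service cores:

#include <stdio.h>
#include <inttypes.h>
#include <rte_cycles.h>
#include <rte_service.h>

static void
wait_for_service_quiesce(uint32_t sid, uint32_t lcore)
{
	uint64_t loops;
	int iters = 1000;

	/* may_be_active() returns 1 while a service lcore may still be
	 * inside the service callback, 0 once it is certainly idle */
	while (rte_service_may_be_active(sid) == 1 && iters-- > 0)
		rte_delay_us(10);

	if (rte_service_lcore_attr_get(lcore, RTE_SERVICE_LCORE_ATTR_LOOPS,
			&loops) == 0)
		printf("service lcore %u made %" PRIu64 " loops\n",
				lcore, loops);

	/* zero the per-lcore counters for the next measurement window */
	rte_service_lcore_attr_reset_all(lcore);
}
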
static void
service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
{
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 3719ec9d..fd92c75c 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -10,7 +10,7 @@ ARCH_DIR ?= $(RTE_ARCH)
EXPORT_MAP := ../../rte_eal_version.map
VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
-LIBABIVER := 7
+LIBABIVER := 8
VPATH += $(RTE_SDK)/lib/librte_eal/common
@@ -24,6 +24,7 @@ LDLIBS += -ldl
LDLIBS += -lpthread
LDLIBS += -lgcc_s
LDLIBS += -lrt
+LDLIBS += -lrte_kvargs
ifeq ($(CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES),y)
LDLIBS += -lnuma
endif
@@ -60,12 +61,14 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hypervisor.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_class.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_bus.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_fbarray.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_uuid.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 8655b869..e59ac657 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -155,22 +155,12 @@ eal_get_runtime_dir(void)
}
/* Return user provided mbuf pool ops name */
-const char * __rte_experimental
+const char *
rte_eal_mbuf_user_pool_ops(void)
{
return internal_config.user_mbuf_pool_ops_name;
}
-/* Return mbuf pool ops name */
-const char *
-rte_eal_mbuf_default_mempool_ops(void)
-{
- if (internal_config.user_mbuf_pool_ops_name == NULL)
- return RTE_MBUF_DEFAULT_MEMPOOL_OPS;
-
- return internal_config.user_mbuf_pool_ops_name;
-}
-
/* Return a pointer to the configuration structure */
struct rte_config *
rte_eal_get_configuration(void)
@@ -344,12 +334,17 @@ eal_proc_type_detect(void)
enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
const char *pathname = eal_runtime_config_path();
- /* if we can open the file but not get a write-lock we are a secondary
- * process. NOTE: if we get a file handle back, we keep that open
- * and don't close it to prevent a race condition between multiple opens */
- if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
- (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
- ptype = RTE_PROC_SECONDARY;
+ /* if there is no shared config, there can be no secondary processes */
+ if (!internal_config.no_shconf) {
+ /* if we can open the file but not get a write-lock we are a
+ * secondary process. NOTE: if we get a file handle back, we
+ * keep that open and don't close it to prevent a race condition
+ * between multiple opens.
+ */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+ }
RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
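
The detection scheme itself is plain POSIX advisory locking: the primary holds a write lock on the shared config file, so a process that can open the file but cannot take the lock must be a secondary. A standalone sketch, using a stand-in path for the runtime config file:

#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	const char *path = "/tmp/demo_config"; /* stand-in for the config path */
	struct flock wr_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,	/* zero length locks the whole file */
	};
	int fd = open(path, O_RDWR | O_CREAT, 0600);

	if (fd < 0)
		return 1;
	/* keep fd open afterwards, as the patch does: the lock must be
	 * held for the lifetime of the process */
	if (fcntl(fd, F_SETLK, &wr_lock) < 0)
		printf("secondary (config already write-locked)\n");
	else
		printf("primary (took the write lock)\n");
	return 0;
}
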
@@ -405,6 +400,7 @@ eal_usage(const char *prgname)
eal_common_usage();
printf("EAL Linux options:\n"
" --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n"
+ " --"OPT_SOCKET_LIMIT" Limit memory allocation on sockets (comma separated values)\n"
" --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n"
" --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n"
" --"OPT_BASE_VIRTADDR" Base virtual address\n"
@@ -434,46 +430,45 @@ rte_set_application_usage_hook( rte_usage_hook_t usage_func )
}
static int
-eal_parse_socket_mem(char *socket_mem)
+eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
{
char * arg[RTE_MAX_NUMA_NODES];
char *end;
int arg_num, i, len;
uint64_t total_mem = 0;
- len = strnlen(socket_mem, SOCKET_MEM_STRLEN);
+ len = strnlen(strval, SOCKET_MEM_STRLEN);
if (len == SOCKET_MEM_STRLEN) {
RTE_LOG(ERR, EAL, "--socket-mem is too long\n");
return -1;
}
/* all other error cases will be caught later */
- if (!isdigit(socket_mem[len-1]))
+ if (!isdigit(strval[len-1]))
return -1;
/* split the optarg into separate socket values */
- arg_num = rte_strsplit(socket_mem, len,
+ arg_num = rte_strsplit(strval, len,
arg, RTE_MAX_NUMA_NODES, ',');
/* if split failed, or 0 arguments */
if (arg_num <= 0)
return -1;
- internal_config.force_sockets = 1;
-
/* parse each defined socket option */
errno = 0;
for (i = 0; i < arg_num; i++) {
+ uint64_t val;
end = NULL;
- internal_config.socket_mem[i] = strtoull(arg[i], &end, 10);
+ val = strtoull(arg[i], &end, 10);
/* check for invalid input */
if ((errno != 0) ||
(arg[i][0] == '\0') || (end == NULL) || (*end != '\0'))
return -1;
- internal_config.socket_mem[i] *= 1024ULL;
- internal_config.socket_mem[i] *= 1024ULL;
- total_mem += internal_config.socket_mem[i];
+ val <<= 20;
+ total_mem += val;
+ socket_arg[i] = val;
}
/* check if we have a positive amount of total memory */
@@ -621,13 +616,27 @@ eal_parse_args(int argc, char **argv)
break;
case OPT_SOCKET_MEM_NUM:
- if (eal_parse_socket_mem(optarg) < 0) {
+ if (eal_parse_socket_arg(optarg,
+ internal_config.socket_mem) < 0) {
RTE_LOG(ERR, EAL, "invalid parameters for --"
OPT_SOCKET_MEM "\n");
eal_usage(prgname);
ret = -1;
goto out;
}
+ internal_config.force_sockets = 1;
+ break;
+
+ case OPT_SOCKET_LIMIT_NUM:
+ if (eal_parse_socket_arg(optarg,
+ internal_config.socket_limit) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_SOCKET_LIMIT "\n");
+ eal_usage(prgname);
+ ret = -1;
+ goto out;
+ }
+ internal_config.force_socket_limits = 1;
break;
case OPT_BASE_VIRTADDR_NUM:
@@ -678,6 +687,14 @@ eal_parse_args(int argc, char **argv)
}
}
+ /* create runtime data directory */
+ if (internal_config.no_shconf == 0 &&
+ eal_create_runtime_dir() < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
+ ret = -1;
+ goto out;
+ }
+
if (eal_adjust_config(&internal_config) != 0) {
ret = -1;
goto out;
@@ -817,13 +834,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- /* create runtime data directory */
- if (eal_create_runtime_dir() < 0) {
- rte_eal_init_alert("Cannot create runtime directory\n");
- rte_errno = EACCES;
- return -1;
- }
-
if (eal_plugins_init() < 0) {
rte_eal_init_alert("Cannot init plugins\n");
rte_errno = EINVAL;
@@ -839,6 +849,11 @@ rte_eal_init(int argc, char **argv)
rte_config_init();
+ if (rte_eal_intr_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+ return -1;
+ }
+
/* Put mp channel init before bus scan so that we can init the vdev
* bus through mp channel in the secondary process before the bus scan.
*/
@@ -968,11 +983,6 @@ rte_eal_init(int argc, char **argv)
rte_config.master_lcore, (int)thread_id, cpuset,
ret == 0 ? "" : "...");
- if (rte_eal_intr_init() < 0) {
- rte_eal_init_alert("Cannot init interrupt-handling thread\n");
- return -1;
- }
-
RTE_LCORE_FOREACH_SLAVE(i) {
/*
@@ -1044,9 +1054,26 @@ rte_eal_init(int argc, char **argv)
return fctret;
}
+static int
+mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg __rte_unused)
+{
+ /* ms is const, so find this memseg */
+ struct rte_memseg *found = rte_mem_virt2memseg(ms->addr, msl);
+
+ found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;
+
+ return 0;
+}
+
int __rte_experimental
rte_eal_cleanup(void)
{
+ /* if we're in a primary process, we need to mark hugepages as freeable
+ * so that finalization can release them back to the system.
+ */
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+ rte_memseg_walk(mark_freeable, NULL);
rte_service_finalize();
return 0;
}
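
With hugepages now marked freeable here, memory is only returned to the system if the application actually calls rte_eal_cleanup() on shutdown. Minimal sketch:

#include <rte_eal.h>

int main(int argc, char **argv)
{
	int ret = rte_eal_init(argc, argv);

	if (ret < 0)
		return 1;

	/* ... application work ... */

	/* release EAL resources; in a primary process this also marks
	 * hugepages freeable so they go back to the system */
	rte_eal_cleanup();
	return 0;
}
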
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
index c115e823..391d2a65 100644
--- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -19,7 +19,6 @@
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_errno.h>
-#include <rte_malloc.h>
#include <rte_spinlock.h>
#include <eal_private.h>
@@ -91,7 +90,7 @@ eal_alarm_callback(void *arg __rte_unused)
rte_spinlock_lock(&alarm_list_lk);
LIST_REMOVE(ap, next);
- rte_free(ap);
+ free(ap);
}
if (!LIST_EMPTY(&alarm_list)) {
@@ -122,7 +121,7 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
return -EINVAL;
- new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0);
+ new_alarm = calloc(1, sizeof(*new_alarm));
if (new_alarm == NULL)
return -ENOMEM;
@@ -196,7 +195,7 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
if (ap->executing == 0) {
LIST_REMOVE(ap, next);
- rte_free(ap);
+ free(ap);
count++;
} else {
/* If calling from other context, mark that alarm is executing
@@ -220,7 +219,7 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
if (ap->executing == 0) {
LIST_REMOVE(ap, next);
- rte_free(ap);
+ free(ap);
count++;
ap = ap_prev;
} else if (pthread_equal(ap->executing_id, pthread_self()) == 0)
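
Switching the alarm list to plain libc allocation removes its dependency on the rte heap, which matters now that the interrupt thread starts before memory init (see the eal.c hunk above). For reference, a small sketch of the alarm API these changes serve; the (void *)-1 sentinel matching any cb_arg is taken from the unregister logic above:

#include <stdio.h>
#include <rte_alarm.h>

static void
alarm_cb(void *arg)
{
	printf("alarm fired, arg=%p\n", arg);
}

static void
demo(void)
{
	/* arm a one-shot alarm 100 ms from now */
	if (rte_eal_alarm_set(100 * 1000 /* us */, alarm_cb, NULL) < 0)
		printf("failed to set alarm\n");

	/* ... */

	/* cancel every pending instance of alarm_cb, whatever its arg */
	rte_eal_alarm_cancel(alarm_cb, (void *)-1);
}
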
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
index 7eca711b..3a7d4b22 100644
--- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -18,6 +18,8 @@
#include <sys/queue.h>
#include <sys/stat.h>
+#include <linux/mman.h> /* for hugetlb-related flags */
+
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_launch.h>
@@ -313,11 +315,49 @@ compare_hpi(const void *a, const void *b)
return hpi_b->hugepage_sz - hpi_a->hugepage_sz;
}
+static void
+calc_num_pages(struct hugepage_info *hpi, struct dirent *dirent)
+{
+ uint64_t total_pages = 0;
+ unsigned int i;
+
+ /*
+ * first, try to put all hugepages into relevant sockets, but
+ * if the first attempt fails, fall back to collecting all pages
+ * in one socket and sorting them later
+ */
+ total_pages = 0;
+ /* we also don't want to do this for legacy init */
+ if (!internal_config.legacy_mem)
+ for (i = 0; i < rte_socket_count(); i++) {
+ int socket = rte_socket_id_by_idx(i);
+ unsigned int num_pages =
+ get_num_hugepages_on_node(
+ dirent->d_name, socket);
+ hpi->num_pages[socket] = num_pages;
+ total_pages += num_pages;
+ }
+ /*
+ * we failed to sort memory from the get go, so fall
+ * back to old way
+ */
+ if (total_pages == 0) {
+ hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
+
+#ifndef RTE_ARCH_64
+ /* for 32-bit systems, limit number of hugepages to
+ * 1GB per page size */
+ hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
+ RTE_PGSIZE_1G / hpi->hugepage_sz);
+#endif
+ }
+}
+
static int
hugepage_info_init(void)
{ const char dirent_start_text[] = "hugepages-";
const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
- unsigned int i, total_pages, num_sizes = 0;
+ unsigned int i, num_sizes = 0;
DIR *dir;
struct dirent *dirent;
@@ -355,6 +395,22 @@ hugepage_info_init(void)
"%" PRIu64 " reserved, but no mounted "
"hugetlbfs found for that size\n",
num_pages, hpi->hugepage_sz);
+ /* if we have kernel support for reserving hugepages
+ * through mmap, and we're in in-memory mode, treat this
+ * page size as valid. we cannot be in legacy mode at
+ * this point because we've checked this earlier in the
+ * init process.
+ */
+#ifdef MAP_HUGE_SHIFT
+ if (internal_config.in_memory) {
+ RTE_LOG(DEBUG, EAL, "In-memory mode enabled, "
+ "hugepages of size %" PRIu64 " bytes "
+ "will be allocated anonymously\n",
+ hpi->hugepage_sz);
+ calc_num_pages(hpi, dirent);
+ num_sizes++;
+ }
+#endif
continue;
}
@@ -371,35 +427,7 @@ hugepage_info_init(void)
if (clear_hugedir(hpi->hugedir) == -1)
break;
- /*
- * first, try to put all hugepages into relevant sockets, but
- * if first attempts fails, fall back to collecting all pages
- * in one socket and sorting them later
- */
- total_pages = 0;
- /* we also don't want to do this for legacy init */
- if (!internal_config.legacy_mem)
- for (i = 0; i < rte_socket_count(); i++) {
- int socket = rte_socket_id_by_idx(i);
- unsigned int num_pages =
- get_num_hugepages_on_node(
- dirent->d_name, socket);
- hpi->num_pages[socket] = num_pages;
- total_pages += num_pages;
- }
- /*
- * we failed to sort memory from the get go, so fall
- * back to old way
- */
- if (total_pages == 0)
- hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
-
-#ifndef RTE_ARCH_64
- /* for 32-bit systems, limit number of hugepages to
- * 1GB per page size */
- hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0],
- RTE_PGSIZE_1G / hpi->hugepage_sz);
-#endif
+ calc_num_pages(hpi, dirent);
num_sizes++;
}
@@ -423,8 +451,7 @@ hugepage_info_init(void)
for (j = 0; j < RTE_MAX_NUMA_NODES; j++)
num_pages += hpi->num_pages[j];
- if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0 &&
- num_pages > 0)
+ if (num_pages > 0)
return 0;
}
@@ -446,6 +473,10 @@ eal_hugepage_info_init(void)
if (hugepage_info_init() < 0)
return -1;
+ /* for no shared files mode, we're done */
+ if (internal_config.no_shconf)
+ return 0;
+
hpi = &internal_config.hugepage_info[0];
tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
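
get_num_hugepages() and get_num_hugepages_on_node() are not shown in this hunk; they read the kernel's hugetlb counters from sysfs. A rough standalone sketch of that idea -- the file names below are assumptions based on the kernel's hugetlb ABI, not copied from the DPDK helpers:

#include <stdio.h>

/* "dirent_name" is a per-size directory such as "hugepages-2048kB" */
static unsigned long
read_sysfs_counter(const char *dirent_name, const char *file)
{
	char path[256];
	unsigned long val = 0;
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/mm/hugepages/%s/%s",
			dirent_name, file);
	f = fopen(path, "r");
	if (f == NULL)
		return 0;
	if (fscanf(f, "%lu", &val) != 1)
		val = 0;
	fclose(f);
	return val;
}

int main(void)
{
	unsigned long free_pg = read_sysfs_counter("hugepages-2048kB",
			"free_hugepages");
	unsigned long resv_pg = read_sysfs_counter("hugepages-2048kB",
			"resv_hugepages");

	/* reserved pages are already promised to someone else */
	printf("2M pages usable: %lu\n",
			free_pg > resv_pg ? free_pg - resv_pg : 0);
	return 0;
}
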
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 056d41c1..4076c6d6 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -30,7 +30,6 @@
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_log.h>
-#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
#include <rte_pause.h>
@@ -405,8 +404,7 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
}
/* allocate a new interrupt callback entity */
- callback = rte_zmalloc("interrupt callback list",
- sizeof(*callback), 0);
+ callback = calloc(1, sizeof(*callback));
if (callback == NULL) {
RTE_LOG(ERR, EAL, "Can not allocate memory\n");
return -ENOMEM;
@@ -420,7 +418,7 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
TAILQ_FOREACH(src, &intr_sources, next) {
if (src->intr_handle.fd == intr_handle->fd) {
/* we had no interrupts for this */
- if TAILQ_EMPTY(&src->callbacks)
+ if (TAILQ_EMPTY(&src->callbacks))
wake_thread = 1;
TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
@@ -431,10 +429,10 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
/* no existing callbacks for this - add new source */
if (src == NULL) {
- if ((src = rte_zmalloc("interrupt source list",
- sizeof(*src), 0)) == NULL) {
+ src = calloc(1, sizeof(*src));
+ if (src == NULL) {
RTE_LOG(ERR, EAL, "Can not allocate memory\n");
- rte_free(callback);
+ free(callback);
ret = -ENOMEM;
} else {
src->intr_handle = *intr_handle;
@@ -501,7 +499,7 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
cb->cb_arg == cb_arg)) {
TAILQ_REMOVE(&src->callbacks, cb, next);
- rte_free(cb);
+ free(cb);
ret++;
}
}
@@ -509,7 +507,7 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
/* all callbacks for that source are removed. */
if (TAILQ_EMPTY(&src->callbacks)) {
TAILQ_REMOVE(&intr_sources, src, next);
- rte_free(src);
+ free(src);
}
}
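
The "if TAILQ_EMPTY(&src->callbacks)" fixed above only ever compiled because the macro expands to a fully parenthesized expression. A tiny demonstration with a hypothetical macro:

#include <stdio.h>

#define DEMO_EMPTY(p) ((p) == NULL)

int main(void)
{
	const char *p = NULL;

	/* legal only because the expansion is if ((p) == NULL): the macro's
	 * own outer parentheses double as the if-condition parentheses */
	if DEMO_EMPTY(p)
		printf("empty\n");

	/* the conventional form, as in the fixed line of the patch */
	if (DEMO_EMPTY(p))
		printf("still empty\n");

	/* but "if DEMO_EMPTY(p) && 1" would not compile: the if consumes
	 * only the macro's parentheses, leaving "&& 1;" as a statement */
	return 0;
}
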
diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c
index ff145884..9d02dddb 100644
--- a/lib/librte_eal/linuxapp/eal/eal_log.c
+++ b/lib/librte_eal/linuxapp/eal/eal_log.c
@@ -25,25 +25,14 @@
static ssize_t
console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size)
{
- char copybuf[BUFSIZ + 1];
ssize_t ret;
- uint32_t loglevel;
/* write on stdout */
ret = fwrite(buf, 1, size, stdout);
fflush(stdout);
- /* truncate message if too big (should not happen) */
- if (size > BUFSIZ)
- size = BUFSIZ;
-
/* Syslog error levels are from 0 to 7, so subtract 1 to convert */
- loglevel = rte_log_cur_msg_loglevel() - 1;
- memcpy(copybuf, buf, size);
- copybuf[size] = '\0';
-
- /* write on syslog too */
- syslog(loglevel, "%s", copybuf);
+ syslog(rte_log_cur_msg_loglevel() - 1, "%.*s", (int)size, buf);
return ret;
}
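
The rewrite drops the bounce buffer by using the printf precision specifier, which bounds how many bytes %s may read even when the input is not NUL-terminated. Standalone illustration:

#include <stdio.h>

int main(void)
{
	/* no NUL terminator within the five bytes we care about */
	const char buf[] = { 'h', 'e', 'l', 'l', 'o', 'X', 'Y', 'Z' };

	/* "%.*s" takes the maximum byte count as an int argument, so no
	 * copy into a NUL-terminated scratch buffer is needed */
	printf("%.*s\n", 5, buf);	/* prints "hello" */
	return 0;
}
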
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 8c11f98c..aa95551a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -28,6 +28,7 @@
#include <numaif.h>
#endif
#include <linux/falloc.h>
+#include <linux/mman.h> /* for hugetlb-related mmap flags */
#include <rte_common.h>
#include <rte_log.h>
@@ -39,6 +40,16 @@
#include "eal_filesystem.h"
#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
+#include "eal_private.h"
+
+const int anonymous_hugepages_supported =
+#ifdef MAP_HUGE_SHIFT
+ 1;
+#define RTE_MAP_HUGE_SHIFT MAP_HUGE_SHIFT
+#else
+ 0;
+#define RTE_MAP_HUGE_SHIFT 26
+#endif
/*
* not all kernel version support fallocate on hugetlbfs, so fall back to
@@ -171,32 +182,6 @@ get_file_size(int fd)
return st.st_size;
}
-/* we cannot use rte_memseg_list_walk() here because we will be holding a
- * write lock whenever we enter every function in this file, however copying
- * the same iteration code everywhere is not ideal as well. so, use a lockless
- * copy of memseg list walk here.
- */
-static int
-memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, ret = 0;
-
- for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
- struct rte_memseg_list *msl = &mcfg->memsegs[i];
-
- if (msl->base_va == NULL)
- continue;
-
- ret = func(msl, arg);
- if (ret < 0)
- return -1;
- if (ret > 0)
- return 1;
- }
- return 0;
-}
-
/* returns 1 on successful lock, 0 on unsuccessful lock, -1 on error */
static int lock(int fd, int type)
{
@@ -486,45 +471,84 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
int cur_socket_id = 0;
#endif
uint64_t map_offset;
+ rte_iova_t iova;
+ void *va;
char path[PATH_MAX];
int ret = 0;
int fd;
size_t alloc_sz;
-
- /* takes out a read lock on segment or segment list */
- fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);
- if (fd < 0) {
- RTE_LOG(ERR, EAL, "Couldn't get fd on hugepage file\n");
- return -1;
- }
+ int flags;
+ void *new_addr;
alloc_sz = hi->hugepage_sz;
- if (internal_config.single_file_segments) {
- map_offset = seg_idx * alloc_sz;
- ret = resize_hugefile(fd, path, list_idx, seg_idx, map_offset,
- alloc_sz, true);
- if (ret < 0)
- goto resized;
- } else {
+ if (!internal_config.single_file_segments &&
+ internal_config.in_memory &&
+ anonymous_hugepages_supported) {
+ int log2, flags;
+
+ log2 = rte_log2_u32(alloc_sz);
+ /* as per mmap() manpage, all page sizes are log2 of page size
+ * shifted by MAP_HUGE_SHIFT
+ */
+ flags = (log2 << RTE_MAP_HUGE_SHIFT) | MAP_HUGETLB | MAP_FIXED |
+ MAP_PRIVATE | MAP_ANONYMOUS;
+ fd = -1;
+ va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, flags, -1, 0);
+
+ /* the single-file segments codepath will never be active here,
+ * because in-memory mode is incompatible with it and is rejected
+ * at EAL initialization. however, the compiler doesn't know that
+ * and complains about map_offset being used uninitialized on the
+ * failure codepaths when in-memory mode is enabled, so assign a
+ * value here.
+ */
map_offset = 0;
- if (ftruncate(fd, alloc_sz) < 0) {
- RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n",
- __func__, strerror(errno));
- goto resized;
+ } else {
+ /* takes out a read lock on segment or segment list */
+ fd = get_seg_fd(path, sizeof(path), hi, list_idx, seg_idx);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't get fd on hugepage file\n");
+ return -1;
}
- }
- /*
- * map the segment, and populate page tables, the kernel fills this
- * segment with zeros if it's a new page.
- */
- void *va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd, map_offset);
+ if (internal_config.single_file_segments) {
+ map_offset = seg_idx * alloc_sz;
+ ret = resize_hugefile(fd, path, list_idx, seg_idx,
+ map_offset, alloc_sz, true);
+ if (ret < 0)
+ goto resized;
+ } else {
+ map_offset = 0;
+ if (ftruncate(fd, alloc_sz) < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): ftruncate() failed: %s\n",
+ __func__, strerror(errno));
+ goto resized;
+ }
+ if (internal_config.hugepage_unlink) {
+ if (unlink(path)) {
+ RTE_LOG(DEBUG, EAL, "%s(): unlink() failed: %s\n",
+ __func__, strerror(errno));
+ goto resized;
+ }
+ }
+ }
+
+ /*
+ * map the segment, and populate page tables, the kernel fills
+ * this segment with zeros if it's a new page.
+ */
+ va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd,
+ map_offset);
+ }
if (va == MAP_FAILED) {
RTE_LOG(DEBUG, EAL, "%s(): mmap() failed: %s\n", __func__,
strerror(errno));
- goto resized;
+ /* mmap failed, but the previous region might have been
+ * unmapped anyway. try to remap it
+ */
+ goto unmapped;
}
if (va != addr) {
RTE_LOG(DEBUG, EAL, "%s(): wrong mmap() address\n", __func__);
@@ -532,24 +556,6 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
goto resized;
}
- rte_iova_t iova = rte_mem_virt2iova(addr);
- if (iova == RTE_BAD_PHYS_ADDR) {
- RTE_LOG(DEBUG, EAL, "%s(): can't get IOVA addr\n",
- __func__);
- goto mapped;
- }
-
-#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
- move_pages(getpid(), 1, &addr, NULL, &cur_socket_id, 0);
-
- if (cur_socket_id != socket_id) {
- RTE_LOG(DEBUG, EAL,
- "%s(): allocation happened on wrong socket (wanted %d, got %d)\n",
- __func__, socket_id, cur_socket_id);
- goto mapped;
- }
-#endif
-
/* In linux, hugetlb limitations, like cgroup, are
* enforced at fault time instead of mmap(), even
* with the option of MAP_POPULATE. Kernel will send
@@ -562,9 +568,6 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
(unsigned int)(alloc_sz >> 20));
goto mapped;
}
- /* for non-single file segments, we can close fd here */
- if (!internal_config.single_file_segments)
- close(fd);
/* we need to trigger a write to the page to enforce page fault and
* ensure that page is accessible to us, but we can't overwrite value
@@ -573,6 +576,28 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
*/
*(volatile int *)addr = *(volatile int *)addr;
+ iova = rte_mem_virt2iova(addr);
+ if (iova == RTE_BAD_PHYS_ADDR) {
+ RTE_LOG(DEBUG, EAL, "%s(): can't get IOVA addr\n",
+ __func__);
+ goto mapped;
+ }
+
+#ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ move_pages(getpid(), 1, &addr, NULL, &cur_socket_id, 0);
+
+ if (cur_socket_id != socket_id) {
+ RTE_LOG(DEBUG, EAL,
+ "%s(): allocation happened on wrong socket (wanted %d, got %d)\n",
+ __func__, socket_id, cur_socket_id);
+ goto mapped;
+ }
+#endif
+ /* for non-single file segments that aren't in-memory, we can close fd
+ * here */
+ if (!internal_config.single_file_segments && !internal_config.in_memory)
+ close(fd);
+
ms->addr = addr;
ms->hugepage_sz = alloc_sz;
ms->len = alloc_sz;
@@ -585,14 +610,32 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
mapped:
munmap(addr, alloc_sz);
+unmapped:
+ flags = MAP_FIXED;
+#ifdef RTE_ARCH_PPC_64
+ flags |= MAP_HUGETLB;
+#endif
+ new_addr = eal_get_virtual_area(addr, &alloc_sz, alloc_sz, 0, flags);
+ if (new_addr != addr) {
+ if (new_addr != NULL)
+ munmap(new_addr, alloc_sz);
+ /* we're leaving a hole in our virtual address space. if
+ * somebody else maps this hole now, we could accidentally
+ * overwrite it in the future.
+ */
+ RTE_LOG(CRIT, EAL, "Can't mmap holes in our virtual address space\n");
+ }
resized:
+ /* in-memory mode will never be single-file-segments mode */
if (internal_config.single_file_segments) {
resize_hugefile(fd, path, list_idx, seg_idx, map_offset,
alloc_sz, false);
/* ignore failure, can't make it any worse */
} else {
/* only remove file if we can take out a write lock */
- if (lock(fd, LOCK_EX) == 1)
+ if (internal_config.hugepage_unlink == 0 &&
+ internal_config.in_memory == 0 &&
+ lock(fd, LOCK_EX) == 1)
unlink(path);
close(fd);
}
@@ -617,6 +660,12 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
return -1;
}
+ /* if we've already unlinked the page, nothing needs to be done */
+ if (internal_config.hugepage_unlink) {
+ memset(ms, 0, sizeof(*ms));
+ return 0;
+ }
+
/* if we are not in single file segments mode, we're going to unmap the
* segment and thus drop the lock on original fd, but hugepage dir is
* now locked so we can take out another one without races.
@@ -695,7 +744,7 @@ alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
* during init, we already hold a write lock, so don't try to take out
* another one.
*/
- if (wa->hi->lock_descriptor == -1) {
+ if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) {
dir_fd = open(wa->hi->hugedir, O_RDONLY);
if (dir_fd < 0) {
RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
@@ -799,7 +848,7 @@ free_seg_walk(const struct rte_memseg_list *msl, void *arg)
* during init, we already hold a write lock, so don't try to take out
* another one.
*/
- if (wa->hi->lock_descriptor == -1) {
+ if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) {
dir_fd = open(wa->hi->hugedir, O_RDONLY);
if (dir_fd < 0) {
RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
@@ -878,7 +927,8 @@ eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz,
wa.socket = socket;
wa.segs_allocated = 0;
- ret = memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
+ /* memalloc is locked, so it's safe to use thread-unsafe version */
+ ret = rte_memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
if (ret == 0) {
RTE_LOG(ERR, EAL, "%s(): couldn't find suitable memseg_list\n",
__func__);
@@ -943,7 +993,10 @@ eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs)
wa.ms = cur;
wa.hi = hi;
- walk_res = memseg_list_walk_thread_unsafe(free_seg_walk, &wa);
+ /* memalloc is locked, so it's safe to use thread-unsafe version
+ */
+ walk_res = rte_memseg_list_walk_thread_unsafe(free_seg_walk,
+ &wa);
if (walk_res == 1)
continue;
if (walk_res == 0)
@@ -1230,7 +1283,8 @@ eal_memalloc_sync_with_primary(void)
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
return 0;
- if (memseg_list_walk_thread_unsafe(sync_walk, NULL))
+ /* memalloc is locked, so it's safe to call thread-unsafe version */
+ if (rte_memseg_list_walk_thread_unsafe(sync_walk, NULL))
return -1;
return 0;
}
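
The anonymous-hugepage path boils down to a single mmap() call with the page size encoded in the flags. A standalone sketch for one 2 MB page; it needs reserved hugepages and a kernel exposing MAP_HUGE_SHIFT:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <linux/mman.h>	/* MAP_HUGE_SHIFT, as included by the patch */

int main(void)
{
	size_t sz = 2UL << 20;	/* one 2 MB hugepage */
	int log2_sz = 21;	/* log2(2 MB) */
	/* encode log2 of the page size in the flags, as alloc_seg() does */
	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
			(log2_sz << MAP_HUGE_SHIFT);
	void *va = mmap(NULL, sz, PROT_READ | PROT_WRITE, flags, -1, 0);

	if (va == MAP_FAILED) {
		perror("mmap");	/* e.g. no free hugepages reserved */
		return 1;
	}
	/* touch the page so the hugetlb fault happens now */
	*(volatile int *)va = 1;
	munmap(va, sz);
	return 0;
}
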
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index c917de1c..dbf19499 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -66,7 +66,7 @@ static bool phys_addrs_available = true;
static void
test_phys_addrs_available(void)
{
- uint64_t tmp;
+ uint64_t tmp = 0;
phys_addr_t physaddr;
if (!rte_eal_has_hugepages()) {
@@ -521,7 +521,18 @@ static void *
create_shared_memory(const char *filename, const size_t mem_size)
{
void *retval;
- int fd = open(filename, O_CREAT | O_RDWR, 0666);
+ int fd;
+
+ /* if no shared files mode is used, create anonymous memory instead */
+ if (internal_config.no_shconf) {
+ retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (retval == MAP_FAILED)
+ return NULL;
+ return retval;
+ }
+
+ fd = open(filename, O_CREAT | O_RDWR, 0666);
if (fd < 0)
return NULL;
if (ftruncate(fd, mem_size) < 0) {
@@ -767,6 +778,34 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
return 0;
}
+static uint64_t
+get_mem_amount(uint64_t page_sz, uint64_t max_mem)
+{
+ uint64_t area_sz, max_pages;
+
+ /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
+ max_pages = RTE_MAX_MEMSEG_PER_LIST;
+ max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
+
+ area_sz = RTE_MIN(page_sz * max_pages, max_mem);
+
+ /* make sure the list isn't smaller than the page size */
+ area_sz = RTE_MAX(area_sz, page_sz);
+
+ return RTE_ALIGN(area_sz, page_sz);
+}
+
+static int
+free_memseg_list(struct rte_memseg_list *msl)
+{
+ if (rte_fbarray_destroy(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n");
+ return -1;
+ }
+ memset(msl, 0, sizeof(*msl));
+ return 0;
+}
+
#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
static int
alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
@@ -1049,8 +1088,7 @@ get_socket_mem_size(int socket)
for (i = 0; i < internal_config.num_hugepage_sizes; i++){
struct hugepage_info *hpi = &internal_config.hugepage_info[i];
- if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0)
- size += hpi->hugepage_sz * hpi->num_pages[socket];
+ size += hpi->hugepage_sz * hpi->num_pages[socket];
}
return size;
@@ -1605,6 +1643,15 @@ hugepage_count_walk(const struct rte_memseg_list *msl, void *arg)
}
static int
+limits_callback(int socket_id, size_t cur_limit, size_t new_len)
+{
+ RTE_SET_USED(socket_id);
+ RTE_SET_USED(cur_limit);
+ RTE_SET_USED(new_len);
+ return -1;
+}
+
+static int
eal_hugepage_init(void)
{
struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
@@ -1687,6 +1734,18 @@ eal_hugepage_init(void)
free(pages);
}
}
+ /* if socket limits were specified, set them */
+ if (internal_config.force_socket_limits) {
+ unsigned int i;
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
+ uint64_t limit = internal_config.socket_limit[i];
+ if (limit == 0)
+ continue;
+ if (rte_mem_alloc_validator_register("socket-limit",
+ limits_callback, i, limit))
+ RTE_LOG(ERR, EAL, "Failed to register socket limits validator callback\n");
+ }
+ }
return 0;
}
@@ -1840,3 +1899,316 @@ rte_eal_using_phys_addrs(void)
{
return phys_addrs_available;
}
+
+static int __rte_unused
+memseg_primary_init_32(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int active_sockets, hpi_idx, msl_idx = 0;
+ unsigned int socket_id, i;
+ struct rte_memseg_list *msl;
+ uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem;
+ uint64_t max_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ /* this is a giant hack, but desperate times call for desperate
+ * measures. in legacy 32-bit mode, we cannot preallocate VA space,
+ * because having upwards of 2 gigabytes of VA space already mapped will
+ * interfere with our ability to map and sort hugepages.
+ *
+ * therefore, in legacy 32-bit mode, we will be initializing memseg
+ * lists much later - in eal_memory.c, right after we unmap all the
+ * unneeded pages. this will not affect secondary processes, as those
+ * should be able to mmap the space without (too many) problems.
+ */
+ if (internal_config.legacy_mem)
+ return 0;
+
+ /* 32-bit mode is a very special case. we cannot know in advance where
+ * the user will want to allocate their memory, so we have to do some
+ * heuristics.
+ */
+ active_sockets = 0;
+ total_requested_mem = 0;
+ if (internal_config.force_sockets)
+ for (i = 0; i < rte_socket_count(); i++) {
+ uint64_t mem;
+
+ socket_id = rte_socket_id_by_idx(i);
+ mem = internal_config.socket_mem[socket_id];
+
+ if (mem == 0)
+ continue;
+
+ active_sockets++;
+ total_requested_mem += mem;
+ }
+ else
+ total_requested_mem = internal_config.memory;
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ if (total_requested_mem > max_mem) {
+ RTE_LOG(ERR, EAL, "Invalid parameters: 32-bit process can at most use %uM of memory\n",
+ (unsigned int)(max_mem >> 20));
+ return -1;
+ }
+ total_extra_mem = max_mem - total_requested_mem;
+ extra_mem_per_socket = active_sockets == 0 ? total_extra_mem :
+ total_extra_mem / active_sockets;
+
+ /* the allocation logic is a little bit convoluted, but here's how it
+ * works, in a nutshell:
+ * - if user hasn't specified on which sockets to allocate memory via
+ * --socket-mem, we allocate all of our memory on master core socket.
+ * - if user has specified sockets to allocate memory on, there may be
+ * some "unused" memory left (e.g. if user has specified --socket-mem
+ * such that not all memory adds up to 2 gigabytes), so add it to all
+ * sockets that are in use equally.
+ *
+ * page sizes are sorted by size in descending order, so we can safely
+ * assume that we dispense with bigger page sizes first.
+ */
+
+ /* create memseg lists */
+ for (i = 0; i < rte_socket_count(); i++) {
+ int hp_sizes = (int) internal_config.num_hugepage_sizes;
+ uint64_t max_socket_mem, cur_socket_mem;
+ unsigned int master_lcore_socket;
+ struct rte_config *cfg = rte_eal_get_configuration();
+ bool skip;
+
+ socket_id = rte_socket_id_by_idx(i);
+
+#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (socket_id > 0)
+ break;
+#endif
+
+ /* if we didn't specifically request memory on this socket */
+ skip = active_sockets != 0 &&
+ internal_config.socket_mem[socket_id] == 0;
+ /* ...or if we didn't specifically request memory on *any*
+ * socket, and this is not master lcore
+ */
+ master_lcore_socket = rte_lcore_to_socket_id(cfg->master_lcore);
+ skip |= active_sockets == 0 && socket_id != master_lcore_socket;
+
+ if (skip) {
+ RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n",
+ socket_id);
+ continue;
+ }
+
+ /* max amount of memory on this socket */
+ max_socket_mem = (active_sockets != 0 ?
+ internal_config.socket_mem[socket_id] :
+ internal_config.memory) +
+ extra_mem_per_socket;
+ cur_socket_mem = 0;
+
+ for (hpi_idx = 0; hpi_idx < hp_sizes; hpi_idx++) {
+ uint64_t max_pagesz_mem, cur_pagesz_mem = 0;
+ uint64_t hugepage_sz;
+ struct hugepage_info *hpi;
+ int type_msl_idx, max_segs, total_segs = 0;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ /* check if pages are actually available */
+ if (hpi->num_pages[socket_id] == 0)
+ continue;
+
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+ max_pagesz_mem = max_socket_mem - cur_socket_mem;
+
+ /* make it multiple of page size */
+ max_pagesz_mem = RTE_ALIGN_FLOOR(max_pagesz_mem,
+ hugepage_sz);
+
+ RTE_LOG(DEBUG, EAL, "Attempting to preallocate "
+ "%" PRIu64 "M on socket %i\n",
+ max_pagesz_mem >> 20, socket_id);
+
+ type_msl_idx = 0;
+ while (cur_pagesz_mem < max_pagesz_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ max_pagesz_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ socket_id, type_msl_idx)) {
+ /* failing to allocate a memseg list is
+ * a serious error.
+ */
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
+ return -1;
+ }
+
+ if (alloc_va_space(msl)) {
+ /* if we couldn't allocate VA space, we
+ * can try with smaller page sizes.
+ */
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n");
+ /* deallocate memseg list */
+ if (free_memseg_list(msl))
+ return -1;
+ break;
+ }
+
+ total_segs += msl->memseg_arr.len;
+ cur_pagesz_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+ msl_idx++;
+ }
+ cur_socket_mem += cur_pagesz_mem;
+ }
+ if (cur_socket_mem == 0) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n",
+ socket_id);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int __rte_unused
+memseg_primary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, socket_id, hpi_idx, msl_idx = 0;
+ struct rte_memseg_list *msl;
+ uint64_t max_mem, total_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ total_mem = 0;
+
+ /* create memseg lists */
+ for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
+ hpi_idx++) {
+ struct hugepage_info *hpi;
+ uint64_t hugepage_sz;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ uint64_t max_type_mem, total_type_mem = 0;
+ int type_msl_idx, max_segs, total_segs = 0;
+
+ socket_id = rte_socket_id_by_idx(i);
+
+#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
+ if (socket_id > 0)
+ break;
+#endif
+
+ if (total_mem >= max_mem)
+ break;
+
+ max_type_mem = RTE_MIN(max_mem - total_mem,
+ (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+
+ type_msl_idx = 0;
+ while (total_type_mem < max_type_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_max_mem, cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx++];
+
+ cur_max_mem = max_type_mem - total_type_mem;
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ cur_max_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ socket_id, type_msl_idx))
+ return -1;
+
+ total_segs += msl->memseg_arr.len;
+ total_type_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+ return -1;
+ }
+ }
+ total_mem += total_type_mem;
+ }
+ }
+ return 0;
+}
+
+static int
+memseg_secondary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx = 0;
+ struct rte_memseg_list *msl;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+
+ msl = &mcfg->memsegs[msl_idx];
+
+ /* skip empty memseg lists */
+ if (msl->memseg_arr.len == 0)
+ continue;
+
+ if (rte_fbarray_attach(&msl->memseg_arr)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
+ return -1;
+ }
+
+ /* preallocate VA space */
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int
+rte_eal_memseg_init(void)
+{
+ return rte_eal_process_type() == RTE_PROC_PRIMARY ?
+#ifndef RTE_ARCH_64
+ memseg_primary_init_32() :
+#else
+ memseg_primary_init() :
+#endif
+ memseg_secondary_init();
+}
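
get_mem_amount() clamps each memseg list twice: by segment count and by total bytes. With the stock config values (RTE_MAX_MEMSEG_PER_LIST = 8192 and RTE_MAX_MEM_MB_PER_LIST = 32768 -- assumed defaults, check your build), a 2 MB-page list tops out at 16 GB and a 1 GB-page list at 32 GB. A standalone sketch of the arithmetic:

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

/* assumed defaults; the real values come from the DPDK build config */
#define MAX_MEMSEG_PER_LIST	8192
#define MAX_MEM_MB_PER_LIST	32768

/* mirror of get_mem_amount(): clamp by page count and by per-list bytes,
 * then round the area up to a page-size multiple */
static uint64_t
mem_amount(uint64_t page_sz, uint64_t max_mem)
{
	uint64_t cap = (uint64_t)MAX_MEM_MB_PER_LIST << 20;
	uint64_t area_sz;

	if (max_mem > cap)
		max_mem = cap;
	area_sz = page_sz * MAX_MEMSEG_PER_LIST;
	if (area_sz > max_mem)
		area_sz = max_mem;
	if (area_sz < page_sz)
		area_sz = page_sz;
	return ((area_sz + page_sz - 1) / page_sz) * page_sz;
}

int main(void)
{
	printf("2M pages: %" PRIu64 " MB per list\n",
			mem_amount(2ULL << 20, UINT64_MAX) >> 20); /* 16384 */
	printf("1G pages: %" PRIu64 " MB per list\n",
			mem_amount(1ULL << 30, UINT64_MAX) >> 20); /* 32768 */
	return 0;
}
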
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
index f652ff98..b496fc71 100644
--- a/lib/librte_eal/linuxapp/eal/eal_thread.c
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -176,7 +176,7 @@ int rte_sys_gettid(void)
int rte_thread_setname(pthread_t id, const char *name)
{
- int ret = -1;
+ int ret = ENOSYS;
#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
#if __GLIBC_PREREQ(2, 12)
ret = pthread_setname_np(id, name);
@@ -184,5 +184,5 @@ int rte_thread_setname(pthread_t id, const char *name)
#endif
RTE_SET_USED(id);
RTE_SET_USED(name);
- return ret;
+ return -ret;
}
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index a2bbdfbf..c68dc38e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -87,42 +87,6 @@ static const struct vfio_iommu_type iommu_types[] = {
},
};
-/* for sPAPR IOMMU, we will need to walk memseg list, but we cannot use
- * rte_memseg_walk() because by the time we enter callback we will be holding a
- * write lock, so regular rte-memseg_walk will deadlock. copying the same
- * iteration code everywhere is not ideal as well. so, use a lockless copy of
- * memseg walk here.
- */
-static int
-memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, ms_idx, ret = 0;
-
- for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
- struct rte_memseg_list *msl = &mcfg->memsegs[i];
- const struct rte_memseg *ms;
- struct rte_fbarray *arr;
-
- if (msl->memseg_arr.count == 0)
- continue;
-
- arr = &msl->memseg_arr;
-
- ms_idx = rte_fbarray_find_next_used(arr, 0);
- while (ms_idx >= 0) {
- ms = rte_fbarray_get(arr, ms_idx);
- ret = func(msl, ms, arg);
- if (ret < 0)
- return -1;
- if (ret > 0)
- return 1;
- ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
- }
- }
- return 0;
-}
-
static int
is_null_map(const struct user_mem_map *map)
{
@@ -575,10 +539,6 @@ int
rte_vfio_clear_group(int vfio_group_fd)
{
int i;
- struct rte_mp_msg mp_req, *mp_rep;
- struct rte_mp_reply mp_reply;
- struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
- struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
struct vfio_config *vfio_cfg;
vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);
@@ -587,40 +547,15 @@ rte_vfio_clear_group(int vfio_group_fd)
return -1;
}
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
-
- i = get_vfio_group_idx(vfio_group_fd);
- if (i < 0)
- return -1;
- vfio_cfg->vfio_groups[i].group_num = -1;
- vfio_cfg->vfio_groups[i].fd = -1;
- vfio_cfg->vfio_groups[i].devices = 0;
- vfio_cfg->vfio_active_groups--;
- return 0;
- }
-
- p->req = SOCKET_CLR_GROUP;
- p->group_num = vfio_group_fd;
- strcpy(mp_req.name, EAL_VFIO_MP);
- mp_req.len_param = sizeof(*p);
- mp_req.num_fds = 0;
-
- if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
- mp_reply.nb_received == 1) {
- mp_rep = &mp_reply.msgs[0];
- p = (struct vfio_mp_param *)mp_rep->param;
- if (p->result == SOCKET_OK) {
- free(mp_reply.msgs);
- return 0;
- } else if (p->result == SOCKET_NO_FD)
- RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n");
- else
- RTE_LOG(ERR, EAL, " no such VFIO group fd!\n");
-
- free(mp_reply.msgs);
- }
+ i = get_vfio_group_idx(vfio_group_fd);
+ if (i < 0)
+ return -1;
+ vfio_cfg->vfio_groups[i].group_num = -1;
+ vfio_cfg->vfio_groups[i].fd = -1;
+ vfio_cfg->vfio_groups[i].devices = 0;
+ vfio_cfg->vfio_active_groups--;
- return -1;
+ return 0;
}
int
@@ -1357,7 +1292,8 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
/* check if window size needs to be adjusted */
memset(&param, 0, sizeof(param));
- if (memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
+ /* we're inside a callback so use thread-unsafe version */
+ if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk,
&param) < 0) {
RTE_LOG(ERR, EAL, "Could not get window size\n");
ret = -1;
@@ -1386,7 +1322,9 @@ vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
ret = -1;
goto out;
}
- if (memseg_walk_thread_unsafe(vfio_spapr_map_walk,
+ /* we're inside a callback, so use thread-unsafe version
+ */
+ if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk,
&vfio_container_fd) < 0) {
RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n");
ret = -1;
@@ -1624,7 +1562,7 @@ out:
return ret;
}
-int __rte_experimental
+int
rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
{
if (len == 0) {
@@ -1635,7 +1573,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
return container_dma_map(default_vfio_cfg, vaddr, iova, len);
}
-int __rte_experimental
+int
rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
{
if (len == 0) {
@@ -1678,7 +1616,7 @@ rte_vfio_noiommu_is_enabled(void)
return c == 'Y';
}
-int __rte_experimental
+int
rte_vfio_container_create(void)
{
int i;
@@ -1728,7 +1666,7 @@ rte_vfio_container_destroy(int container_fd)
return 0;
}
-int __rte_experimental
+int
rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
{
struct vfio_config *vfio_cfg;
@@ -1774,11 +1712,11 @@ rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
return vfio_group_fd;
}
-int __rte_experimental
+int
rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
{
struct vfio_config *vfio_cfg;
- struct vfio_group *cur_grp;
+ struct vfio_group *cur_grp = NULL;
int i;
vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
@@ -1795,7 +1733,7 @@ rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
}
/* This should not happen */
- if (i == VFIO_MAX_GROUPS) {
+ if (i == VFIO_MAX_GROUPS || cur_grp == NULL) {
RTE_LOG(ERR, EAL, "Specified group number not found\n");
return -1;
}
@@ -1813,7 +1751,7 @@ rte_vfio_container_group_unbind(int container_fd, int iommu_group_num)
return 0;
}
-int __rte_experimental
+int
rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova,
uint64_t len)
{
@@ -1833,7 +1771,7 @@ rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova,
return container_dma_map(vfio_cfg, vaddr, iova, len);
}
-int __rte_experimental
+int
rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova,
uint64_t len)
{
@@ -1855,14 +1793,14 @@ rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova,
#else
-int __rte_experimental
+int
rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
__rte_unused uint64_t len)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
__rte_unused uint64_t len)
{
@@ -1909,7 +1847,7 @@ rte_vfio_clear_group(__rte_unused int vfio_group_fd)
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
__rte_unused const char *dev_addr,
__rte_unused int *iommu_group_num)
@@ -1917,45 +1855,45 @@ rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_container_fd(void)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_create(void)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_destroy(__rte_unused int container_fd)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_group_bind(__rte_unused int container_fd,
__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_group_unbind(__rte_unused int container_fd,
__rte_unused int iommu_group_num)
{
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_dma_map(__rte_unused int container_fd,
__rte_unused uint64_t vaddr,
__rte_unused uint64_t iova,
@@ -1964,7 +1902,7 @@ rte_vfio_container_dma_map(__rte_unused int container_fd,
return -1;
}
-int __rte_experimental
+int
rte_vfio_container_dma_unmap(__rte_unused int container_fd,
__rte_unused uint64_t vaddr,
__rte_unused uint64_t iova,
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index e65b1037..68d4750a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -129,7 +129,6 @@ int vfio_mp_sync_setup(void);
#define SOCKET_REQ_CONTAINER 0x100
#define SOCKET_REQ_GROUP 0x200
-#define SOCKET_CLR_GROUP 0x300
#define SOCKET_OK 0x0
#define SOCKET_NO_FD 0x1
#define SOCKET_ERR 0xFF
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index 9c202bb0..680a24aa 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -55,14 +55,6 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
reply.fds[0] = fd;
}
break;
- case SOCKET_CLR_GROUP:
- r->req = SOCKET_CLR_GROUP;
- r->group_num = m->group_num;
- if (rte_vfio_clear_group(m->group_num) < 0)
- r->result = SOCKET_NO_FD;
- else
- r->result = SOCKET_OK;
- break;
case SOCKET_REQ_CONTAINER:
r->req = SOCKET_REQ_CONTAINER;
fd = rte_vfio_get_container_fd();
diff --git a/lib/librte_eal/linuxapp/eal/meson.build b/lib/librte_eal/linuxapp/eal/meson.build
index cce37712..6e31c2aa 100644
--- a/lib/librte_eal/linuxapp/eal/meson.build
+++ b/lib/librte_eal/linuxapp/eal/meson.build
@@ -23,6 +23,7 @@ env_sources = files('eal_alarm.c',
'eal_dev.c',
)
+deps += ['kvargs']
if has_libnuma == 1
dpdk_conf.set10('RTE_EAL_NUMA_AWARE_HUGEPAGES', true)
endif
diff --git a/lib/librte_eal/meson.build b/lib/librte_eal/meson.build
index 4aa63e3d..e1fde15d 100644
--- a/lib/librte_eal/meson.build
+++ b/lib/librte_eal/meson.build
@@ -18,12 +18,13 @@ elif host_machine.system() == 'freebsd'
subdir('bsdapp/eal')
else
- error('unsupported system type @0@'.format(hostmachine.system()))
+ error('unsupported system type "@0@"'.format(host_machine.system()))
endif
-version = 7 # the version of the EAL API
+version = 8 # the version of the EAL API
allow_experimental_apis = true
deps += 'compat'
+deps += 'kvargs'
cflags += '-D_GNU_SOURCE'
sources = common_sources + env_sources
objs = common_objs + env_objs
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index f7dd0e7b..344a43d3 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -181,7 +181,6 @@ DPDK_17.11 {
rte_bus_get_iommu_class;
rte_eal_has_pci;
rte_eal_iova_mode;
- rte_eal_mbuf_default_mempool_ops;
rte_eal_using_phys_addrs;
rte_eal_vfio_intr_mode;
rte_lcore_has_role;
@@ -241,25 +240,53 @@ DPDK_18.05 {
} DPDK_18.02;
+DPDK_18.08 {
+ global:
+
+ rte_eal_mbuf_user_pool_ops;
+ rte_uuid_compare;
+ rte_uuid_is_null;
+ rte_uuid_parse;
+ rte_uuid_unparse;
+ rte_vfio_container_create;
+ rte_vfio_container_destroy;
+ rte_vfio_container_dma_map;
+ rte_vfio_container_dma_unmap;
+ rte_vfio_container_group_bind;
+ rte_vfio_container_group_unbind;
+ rte_vfio_dma_map;
+ rte_vfio_dma_unmap;
+ rte_vfio_get_container_fd;
+ rte_vfio_get_group_fd;
+ rte_vfio_get_group_num;
+
+} DPDK_18.05;
+
EXPERIMENTAL {
global:
+ rte_class_find;
+ rte_class_find_by_name;
+ rte_class_register;
+ rte_class_unregister;
rte_ctrl_thread_create;
rte_dev_event_callback_register;
rte_dev_event_callback_unregister;
rte_dev_event_monitor_start;
rte_dev_event_monitor_stop;
+ rte_dev_iterator_init;
+ rte_dev_iterator_next;
rte_devargs_add;
rte_devargs_dump;
rte_devargs_insert;
rte_devargs_next;
rte_devargs_parse;
+ rte_devargs_parsef;
rte_devargs_remove;
rte_devargs_type_count;
rte_eal_cleanup;
rte_eal_hotplug_add;
rte_eal_hotplug_remove;
- rte_eal_mbuf_user_pool_ops;
rte_fbarray_attach;
rte_fbarray_destroy;
rte_fbarray_detach;
@@ -269,8 +296,14 @@ EXPERIMENTAL {
rte_fbarray_find_next_used;
rte_fbarray_find_next_n_free;
rte_fbarray_find_next_n_used;
+ rte_fbarray_find_prev_free;
+ rte_fbarray_find_prev_used;
+ rte_fbarray_find_prev_n_free;
+ rte_fbarray_find_prev_n_used;
rte_fbarray_find_contig_free;
rte_fbarray_find_contig_used;
+ rte_fbarray_find_rev_contig_free;
+ rte_fbarray_find_rev_contig_used;
rte_fbarray_get;
rte_fbarray_init;
rte_fbarray_is_used;
@@ -286,25 +319,20 @@ EXPERIMENTAL {
rte_mem_virt2memseg;
rte_mem_virt2memseg_list;
rte_memseg_contig_walk;
+ rte_memseg_contig_walk_thread_unsafe;
rte_memseg_list_walk;
+ rte_memseg_list_walk_thread_unsafe;
rte_memseg_walk;
+ rte_memseg_walk_thread_unsafe;
rte_mp_action_register;
rte_mp_action_unregister;
rte_mp_reply;
rte_mp_request_sync;
rte_mp_request_async;
rte_mp_sendmsg;
+ rte_service_lcore_attr_get;
+ rte_service_lcore_attr_reset_all;
+ rte_service_may_be_active;
rte_socket_count;
rte_socket_id_by_idx;
- rte_vfio_dma_map;
- rte_vfio_dma_unmap;
- rte_vfio_get_container_fd;
- rte_vfio_get_group_fd;
- rte_vfio_get_group_num;
- rte_vfio_container_create;
- rte_vfio_container_destroy;
- rte_vfio_container_dma_map;
- rte_vfio_container_dma_unmap;
- rte_vfio_container_group_bind;
- rte_vfio_container_group_unbind;
};
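With the container API promoted into the stable DPDK_18.08 node above, the intended call sequence looks roughly as follows. This is a minimal sketch, not part of the patch: the IOMMU group number is a hypothetical example value, vaddr/iova/len are caller-supplied, and error handling is abbreviated.

	int container_fd = rte_vfio_container_create();

	if (container_fd < 0)
		return -1;
	/* group 42 is an assumed example value */
	if (rte_vfio_container_group_bind(container_fd, 42) < 0 ||
			rte_vfio_container_dma_map(container_fd, vaddr,
				iova, len) < 0) {
		rte_vfio_container_destroy(container_fd);
		return -1;
	}
	/* devices in group 42 may now DMA within [iova, iova + len) */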
diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile
index c2f2f7d8..0935a275 100644
--- a/lib/librte_ethdev/Makefile
+++ b/lib/librte_ethdev/Makefile
@@ -16,7 +16,7 @@ LDLIBS += -lrte_mbuf
EXPORT_MAP := rte_ethdev_version.map
-LIBABIVER := 9
+LIBABIVER := 10
SRCS-y += rte_ethdev.c
SRCS-y += rte_flow.c
diff --git a/lib/librte_ethdev/meson.build b/lib/librte_ethdev/meson.build
index aed5d226..596cd0f3 100644
--- a/lib/librte_ethdev/meson.build
+++ b/lib/librte_ethdev/meson.build
@@ -2,7 +2,7 @@
# Copyright(c) 2017 Intel Corporation
name = 'ethdev'
-version = 9
+version = 10
allow_experimental_apis = true
sources = files('ethdev_profile.c',
'rte_ethdev.c',
diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index cd4bfd3c..4c320250 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -42,10 +42,7 @@
#include "rte_ethdev_driver.h"
#include "ethdev_profile.h"
-static int ethdev_logtype;
-
-#define ethdev_log(level, fmt, ...) \
- rte_log(RTE_LOG_ ## level, ethdev_logtype, fmt "\n", ## __VA_ARGS__)
+int rte_eth_dev_logtype;
static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data";
struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS];
@@ -129,6 +126,7 @@ static const struct {
RTE_RX_OFFLOAD_BIT2STR(SCATTER),
RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
RTE_RX_OFFLOAD_BIT2STR(SECURITY),
+ RTE_RX_OFFLOAD_BIT2STR(KEEP_CRC),
};
#undef RTE_RX_OFFLOAD_BIT2STR
@@ -303,14 +301,16 @@ rte_eth_dev_allocate(const char *name)
rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
if (_rte_eth_dev_allocated(name) != NULL) {
- ethdev_log(ERR, "Ethernet device with name %s already allocated",
- name);
+ RTE_ETHDEV_LOG(ERR,
+ "Ethernet device with name %s already allocated\n",
+ name);
goto unlock;
}
port_id = rte_eth_dev_find_free_port();
if (port_id == RTE_MAX_ETHPORTS) {
- ethdev_log(ERR, "Reached maximum number of Ethernet ports");
+ RTE_ETHDEV_LOG(ERR,
+ "Reached maximum number of Ethernet ports\n");
goto unlock;
}
@@ -346,8 +346,8 @@ rte_eth_dev_attach_secondary(const char *name)
break;
}
if (i == RTE_MAX_ETHPORTS) {
- RTE_PMD_DEBUG_TRACE(
- "device %s is not driven by the primary process\n",
+ RTE_ETHDEV_LOG(ERR,
+ "Device %s is not driven by the primary process\n",
name);
} else {
eth_dev = eth_dev_get(i);
@@ -394,7 +394,8 @@ rte_eth_is_valid_owner_id(uint64_t owner_id)
{
if (owner_id == RTE_ETH_DEV_NO_OWNER ||
rte_eth_dev_shared_data->next_owner_id <= owner_id) {
- RTE_PMD_DEBUG_TRACE("Invalid owner_id=%016"PRIX64".\n", owner_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid owner_id=%016"PRIx64"\n",
+ owner_id);
return 0;
}
return 1;
@@ -437,7 +438,8 @@ _rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id,
int sret;
if (port_id >= RTE_MAX_ETHPORTS || !is_allocated(ethdev)) {
- RTE_PMD_DEBUG_TRACE("Port id %"PRIu16" is not allocated.\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Port id %"PRIu16" is not allocated\n",
+ port_id);
return -ENODEV;
}
@@ -447,22 +449,22 @@ _rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id,
port_owner = &rte_eth_devices[port_id].data->owner;
if (port_owner->id != old_owner_id) {
- RTE_PMD_DEBUG_TRACE("Cannot set owner to port %d already owned"
- " by %s_%016"PRIX64".\n", port_id,
- port_owner->name, port_owner->id);
+ RTE_ETHDEV_LOG(ERR,
+ "Cannot set owner to port %u already owned by %s_%016"PRIX64"\n",
+ port_id, port_owner->name, port_owner->id);
return -EPERM;
}
sret = snprintf(port_owner->name, RTE_ETH_MAX_OWNER_NAME_LEN, "%s",
new_owner->name);
if (sret < 0 || sret >= RTE_ETH_MAX_OWNER_NAME_LEN)
- RTE_PMD_DEBUG_TRACE("Port %d owner name was truncated.\n",
- port_id);
+ RTE_ETHDEV_LOG(ERR, "Port %u owner name was truncated\n",
+ port_id);
port_owner->id = new_owner->id;
- RTE_PMD_DEBUG_TRACE("Port %d owner is %s_%016"PRIX64".\n", port_id,
- new_owner->name, new_owner->id);
+ RTE_ETHDEV_LOG(DEBUG, "Port %u owner is %s_%016"PRIx64"\n",
+ port_id, new_owner->name, new_owner->id);
return 0;
}
@@ -514,8 +516,9 @@ rte_eth_dev_owner_delete(const uint64_t owner_id)
if (rte_eth_devices[port_id].data->owner.id == owner_id)
memset(&rte_eth_devices[port_id].data->owner, 0,
sizeof(struct rte_eth_dev_owner));
- RTE_PMD_DEBUG_TRACE("All port owners owned by %016"PRIX64
- " identifier have removed.\n", owner_id);
+ RTE_ETHDEV_LOG(ERR,
+ "All port owners owned by %016"PRIx64" identifier have removed\n",
+ owner_id);
}
rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
@@ -532,7 +535,8 @@ rte_eth_dev_owner_get(const uint16_t port_id, struct rte_eth_dev_owner *owner)
rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
if (port_id >= RTE_MAX_ETHPORTS || !is_allocated(ethdev)) {
- RTE_PMD_DEBUG_TRACE("Port id %"PRIu16" is not allocated.\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Port id %"PRIu16" is not allocated\n",
+ port_id);
ret = -ENODEV;
} else {
rte_memcpy(owner, &ethdev->data->owner, sizeof(*owner));
@@ -596,7 +600,7 @@ rte_eth_dev_get_name_by_port(uint16_t port_id, char *name)
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
if (name == NULL) {
- RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
+ RTE_ETHDEV_LOG(ERR, "Null pointer is specified\n");
return -EINVAL;
}
@@ -613,7 +617,7 @@ rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id)
uint32_t pid;
if (name == NULL) {
- RTE_PMD_DEBUG_TRACE("Null pointer is specified\n");
+ RTE_ETHDEV_LOG(ERR, "Null pointer is specified\n");
return -EINVAL;
}
@@ -654,7 +658,7 @@ rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
}
/* parse devargs */
- if (rte_devargs_parse(&da, "%s", devargs))
+ if (rte_devargs_parse(&da, devargs))
goto err;
ret = rte_eal_hotplug_add(da.bus->name, da.name, da.args);
@@ -663,7 +667,7 @@ rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
/* no point looking at the port count if no port exists */
if (!rte_eth_dev_count_total()) {
- ethdev_log(ERR, "No port found for device (%s)", da.name);
+ RTE_ETHDEV_LOG(ERR, "No port found for device (%s)\n", da.name);
ret = -1;
goto err;
}
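The parse call above also reflects this release's rte_devargs_parse() signature change: the devargs string is now taken verbatim, and printf-style construction moved to the new rte_devargs_parsef() added to the EXPERIMENTAL section of the EAL version map earlier in this patch. A sketch with hypothetical argument values:

	struct rte_devargs da;

	/* string used as-is, no format expansion */
	rte_devargs_parse(&da, "net_tap0,iface=test0");
	/* formatted variant for callers that previously passed "%s" */
	rte_devargs_parsef(&da, "%s,iface=%s", "net_tap0", "test0");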
@@ -698,8 +702,8 @@ rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
dev_flags = rte_eth_devices[port_id].data->dev_flags;
if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) {
- ethdev_log(ERR,
- "Port %" PRIu16 " is bonded, cannot detach", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %"PRIu16" is bonded, cannot detach\n", port_id);
return -ENOTSUP;
}
@@ -778,21 +782,22 @@ rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id)
dev = &rte_eth_devices[port_id];
if (!dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be started before start any queue\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u must be started before start any queue\n",
+ port_id);
return -EINVAL;
}
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP);
if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already started\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already started\n",
rx_queue_id, port_id);
return 0;
}
@@ -811,15 +816,15 @@ rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id)
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_stop, -ENOTSUP);
if (dev->data->rx_queue_state[rx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already stopped\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already stopped\n",
rx_queue_id, port_id);
return 0;
}
@@ -837,28 +842,27 @@ rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id)
dev = &rte_eth_devices[port_id];
if (!dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be started before start any queue\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u must be started before start any queue\n",
+ port_id);
return -EINVAL;
}
if (tx_queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_start, -ENOTSUP);
if (dev->data->tx_queue_state[tx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already started\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already started\n",
tx_queue_id, port_id);
return 0;
}
- return eth_err(port_id, dev->dev_ops->tx_queue_start(dev,
- tx_queue_id));
-
+ return eth_err(port_id, dev->dev_ops->tx_queue_start(dev, tx_queue_id));
}
int
@@ -870,15 +874,15 @@ rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id)
dev = &rte_eth_devices[port_id];
if (tx_queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_stop, -ENOTSUP);
if (dev->data->tx_queue_state[tx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) {
- RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8
- " already stopped\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Queue %"PRIu16" of device with port_id=%"PRIu16" already stopped\n",
tx_queue_id, port_id);
return 0;
}
@@ -970,41 +974,6 @@ rte_eth_speed_bitflag(uint32_t speed, int duplex)
}
}
-/**
- * A conversion function from rxmode bitfield API.
- */
-static void
-rte_eth_convert_rx_offload_bitfield(const struct rte_eth_rxmode *rxmode,
- uint64_t *rx_offloads)
-{
- uint64_t offloads = 0;
-
- if (rxmode->header_split == 1)
- offloads |= DEV_RX_OFFLOAD_HEADER_SPLIT;
- if (rxmode->hw_ip_checksum == 1)
- offloads |= DEV_RX_OFFLOAD_CHECKSUM;
- if (rxmode->hw_vlan_filter == 1)
- offloads |= DEV_RX_OFFLOAD_VLAN_FILTER;
- if (rxmode->hw_vlan_strip == 1)
- offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
- if (rxmode->hw_vlan_extend == 1)
- offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND;
- if (rxmode->jumbo_frame == 1)
- offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
- if (rxmode->hw_strip_crc == 1)
- offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
- if (rxmode->enable_scatter == 1)
- offloads |= DEV_RX_OFFLOAD_SCATTER;
- if (rxmode->enable_lro == 1)
- offloads |= DEV_RX_OFFLOAD_TCP_LRO;
- if (rxmode->hw_timestamp == 1)
- offloads |= DEV_RX_OFFLOAD_TIMESTAMP;
- if (rxmode->security == 1)
- offloads |= DEV_RX_OFFLOAD_SECURITY;
-
- *rx_offloads = offloads;
-}
-
const char * __rte_experimental
rte_eth_dev_rx_offload_name(uint64_t offload)
{
@@ -1071,33 +1040,26 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
}
if (nb_rx_q > RTE_MAX_QUEUES_PER_PORT) {
- RTE_PMD_DEBUG_TRACE(
+ RTE_ETHDEV_LOG(ERR,
"Number of RX queues requested (%u) is greater than max supported(%d)\n",
nb_rx_q, RTE_MAX_QUEUES_PER_PORT);
return -EINVAL;
}
if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) {
- RTE_PMD_DEBUG_TRACE(
+ RTE_ETHDEV_LOG(ERR,
"Number of TX queues requested (%u) is greater than max supported(%d)\n",
nb_tx_q, RTE_MAX_QUEUES_PER_PORT);
return -EINVAL;
}
if (dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be stopped to allow configuration\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u must be stopped to allow configuration\n",
+ port_id);
return -EBUSY;
}
- /*
- * Convert between the offloads API to enable PMDs to support
- * only one of them.
- */
- if (dev_conf->rxmode.ignore_offload_bitfield == 0)
- rte_eth_convert_rx_offload_bitfield(
- &dev_conf->rxmode, &local_conf.rxmode.offloads);
-
/* Copy the dev_conf parameter into the dev structure */
memcpy(&dev->data->dev_conf, &local_conf, sizeof(dev->data->dev_conf));
@@ -1107,28 +1069,28 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
* configured device.
*/
if (nb_rx_q > dev_info.max_rx_queues) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_queues=%d > %d\n",
- port_id, nb_rx_q, dev_info.max_rx_queues);
+ RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_rx_queues=%u > %u\n",
+ port_id, nb_rx_q, dev_info.max_rx_queues);
return -EINVAL;
}
if (nb_tx_q > dev_info.max_tx_queues) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_queues=%d > %d\n",
- port_id, nb_tx_q, dev_info.max_tx_queues);
+ RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_tx_queues=%u > %u\n",
+ port_id, nb_tx_q, dev_info.max_tx_queues);
return -EINVAL;
}
/* Check that the device supports requested interrupts */
if ((dev_conf->intr_conf.lsc == 1) &&
- (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
- RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n",
- dev->device->driver->name);
- return -EINVAL;
+ (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
+ RTE_ETHDEV_LOG(ERR, "Driver %s does not support lsc\n",
+ dev->device->driver->name);
+ return -EINVAL;
}
if ((dev_conf->intr_conf.rmv == 1) &&
- (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) {
- RTE_PMD_DEBUG_TRACE("driver %s does not support rmv\n",
- dev->device->driver->name);
+ (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) {
+ RTE_ETHDEV_LOG(ERR, "Driver %s does not support rmv\n",
+ dev->device->driver->name);
return -EINVAL;
}
@@ -1137,19 +1099,16 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
* length is supported by the configured device.
*/
if (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
- if (dev_conf->rxmode.max_rx_pkt_len >
- dev_info.max_rx_pktlen) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
- " > max valid value %u\n",
- port_id,
- (unsigned)dev_conf->rxmode.max_rx_pkt_len,
- (unsigned)dev_info.max_rx_pktlen);
+ if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) {
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u max_rx_pkt_len %u > max valid value %u\n",
+ port_id, dev_conf->rxmode.max_rx_pkt_len,
+ dev_info.max_rx_pktlen);
return -EINVAL;
} else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u"
- " < min valid value %u\n",
- port_id,
- (unsigned)dev_conf->rxmode.max_rx_pkt_len,
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u max_rx_pkt_len %u < min valid value %u\n",
+ port_id, dev_conf->rxmode.max_rx_pkt_len,
(unsigned)ETHER_MIN_LEN);
return -EINVAL;
}
@@ -1164,36 +1123,42 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
/* Any requested offloading must be within its device capabilities */
if ((local_conf.rxmode.offloads & dev_info.rx_offload_capa) !=
local_conf.rxmode.offloads) {
- ethdev_log(ERR, "ethdev port_id=%d requested Rx offloads "
- "0x%" PRIx64 " doesn't match Rx offloads "
- "capabilities 0x%" PRIx64 " in %s()\n",
- port_id,
- local_conf.rxmode.offloads,
- dev_info.rx_offload_capa,
- __func__);
- /* Will return -EINVAL in the next release */
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u requested Rx offloads 0x%"PRIx64" doesn't match Rx offloads "
+ "capabilities 0x%"PRIx64" in %s()\n",
+ port_id, local_conf.rxmode.offloads,
+ dev_info.rx_offload_capa,
+ __func__);
+ return -EINVAL;
}
if ((local_conf.txmode.offloads & dev_info.tx_offload_capa) !=
local_conf.txmode.offloads) {
- ethdev_log(ERR, "ethdev port_id=%d requested Tx offloads "
- "0x%" PRIx64 " doesn't match Tx offloads "
- "capabilities 0x%" PRIx64 " in %s()\n",
- port_id,
- local_conf.txmode.offloads,
- dev_info.tx_offload_capa,
- __func__);
- /* Will return -EINVAL in the next release */
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u requested Tx offloads 0x%"PRIx64" doesn't match Tx offloads "
+ "capabilities 0x%"PRIx64" in %s()\n",
+ port_id, local_conf.txmode.offloads,
+ dev_info.tx_offload_capa,
+ __func__);
+ return -EINVAL;
+ }
+
+ if ((local_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP) &&
+ (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)) {
+ RTE_ETHDEV_LOG(ERR,
+ "Port id=%u not allowed to set both CRC STRIP and KEEP CRC offload flags\n",
+ port_id);
+ return -EINVAL;
}
/* Check that device supports requested rss hash functions. */
if ((dev_info.flow_type_rss_offloads |
dev_conf->rx_adv_conf.rss_conf.rss_hf) !=
dev_info.flow_type_rss_offloads) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d invalid rss_hf: "
- "0x%"PRIx64", valid value: 0x%"PRIx64"\n",
- port_id,
- dev_conf->rx_adv_conf.rss_conf.rss_hf,
- dev_info.flow_type_rss_offloads);
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u invalid rss_hf: 0x%"PRIx64", valid value: 0x%"PRIx64"\n",
+ port_id, dev_conf->rx_adv_conf.rss_conf.rss_hf,
+ dev_info.flow_type_rss_offloads);
+ return -EINVAL;
}
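Since an unsupported rss_hf is now a hard -EINVAL rather than a logged warning, an application can mask its request against the advertised capabilities before configuring. A sketch, assuming `conf` is the rte_eth_conf being prepared for this port:

	struct rte_eth_dev_info info;

	rte_eth_dev_info_get(port_id, &info);
	/* request IP hashing, but only the types the device supports */
	conf.rx_adv_conf.rss_conf.rss_hf =
		ETH_RSS_IP & info.flow_type_rss_offloads;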
/*
@@ -1201,23 +1166,25 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
*/
diag = rte_eth_dev_rx_queue_config(dev, nb_rx_q);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_rx_queue_config = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR,
+ "Port%u rte_eth_dev_rx_queue_config = %d\n",
+ port_id, diag);
return diag;
}
diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_tx_queue_config = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR,
+ "Port%u rte_eth_dev_tx_queue_config = %d\n",
+ port_id, diag);
rte_eth_dev_rx_queue_config(dev, 0);
return diag;
}
diag = (*dev->dev_ops->dev_configure)(dev);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d dev_configure = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR, "Port%u dev_configure = %d\n",
+ port_id, diag);
rte_eth_dev_rx_queue_config(dev, 0);
rte_eth_dev_tx_queue_config(dev, 0);
return eth_err(port_id, diag);
@@ -1226,8 +1193,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
/* Initialize Rx profiling if enabled at compilation time. */
diag = __rte_eth_profile_rx_init(port_id, dev);
if (diag != 0) {
- RTE_PMD_DEBUG_TRACE("port%d __rte_eth_profile_rx_init = %d\n",
- port_id, diag);
+ RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_profile_rx_init = %d\n",
+ port_id, diag);
rte_eth_dev_rx_queue_config(dev, 0);
rte_eth_dev_tx_queue_config(dev, 0);
return eth_err(port_id, diag);
@@ -1240,8 +1207,7 @@ void
_rte_eth_dev_reset(struct rte_eth_dev *dev)
{
if (dev->data->dev_started) {
- RTE_PMD_DEBUG_TRACE(
- "port %d must be stopped to allow reset\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u must be stopped to allow reset\n",
dev->data->port_id);
return;
}
@@ -1320,8 +1286,8 @@ rte_eth_dev_start(uint16_t port_id)
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP);
if (dev->data->dev_started != 0) {
- RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16
- " already started\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Device with port_id=%"PRIu16" already started\n",
port_id);
return 0;
}
@@ -1352,8 +1318,8 @@ rte_eth_dev_stop(uint16_t port_id)
RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop);
if (dev->data->dev_started == 0) {
- RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16
- " already stopped\n",
+ RTE_ETHDEV_LOG(INFO,
+ "Device with port_id=%"PRIu16" already stopped\n",
port_id);
return;
}
@@ -1465,7 +1431,7 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
@@ -1479,23 +1445,20 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
*/
rte_eth_dev_info_get(port_id, &dev_info);
if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) {
- RTE_PMD_DEBUG_TRACE("%s private_data_size %d < %d\n",
- mp->name, (int) mp->private_data_size,
- (int) sizeof(struct rte_pktmbuf_pool_private));
+ RTE_ETHDEV_LOG(ERR, "%s private_data_size %d < %d\n",
+ mp->name, (int)mp->private_data_size,
+ (int)sizeof(struct rte_pktmbuf_pool_private));
return -ENOSPC;
}
mbp_buf_size = rte_pktmbuf_data_room_size(mp);
if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) {
- RTE_PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d "
- "(RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)"
- "=%d)\n",
- mp->name,
- (int)mbp_buf_size,
- (int)(RTE_PKTMBUF_HEADROOM +
- dev_info.min_rx_bufsize),
- (int)RTE_PKTMBUF_HEADROOM,
- (int)dev_info.min_rx_bufsize);
+ RTE_ETHDEV_LOG(ERR,
+ "%s mbuf_data_room_size %d < %d (RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)=%d)\n",
+ mp->name, (int)mbp_buf_size,
+ (int)(RTE_PKTMBUF_HEADROOM + dev_info.min_rx_bufsize),
+ (int)RTE_PKTMBUF_HEADROOM,
+ (int)dev_info.min_rx_bufsize);
return -EINVAL;
}
@@ -1511,10 +1474,9 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
- RTE_PMD_DEBUG_TRACE("Invalid value for nb_rx_desc(=%hu), "
- "should be: <= %hu, = %hu, and a product of %hu\n",
- nb_rx_desc,
- dev_info.rx_desc_lim.nb_max,
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n",
+ nb_rx_desc, dev_info.rx_desc_lim.nb_max,
dev_info.rx_desc_lim.nb_min,
dev_info.rx_desc_lim.nb_align);
return -EINVAL;
@@ -1542,14 +1504,6 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
rx_conf = &dev_info.default_rxconf;
local_conf = *rx_conf;
- if (dev->data->dev_conf.rxmode.ignore_offload_bitfield == 0) {
- /**
- * Reflect port offloads to queue offloads in order for
- * offloads to not be discarded.
- */
- rte_eth_convert_rx_offload_bitfield(&dev->data->dev_conf.rxmode,
- &local_conf.offloads);
- }
/*
* If an offloading has already been enabled in
@@ -1571,16 +1525,13 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
*/
if ((local_conf.offloads & dev_info.rx_queue_offload_capa) !=
local_conf.offloads) {
- ethdev_log(ERR, "Ethdev port_id=%d rx_queue_id=%d, new "
- "added offloads 0x%" PRIx64 " must be "
- "within pre-queue offload capabilities 0x%"
- PRIx64 " in %s()\n",
- port_id,
- rx_queue_id,
- local_conf.offloads,
- dev_info.rx_queue_offload_capa,
- __func__);
- /* Will return -EINVAL in the next release */
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%d rx_queue_id=%d, new added offloads 0x%"PRIx64" must be "
+ "within pre-queue offload capabilities 0x%"PRIx64" in %s()\n",
+ port_id, rx_queue_id, local_conf.offloads,
+ dev_info.rx_queue_offload_capa,
+ __func__);
+ return -EINVAL;
}
ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc,
@@ -1594,55 +1545,6 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
return eth_err(port_id, ret);
}
-/**
- * Convert from tx offloads to txq_flags.
- */
-static void
-rte_eth_convert_tx_offload(const uint64_t tx_offloads, uint32_t *txq_flags)
-{
- uint32_t flags = 0;
-
- if (!(tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS))
- flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
- if (!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT))
- flags |= ETH_TXQ_FLAGS_NOVLANOFFL;
- if (!(tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM))
- flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
- if (!(tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM))
- flags |= ETH_TXQ_FLAGS_NOXSUMUDP;
- if (!(tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM))
- flags |= ETH_TXQ_FLAGS_NOXSUMTCP;
- if (tx_offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
- flags |= ETH_TXQ_FLAGS_NOREFCOUNT | ETH_TXQ_FLAGS_NOMULTMEMP;
-
- *txq_flags = flags;
-}
-
-/**
- * A conversion function from txq_flags API.
- */
-static void
-rte_eth_convert_txq_flags(const uint32_t txq_flags, uint64_t *tx_offloads)
-{
- uint64_t offloads = 0;
-
- if (!(txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS))
- offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOVLANOFFL))
- offloads |= DEV_TX_OFFLOAD_VLAN_INSERT;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP))
- offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMUDP))
- offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMTCP))
- offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- if ((txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT) &&
- (txq_flags & ETH_TXQ_FLAGS_NOMULTMEMP))
- offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
-
- *tx_offloads = offloads;
-}
-
int
rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
uint16_t nb_tx_desc, unsigned int socket_id,
@@ -1657,7 +1559,7 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
dev = &rte_eth_devices[port_id];
if (tx_queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
return -EINVAL;
}
@@ -1676,12 +1578,11 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
- RTE_PMD_DEBUG_TRACE("Invalid value for nb_tx_desc(=%hu), "
- "should be: <= %hu, = %hu, and a product of %hu\n",
- nb_tx_desc,
- dev_info.tx_desc_lim.nb_max,
- dev_info.tx_desc_lim.nb_min,
- dev_info.tx_desc_lim.nb_align);
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid value for nb_tx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n",
+ nb_tx_desc, dev_info.tx_desc_lim.nb_max,
+ dev_info.tx_desc_lim.nb_min,
+ dev_info.tx_desc_lim.nb_align);
return -EINVAL;
}
@@ -1706,15 +1607,7 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
if (tx_conf == NULL)
tx_conf = &dev_info.default_txconf;
- /*
- * Convert between the offloads API to enable PMDs to support
- * only one of them.
- */
local_conf = *tx_conf;
- if (!(tx_conf->txq_flags & ETH_TXQ_FLAGS_IGNORE)) {
- rte_eth_convert_txq_flags(tx_conf->txq_flags,
- &local_conf.offloads);
- }
/*
* If an offloading has already been enabled in
@@ -1736,16 +1629,13 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
*/
if ((local_conf.offloads & dev_info.tx_queue_offload_capa) !=
local_conf.offloads) {
- ethdev_log(ERR, "Ethdev port_id=%d tx_queue_id=%d, new "
- "added offloads 0x%" PRIx64 " must be "
- "within pre-queue offload capabilities 0x%"
- PRIx64 " in %s()\n",
- port_id,
- tx_queue_id,
- local_conf.offloads,
- dev_info.tx_queue_offload_capa,
- __func__);
- /* Will return -EINVAL in the next release */
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%d tx_queue_id=%d, new added offloads 0x%"PRIx64" must be "
+ "within pre-queue offload capabilities 0x%"PRIx64" in %s()\n",
+ port_id, tx_queue_id, local_conf.offloads,
+ dev_info.tx_queue_offload_capa,
+ __func__);
+ return -EINVAL;
}
return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
@@ -2009,19 +1899,19 @@ rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (!id) {
- RTE_PMD_DEBUG_TRACE("Error: id pointer is NULL\n");
+ RTE_ETHDEV_LOG(ERR, "Id pointer is NULL\n");
return -ENOMEM;
}
if (!xstat_name) {
- RTE_PMD_DEBUG_TRACE("Error: xstat_name pointer is NULL\n");
+ RTE_ETHDEV_LOG(ERR, "xstat_name pointer is NULL\n");
return -ENOMEM;
}
/* Get count */
cnt_xstats = rte_eth_xstats_get_names_by_id(port_id, NULL, 0, NULL);
if (cnt_xstats < 0) {
- RTE_PMD_DEBUG_TRACE("Error: Cannot get count of xstats\n");
+ RTE_ETHDEV_LOG(ERR, "Cannot get count of xstats\n");
return -ENODEV;
}
@@ -2030,7 +1920,7 @@ rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name,
if (cnt_xstats != rte_eth_xstats_get_names_by_id(
port_id, xstats_names, cnt_xstats, NULL)) {
- RTE_PMD_DEBUG_TRACE("Error: Cannot get xstats lookup\n");
+ RTE_ETHDEV_LOG(ERR, "Cannot get xstats lookup\n");
return -1;
}
@@ -2153,7 +2043,7 @@ rte_eth_xstats_get_names_by_id(uint16_t port_id,
sizeof(struct rte_eth_xstat_name));
if (!xstats_names_copy) {
- RTE_PMD_DEBUG_TRACE("ERROR: can't allocate memory");
+ RTE_ETHDEV_LOG(ERR, "Can't allocate memory\n");
return -ENOMEM;
}
@@ -2181,7 +2071,7 @@ rte_eth_xstats_get_names_by_id(uint16_t port_id,
/* Filter stats */
for (i = 0; i < size; i++) {
if (ids[i] >= expected_entries) {
- RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n");
+ RTE_ETHDEV_LOG(ERR, "Id value isn't valid\n");
free(xstats_names_copy);
return -1;
}
@@ -2366,7 +2256,7 @@ rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids,
/* Filter stats */
for (i = 0; i < size; i++) {
if (ids[i] >= expected_entries) {
- RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n");
+ RTE_ETHDEV_LOG(ERR, "Id value isn't valid\n");
return -1;
}
values[i] = xstats[ids[i]].value;
@@ -2456,6 +2346,16 @@ set_queue_stats_mapping(uint16_t port_id, uint16_t queue_id, uint8_t stat_idx,
dev = &rte_eth_devices[port_id];
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_stats_mapping_set, -ENOTSUP);
+
+ if (is_rx && (queue_id >= dev->data->nb_rx_queues))
+ return -EINVAL;
+
+ if (!is_rx && (queue_id >= dev->data->nb_tx_queues))
+ return -EINVAL;
+
+ if (stat_idx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
+ return -EINVAL;
+
return (*dev->dev_ops->queue_stats_mapping_set)
(dev, queue_id, stat_idx, is_rx);
}
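As a usage sketch of the path just hardened, assuming a PMD that implements queue_stats_mapping_set: an out-of-range queue_id or stat_idx now fails up front with -EINVAL instead of reaching the driver. The queue and counter indexes below are hypothetical.

	int ret = rte_eth_dev_set_rx_queue_stats_mapping(port_id, 3, 1);
	/* stats for Rx queue 3 then accumulate in q_ipackets[1]/q_ibytes[1] */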
@@ -2495,7 +2395,6 @@ void
rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info)
{
struct rte_eth_dev *dev;
- struct rte_eth_txconf *txconf;
const struct rte_eth_desc_lim lim = {
.nb_max = UINT16_MAX,
.nb_min = 0,
@@ -2517,9 +2416,6 @@ rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info)
dev_info->nb_tx_queues = dev->data->nb_tx_queues;
dev_info->dev_flags = &dev->data->dev_flags;
- txconf = &dev_info->default_txconf;
- /* convert offload to txq_flags to support legacy app */
- rte_eth_convert_tx_offload(txconf->offloads, &txconf->txq_flags);
}
int
@@ -2598,13 +2494,14 @@ rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on)
dev = &rte_eth_devices[port_id];
if (!(dev->data->dev_conf.rxmode.offloads &
DEV_RX_OFFLOAD_VLAN_FILTER)) {
- RTE_PMD_DEBUG_TRACE("port %d: vlan-filtering disabled\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Port %u: vlan-filtering disabled\n",
+ port_id);
return -ENOSYS;
}
if (vlan_id > 4095) {
- RTE_PMD_DEBUG_TRACE("(port_id=%d) invalid vlan_id=%u > 4095\n",
- port_id, (unsigned) vlan_id);
+ RTE_ETHDEV_LOG(ERR, "Port_id=%u invalid vlan_id=%u > 4095\n",
+ port_id, vlan_id);
return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP);
@@ -2637,7 +2534,7 @@ rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
dev = &rte_eth_devices[port_id];
if (rx_queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid rx_queue_id=%d\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid rx_queue_id=%u\n", rx_queue_id);
return -EINVAL;
}
@@ -2786,7 +2683,7 @@ rte_eth_dev_flow_ctrl_set(uint16_t port_id, struct rte_eth_fc_conf *fc_conf)
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if ((fc_conf->send_xon != 0) && (fc_conf->send_xon != 1)) {
- RTE_PMD_DEBUG_TRACE("Invalid send_xon, only 0/1 allowed\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid send_xon, only 0/1 allowed\n");
return -EINVAL;
}
@@ -2803,7 +2700,7 @@ rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (pfc_conf->priority > (ETH_DCB_NUM_USER_PRIORITIES - 1)) {
- RTE_PMD_DEBUG_TRACE("Invalid priority, only 0-7 allowed\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid priority, only 0-7 allowed\n");
return -EINVAL;
}
@@ -2844,7 +2741,7 @@ rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf,
return -EINVAL;
if (max_rxq == 0) {
- RTE_PMD_DEBUG_TRACE("No receive queue is available\n");
+ RTE_ETHDEV_LOG(ERR, "No receive queue is available\n");
return -EINVAL;
}
@@ -2853,8 +2750,9 @@ rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf,
shift = i % RTE_RETA_GROUP_SIZE;
if ((reta_conf[idx].mask & (1ULL << shift)) &&
(reta_conf[idx].reta[shift] >= max_rxq)) {
- RTE_PMD_DEBUG_TRACE("reta_conf[%u]->reta[%u]: %u exceeds "
- "the maximum rxq index: %u\n", idx, shift,
+ RTE_ETHDEV_LOG(ERR,
+ "reta_conf[%u]->reta[%u]: %u exceeds the maximum rxq index: %u\n",
+ idx, shift,
reta_conf[idx].reta[shift], max_rxq);
return -EINVAL;
}
@@ -2923,11 +2821,11 @@ rte_eth_dev_rss_hash_update(uint16_t port_id,
rte_eth_dev_info_get(port_id, &dev_info);
if ((dev_info.flow_type_rss_offloads | rss_conf->rss_hf) !=
dev_info.flow_type_rss_offloads) {
- RTE_PMD_DEBUG_TRACE("ethdev port_id=%d invalid rss_hf: "
- "0x%"PRIx64", valid value: 0x%"PRIx64"\n",
- port_id,
- rss_conf->rss_hf,
- dev_info.flow_type_rss_offloads);
+ RTE_ETHDEV_LOG(ERR,
+ "Ethdev port_id=%u invalid rss_hf: 0x%"PRIx64", valid value: 0x%"PRIx64"\n",
+ port_id, rss_conf->rss_hf,
+ dev_info.flow_type_rss_offloads);
+ return -EINVAL;
}
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP);
return eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev,
@@ -2955,12 +2853,12 @@ rte_eth_dev_udp_tunnel_port_add(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (udp_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid udp_tunnel parameter\n");
return -EINVAL;
}
if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
@@ -2980,12 +2878,12 @@ rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id,
dev = &rte_eth_devices[port_id];
if (udp_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid udp_tunnel parameter\n");
return -EINVAL;
}
if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
@@ -3053,12 +2951,12 @@ rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr,
RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_add, -ENOTSUP);
if (is_zero_ether_addr(addr)) {
- RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u: Cannot add NULL MAC address\n",
port_id);
return -EINVAL;
}
if (pool >= ETH_64_POOLS) {
- RTE_PMD_DEBUG_TRACE("pool id must be 0-%d\n", ETH_64_POOLS - 1);
+ RTE_ETHDEV_LOG(ERR, "Pool id must be 0-%d\n", ETH_64_POOLS - 1);
return -EINVAL;
}
@@ -3066,7 +2964,7 @@ rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr,
if (index < 0) {
index = get_mac_addr_index(port_id, &null_mac_addr);
if (index < 0) {
- RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u: MAC address array full\n",
port_id);
return -ENOSPC;
}
@@ -3104,7 +3002,9 @@ rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *addr)
index = get_mac_addr_index(port_id, addr);
if (index == 0) {
- RTE_PMD_DEBUG_TRACE("port %d: Cannot remove default MAC address\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u: Cannot remove default MAC address\n",
+ port_id);
return -EADDRINUSE;
} else if (index < 0)
return 0; /* Do nothing if address wasn't found */
@@ -3181,7 +3081,7 @@ rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr,
dev = &rte_eth_devices[port_id];
if (is_zero_ether_addr(addr)) {
- RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n",
+ RTE_ETHDEV_LOG(ERR, "Port %u: Cannot add NULL MAC address\n",
port_id);
return -EINVAL;
}
@@ -3193,15 +3093,16 @@ rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr,
if (index < 0) {
if (!on) {
- RTE_PMD_DEBUG_TRACE("port %d: the MAC address was not "
- "set in UTA\n", port_id);
+ RTE_ETHDEV_LOG(ERR,
+ "Port %u: the MAC address was not set in UTA\n",
+ port_id);
return -EINVAL;
}
index = get_hash_mac_addr_index(port_id, &null_mac_addr);
if (index < 0) {
- RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n",
- port_id);
+ RTE_ETHDEV_LOG(ERR, "Port %u: MAC address array full\n",
+ port_id);
return -ENOSPC;
}
}
@@ -3249,14 +3150,15 @@ int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
link = dev->data->dev_link;
if (queue_idx > dev_info.max_tx_queues) {
- RTE_PMD_DEBUG_TRACE("set queue rate limit:port %d: "
- "invalid queue id=%d\n", port_id, queue_idx);
+ RTE_ETHDEV_LOG(ERR,
+ "Set queue rate limit:port %u: invalid queue id=%u\n",
+ port_id, queue_idx);
return -EINVAL;
}
if (tx_rate > link.link_speed) {
- RTE_PMD_DEBUG_TRACE("set queue rate limit:invalid tx_rate=%d, "
- "bigger than link speed= %d\n",
+ RTE_ETHDEV_LOG(ERR,
+ "Set queue rate limit:invalid tx_rate=%u, bigger than link speed= %d\n",
tx_rate, link.link_speed);
return -EINVAL;
}
@@ -3275,26 +3177,28 @@ rte_eth_mirror_rule_set(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (mirror_conf->rule_type == 0) {
- RTE_PMD_DEBUG_TRACE("mirror rule type can not be 0.\n");
+ RTE_ETHDEV_LOG(ERR, "Mirror rule type cannot be 0\n");
return -EINVAL;
}
if (mirror_conf->dst_pool >= ETH_64_POOLS) {
- RTE_PMD_DEBUG_TRACE("Invalid dst pool, pool id must be 0-%d\n",
- ETH_64_POOLS - 1);
+ RTE_ETHDEV_LOG(ERR, "Invalid dst pool, pool id must be 0-%d\n",
+ ETH_64_POOLS - 1);
return -EINVAL;
}
if ((mirror_conf->rule_type & (ETH_MIRROR_VIRTUAL_POOL_UP |
ETH_MIRROR_VIRTUAL_POOL_DOWN)) &&
(mirror_conf->pool_mask == 0)) {
- RTE_PMD_DEBUG_TRACE("Invalid mirror pool, pool mask can not be 0.\n");
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid mirror pool, pool mask can not be 0\n");
return -EINVAL;
}
if ((mirror_conf->rule_type & ETH_MIRROR_VLAN) &&
mirror_conf->vlan.vlan_mask == 0) {
- RTE_PMD_DEBUG_TRACE("Invalid vlan mask, vlan mask can not be 0.\n");
+ RTE_ETHDEV_LOG(ERR,
+ "Invalid vlan mask, vlan mask can not be 0\n");
return -EINVAL;
}
@@ -3341,7 +3245,7 @@ rte_eth_dev_callback_register(uint16_t port_id,
return -EINVAL;
if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) {
- ethdev_log(ERR, "Invalid port_id=%d", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%d\n", port_id);
return -EINVAL;
}
@@ -3404,7 +3308,7 @@ rte_eth_dev_callback_unregister(uint16_t port_id,
return -EINVAL;
if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) {
- ethdev_log(ERR, "Invalid port_id=%d", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%d\n", port_id);
return -EINVAL;
}
@@ -3498,13 +3402,13 @@ rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data)
dev = &rte_eth_devices[port_id];
if (!dev->intr_handle) {
- RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr handle unset\n");
return -ENOTSUP;
}
intr_handle = dev->intr_handle;
if (!intr_handle->intr_vec) {
- RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr vector unset\n");
return -EPERM;
}
@@ -3512,9 +3416,9 @@ rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data)
vec = intr_handle->intr_vec[qid];
rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data);
if (rc && rc != -EEXIST) {
- RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error"
- " op %d epfd %d vec %u\n",
- port_id, qid, op, epfd, vec);
+ RTE_ETHDEV_LOG(ERR,
+ "p %u q %u rx ctl error op %d epfd %d vec %u\n",
+ port_id, qid, op, epfd, vec);
}
}
@@ -3649,27 +3553,27 @@ rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id,
dev = &rte_eth_devices[port_id];
if (queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%u\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
return -EINVAL;
}
if (!dev->intr_handle) {
- RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr handle unset\n");
return -ENOTSUP;
}
intr_handle = dev->intr_handle;
if (!intr_handle->intr_vec) {
- RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n");
+ RTE_ETHDEV_LOG(ERR, "RX Intr vector unset\n");
return -EPERM;
}
vec = intr_handle->intr_vec[queue_id];
rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data);
if (rc && rc != -EEXIST) {
- RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error"
- " op %d epfd %d vec %u\n",
- port_id, queue_id, op, epfd, vec);
+ RTE_ETHDEV_LOG(ERR,
+ "p %u q %u rx ctl error op %d epfd %d vec %u\n",
+ port_id, queue_id, op, epfd, vec);
return rc;
}
@@ -3936,7 +3840,7 @@ rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
dev = &rte_eth_devices[port_id];
if (queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
return -EINVAL;
}
@@ -3952,7 +3856,6 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
struct rte_eth_txq_info *qinfo)
{
struct rte_eth_dev *dev;
- struct rte_eth_txconf *txconf = &qinfo->conf;
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
@@ -3961,7 +3864,7 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
dev = &rte_eth_devices[port_id];
if (queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
return -EINVAL;
}
@@ -3969,8 +3872,6 @@ rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
memset(qinfo, 0, sizeof(*qinfo));
dev->dev_ops->txq_info_get(dev, queue_id, qinfo);
- /* convert offload to txq_flags to support legacy app */
- rte_eth_convert_tx_offload(txconf->offloads, &txconf->txq_flags);
return 0;
}
@@ -4178,12 +4079,12 @@ rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (l2_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid l2_tunnel parameter\n");
return -EINVAL;
}
if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
@@ -4205,17 +4106,17 @@ rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id,
RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
if (l2_tunnel == NULL) {
- RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid l2_tunnel parameter\n");
return -EINVAL;
}
if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) {
- RTE_PMD_DEBUG_TRACE("Invalid tunnel type.\n");
+ RTE_ETHDEV_LOG(ERR, "Invalid tunnel type\n");
return -EINVAL;
}
if (mask == 0) {
- RTE_PMD_DEBUG_TRACE("Mask should have a value.\n");
+ RTE_ETHDEV_LOG(ERR, "Mask should have a value\n");
return -EINVAL;
}
@@ -4516,11 +4417,9 @@ parse_cleanup:
return result;
}
-RTE_INIT(ethdev_init_log);
-static void
-ethdev_init_log(void)
+RTE_INIT(ethdev_init_log)
{
- ethdev_logtype = rte_log_register("lib.ethdev");
- if (ethdev_logtype >= 0)
- rte_log_set_level(ethdev_logtype, RTE_LOG_INFO);
+ rte_eth_dev_logtype = rte_log_register("lib.ethdev");
+ if (rte_eth_dev_logtype >= 0)
+ rte_log_set_level(rte_eth_dev_logtype, RTE_LOG_INFO);
}
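This hunk follows the reworked RTE_INIT() macro, which now declares the constructor and opens its body in one step, replacing the old prototype-plus-static-function pattern removed above. A generic sketch of the new form, with a hypothetical logtype variable and log name:

	static int my_logtype;

	RTE_INIT(my_init_log)
	{
		my_logtype = rte_log_register("lib.example");
		if (my_logtype >= 0)
			rte_log_set_level(my_logtype, RTE_LOG_INFO);
	}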
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index 36e3984e..7070e9ab 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -159,6 +159,11 @@ extern "C" {
#include "rte_eth_ctrl.h"
#include "rte_dev_info.h"
+extern int rte_eth_dev_logtype;
+
+#define RTE_ETHDEV_LOG(level, ...) \
+ rte_log(RTE_LOG_ ## level, rte_eth_dev_logtype, "" __VA_ARGS__)
+
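The empty string literal prepended to __VA_ARGS__ forces the first variadic argument to be a string literal (adjacent-string concatenation fails otherwise), so the compiler can still check the format arguments. A typical call from this patch and its expansion:

	RTE_ETHDEV_LOG(ERR, "Invalid port_id=%u\n", port_id);
	/* expands to:
	 * rte_log(RTE_LOG_ERR, rte_eth_dev_logtype,
	 *         "" "Invalid port_id=%u\n", port_id);
	 */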
struct rte_mbuf;
/**
@@ -321,7 +326,7 @@ enum rte_eth_tx_mq_mode {
struct rte_eth_rxmode {
/** The multi-queue packet distribution mode to be used, e.g. RSS. */
enum rte_eth_rx_mq_mode mq_mode;
- uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */
+ uint32_t max_rx_pkt_len; /**< Only used if JUMBO_FRAME enabled. */
uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/
/**
* Per-port Rx offloads to be set using DEV_RX_OFFLOAD_* flags.
@@ -329,33 +334,6 @@ struct rte_eth_rxmode {
* structure are allowed to be set.
*/
uint64_t offloads;
- __extension__
- /**
- * Below bitfield API is obsolete. Application should
- * enable per-port offloads using the offload field
- * above.
- */
- uint16_t header_split : 1, /**< Header Split enable. */
- hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */
- hw_vlan_filter : 1, /**< VLAN filter enable. */
- hw_vlan_strip : 1, /**< VLAN strip enable. */
- hw_vlan_extend : 1, /**< Extended VLAN enable. */
- jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */
- hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */
- enable_scatter : 1, /**< Enable scatter packets rx handler */
- enable_lro : 1, /**< Enable LRO */
- hw_timestamp : 1, /**< Enable HW timestamp */
- security : 1, /**< Enable rte_security offloads */
- /**
- * When set the offload bitfield should be ignored.
- * Instead per-port Rx offloads should be set on offloads
- * field above.
- * Per-queue offloads shuold be set on rte_eth_rxq_conf
- * structure.
- * This bit is temporary till rxmode bitfield offloads API will
- * be deprecated.
- */
- ignore_offload_bitfield : 1;
};
/**
@@ -702,28 +680,6 @@ struct rte_eth_rxconf {
uint64_t offloads;
};
-#define ETH_TXQ_FLAGS_NOMULTSEGS 0x0001 /**< nb_segs=1 for all mbufs */
-#define ETH_TXQ_FLAGS_NOREFCOUNT 0x0002 /**< refcnt can be ignored */
-#define ETH_TXQ_FLAGS_NOMULTMEMP 0x0004 /**< all bufs come from same mempool */
-#define ETH_TXQ_FLAGS_NOVLANOFFL 0x0100 /**< disable VLAN offload */
-#define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload */
-#define ETH_TXQ_FLAGS_NOXSUMUDP 0x0400 /**< disable UDP checksum offload */
-#define ETH_TXQ_FLAGS_NOXSUMTCP 0x0800 /**< disable TCP checksum offload */
-#define ETH_TXQ_FLAGS_NOOFFLOADS \
- (ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \
- ETH_TXQ_FLAGS_NOXSUMUDP | ETH_TXQ_FLAGS_NOXSUMTCP)
-#define ETH_TXQ_FLAGS_NOXSUMS \
- (ETH_TXQ_FLAGS_NOXSUMSCTP | ETH_TXQ_FLAGS_NOXSUMUDP | \
- ETH_TXQ_FLAGS_NOXSUMTCP)
-/**
- * When set the txq_flags should be ignored,
- * instead per-queue Tx offloads will be set on offloads field
- * located on rte_eth_txq_conf struct.
- * This flag is temporary till the rte_eth_txq_conf.txq_flags
- * API will be deprecated.
- */
-#define ETH_TXQ_FLAGS_IGNORE 0x8000
-
/**
* A structure used to configure a TX ring of an Ethernet port.
*/
@@ -733,7 +689,6 @@ struct rte_eth_txconf {
uint16_t tx_free_thresh; /**< Start freeing TX buffers if there are
less free descriptors than this value. */
- uint32_t txq_flags; /**< Set flags for the Tx queue */
uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */
/**
* Per-queue Tx offloads to be set using DEV_TX_OFFLOAD_* flags.
@@ -939,6 +894,12 @@ struct rte_eth_conf {
#define DEV_RX_OFFLOAD_SCATTER 0x00002000
#define DEV_RX_OFFLOAD_TIMESTAMP 0x00004000
#define DEV_RX_OFFLOAD_SECURITY 0x00008000
+
+/**
+ * It is invalid to set both DEV_RX_OFFLOAD_CRC_STRIP and DEV_RX_OFFLOAD_KEEP_CRC;
+ * the absence of the DEV_RX_OFFLOAD_CRC_STRIP flag means the CRC is kept.
+ */
+#define DEV_RX_OFFLOAD_KEEP_CRC 0x00010000
#define DEV_RX_OFFLOAD_CHECKSUM (DEV_RX_OFFLOAD_IPV4_CKSUM | \
DEV_RX_OFFLOAD_UDP_CKSUM | \
DEV_RX_OFFLOAD_TCP_CKSUM)
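A configuration sketch for the new flag, assuming a stopped port whose rx_offload_capa advertises KEEP_CRC; per the check added to rte_eth_dev_configure() above, combining it with DEV_RX_OFFLOAD_CRC_STRIP now fails with -EINVAL:

	struct rte_eth_conf conf = { 0 };

	conf.rxmode.offloads = DEV_RX_OFFLOAD_KEEP_CRC; /* CRC left in mbuf data */
	ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
	/* adding DEV_RX_OFFLOAD_CRC_STRIP to offloads here would yield -EINVAL */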
@@ -1003,8 +964,6 @@ struct rte_eth_conf {
* mentioned in rte_tx_offload_names in rte_ethdev.c file.
*/
-struct rte_pci_device;
-
/*
* Fallback default preferred Rx/Tx port parameters.
* These are used if an application requests default parameters
@@ -1194,14 +1153,14 @@ struct rte_eth_dcb_info {
/* Macros to check for valid port */
#define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \
if (!rte_eth_dev_is_valid_port(port_id)) { \
- RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%u\n", port_id); \
return retval; \
} \
} while (0)
#define RTE_ETH_VALID_PORTID_OR_RET(port_id) do { \
if (!rte_eth_dev_is_valid_port(port_id)) { \
- RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \
+ RTE_ETHDEV_LOG(ERR, "Invalid port_id=%u\n", port_id); \
return; \
} \
} while (0)
@@ -1472,6 +1431,7 @@ uint16_t __rte_experimental rte_eth_dev_count_total(void);
* @return
* 0 on success and port_id is filled, negative on error
*/
+__rte_deprecated
int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
/**
@@ -1487,6 +1447,7 @@ int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
* @return
* 0 on success and devname is filled, negative on error
*/
+__rte_deprecated
int rte_eth_dev_detach(uint16_t port_id, char *devname);
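Both helpers are now tagged __rte_deprecated; the replacement is the devargs/hotplug pair that rte_eth_dev_attach() itself uses earlier in this patch. A sketch with a hypothetical PCI address:

	struct rte_devargs da;

	if (rte_devargs_parse(&da, "0000:02:00.0") == 0)
		rte_eal_hotplug_add(da.bus->name, da.name, da.args);
	/* ports created by the probe can then be found via
	 * RTE_ETH_FOREACH_DEV or rte_eth_dev_get_port_by_name() */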
/**
@@ -1554,9 +1515,11 @@ const char * __rte_experimental rte_eth_dev_tx_offload_name(uint64_t offload);
* the [rt]x_offload_capa returned from rte_eth_dev_infos_get().
* Any type of device supported offloading set in the input argument
* eth_conf->[rt]xmode.offloads to rte_eth_dev_configure() is enabled
- * on all queues and it can't be disabled in rte_eth_[rt]x_queue_setup().
- * - the Receive Side Scaling (RSS) configuration when using multiple RX
- * queues per port.
+ * on all queues and it can't be disabled in rte_eth_[rt]x_queue_setup()
+ * - the Receive Side Scaling (RSS) configuration when using multiple RX
+ * queues per port. Any RSS hash function set in eth_conf->rx_adv_conf.rss_conf.rss_hf
+ * must be within the flow_type_rss_offloads provided by drivers via
+ * rte_eth_dev_infos_get() API.
*
* Embedding all configuration information in a single data structure
* is the more flexible method that allows the addition of new features
@@ -1669,12 +1632,6 @@ int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
* The *tx_rs_thresh* value should be less than or equal to the
* *tx_free_thresh* value, and both of them should be less than
* *nb_tx_desc* - 3.
- * - The *txq_flags* member contains flags to pass to the TX queue setup
- * function to configure the behavior of the TX queue. This should be set
- * to 0 if no special configuration is required.
- * This API is obsolete and will be deprecated. Applications
- * should set it to ETH_TXQ_FLAGS_IGNORE and use
- * the offloads field below.
* - The *offloads* member contains Tx offloads to be enabled.
* If an offloading set in tx_conf->offloads
* hasn't been set in the input argument eth_conf->txmode.offloads
@@ -2003,6 +1960,15 @@ int rte_eth_stats_reset(uint16_t port_id);
/**
* Retrieve names of extended statistics of an Ethernet device.
*
+ * The 'xstats_names' and 'xstats' arrays are assumed to be matched by
+ * array index:
+ * xstats_names[i].name => xstats[i].value
+ *
+ * The array index is also the same as the 'id' field of
+ * 'struct rte_eth_xstat': xstats[i].id == i
+ *
+ * This assumption makes key-value pair matching less flexible but simpler.
+ *
* @param port_id
* The port identifier of the Ethernet device.
* @param xstats_names
@@ -2027,13 +1993,20 @@ int rte_eth_xstats_get_names(uint16_t port_id,
/**
* Retrieve extended statistics of an Ethernet device.
*
+ * The 'xstats_names' and 'xstats' arrays are assumed to be matched by
+ * array index:
+ * xstats_names[i].name => xstats[i].value
+ *
+ * The array index is also the same as the 'id' field of
+ * 'struct rte_eth_xstat': xstats[i].id == i
+ *
+ * This assumption makes key-value pair matching less flexible but simpler.
+ *
* @param port_id
* The port identifier of the Ethernet device.
* @param xstats
* A pointer to a table of structure of type *rte_eth_xstat*
- * to be filled with device statistics ids and values: id is the
- * index of the name string in xstats_names (see rte_eth_xstats_get_names()),
- * and value is the statistic counter.
+ * to be filled with device statistics ids and values.
* This parameter can be set to NULL if n is 0.
* @param n
* The size of the xstats array (number of elements).
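A retrieval sketch matching the index contract documented above (allocation checks and the <stdlib.h>/<inttypes.h> includes are omitted, and the counts are assumed stable between the two calls):

	int n = rte_eth_xstats_get_names(port_id, NULL, 0);
	struct rte_eth_xstat_name *names = calloc(n, sizeof(*names));
	struct rte_eth_xstat *xstats = calloc(n, sizeof(*xstats));

	rte_eth_xstats_get_names(port_id, names, n);
	rte_eth_xstats_get(port_id, xstats, n);
	for (int i = 0; i < n; i++)	/* xstats[i].id == i by the contract */
		printf("%s: %" PRIu64 "\n", names[i].name, xstats[i].value);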
@@ -2144,7 +2117,7 @@ void rte_eth_xstats_reset(uint16_t port_id);
* @param stat_idx
* The per-queue packet statistics functionality number that the transmit
* queue is to be assigned.
- * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1].
* @return
* Zero if successful. Non-zero otherwise.
*/
@@ -2164,7 +2137,7 @@ int rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id,
* @param stat_idx
* The per-queue packet statistics functionality number that the receive
* queue is to be assigned.
- * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1].
* @return
* Zero if successful. Non-zero otherwise.
*/
@@ -3656,11 +3629,11 @@ rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id,
uint8_t en);
/**
-* Get the port id from pci address or device name
-* Example:
-* - PCIe, 0000:2:00.0
-* - SoC, fsl-gmac0
-* - vdev, net_pcap0
+* Get the port id from the device name. The device name should be specified
+* in one of the following formats:
+* - PCIe address (Domain:Bus:Device.Function), for example: 0000:2:00.0
+* - SoC device name, for example: fsl-gmac0
+* - vdev dpdk name, for example: net_[pcap0|null0|tap0]
*
* @param name
* pci address or name of the device
@@ -3674,11 +3647,10 @@ int
rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id);
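Usage is the same for all three name formats; a minimal sketch (the PCI
address below is illustrative):

	uint16_t port_id;

	if (rte_eth_dev_get_port_by_name("0000:02:00.0", &port_id) == 0)
		printf("device is port %u\n", port_id);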
/**
-* Get the device name from port id
-* Example:
-* - PCIe Bus:Domain:Function, 0000:02:00.0
-* - SoC device name, fsl-gmac0
-* - vdev dpdk name, net_[pcap0|null0|tun0|tap0]
+* Get the device name from a port id. The device name is returned in one of
+* the following formats:
+* - PCIe address (Domain:Bus:Device.Function), for example: 0000:02:00.0
+* - SoC device name, for example: fsl-gmac0
+* - vdev dpdk name, for example: net_[pcap0|null0|tun0|tap0]
*
* @param port_id
* Port identifier of the device.
@@ -3837,7 +3809,7 @@ rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id,
RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0);
if (queue_id >= dev->data->nb_rx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
return 0;
}
#endif
@@ -4070,7 +4042,7 @@ static inline int rte_eth_tx_descriptor_status(uint16_t port_id,
*
* If the PMD is DEV_TX_OFFLOAD_MT_LOCKFREE capable, multiple threads can
* invoke this function concurrently on the same tx queue without SW lock.
- * @see rte_eth_dev_info_get, struct rte_eth_txconf::txq_flags
+ * @see rte_eth_dev_info_get, struct rte_eth_txconf::offloads
*
* @see rte_eth_tx_prepare to perform some prior checks or adjustments
* for offloads.
@@ -4103,7 +4075,7 @@ rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id,
RTE_FUNC_PTR_OR_ERR_RET(*dev->tx_pkt_burst, 0);
if (queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
return 0;
}
#endif
@@ -4189,7 +4161,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
if (!rte_eth_dev_is_valid_port(port_id)) {
- RTE_PMD_DEBUG_TRACE("Invalid TX port_id=%d\n", port_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX port_id=%u\n", port_id);
rte_errno = -EINVAL;
return 0;
}
@@ -4199,7 +4171,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id,
#ifdef RTE_LIBRTE_ETHDEV_DEBUG
if (queue_id >= dev->data->nb_tx_queues) {
- RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+ RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
rte_errno = -EINVAL;
return 0;
}
diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h
index c9c825e3..c6d9bc1a 100644
--- a/lib/librte_ethdev/rte_ethdev_driver.h
+++ b/lib/librte_ethdev/rte_ethdev_driver.h
@@ -38,7 +38,6 @@ struct rte_eth_dev *rte_eth_dev_allocated(const char *name);
* to that slot for the driver to use.
*
* @param name Unique identifier name for each Ethernet device
- * @param type Device type of this Ethernet device
* @return
* - Slot in the rte_dev_devices array for a new device;
*/
@@ -325,6 +324,32 @@ typedef int (*ethdev_uninit_t)(struct rte_eth_dev *ethdev);
int __rte_experimental
rte_eth_dev_destroy(struct rte_eth_dev *ethdev, ethdev_uninit_t ethdev_uninit);
+/**
+ * PMD helper function to check if keeping CRC is requested
+ *
+ * @note
+ * When the CRC_STRIP offload flag is removed and the default behavior
+ * switches to stripping CRC, as planned, this helper function will no longer
+ * be useful and will be removed. In PMDs this function will be replaced
+ * with the check:
+ * if (offloads & DEV_RX_OFFLOAD_KEEP_CRC)
+ *
+ * @param rx_offloads
+ * offload bits to be applied
+ *
+ * @return
+ * Return positive if keeping CRC is requested,
+ * zero if stripping CRC is requested
+ */
+static inline int
+rte_eth_dev_must_keep_crc(uint64_t rx_offloads)
+{
+ if (rx_offloads & DEV_RX_OFFLOAD_CRC_STRIP)
+ return 0;
+
+ /* no KEEP_CRC or CRC_STRIP offload flags means keep CRC */
+ return 1;
+}
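A hypothetical PMD fragment using the helper at configure time ('dev' and
'hw' are illustrative names, not from this patch):

	/* decide whether the NIC should strip the CRC on receive */
	hw->strip_crc = !rte_eth_dev_must_keep_crc(
				dev->data->dev_conf.rxmode.offloads);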
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ethdev/rte_ethdev_pci.h b/lib/librte_ethdev/rte_ethdev_pci.h
index 2cfd3727..f652596f 100644
--- a/lib/librte_ethdev/rte_ethdev_pci.h
+++ b/lib/librte_ethdev/rte_ethdev_pci.h
@@ -53,8 +53,8 @@ rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev,
struct rte_pci_device *pci_dev)
{
if ((eth_dev == NULL) || (pci_dev == NULL)) {
- RTE_PMD_DEBUG_TRACE("NULL pointer eth_dev=%p pci_dev=%p\n",
- eth_dev, pci_dev);
+ RTE_ETHDEV_LOG(ERR, "NULL pointer eth_dev=%p pci_dev=%p",
+ (void *)eth_dev, (void *)pci_dev);
return;
}
diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map
index 40cf42b8..38f117f0 100644
--- a/lib/librte_ethdev/rte_ethdev_version.map
+++ b/lib/librte_ethdev/rte_ethdev_version.map
@@ -213,6 +213,13 @@ DPDK_18.05 {
} DPDK_18.02;
+DPDK_18.08 {
+ global:
+
+ rte_eth_dev_logtype;
+
+} DPDK_18.05;
+
EXPERIMENTAL {
global:
@@ -232,6 +239,7 @@ EXPERIMENTAL {
rte_eth_dev_tx_offload_name;
rte_eth_switch_domain_alloc;
rte_eth_switch_domain_free;
+ rte_flow_expand_rss;
rte_mtr_capabilities_get;
rte_mtr_create;
rte_mtr_destroy;
diff --git a/lib/librte_ethdev/rte_flow.c b/lib/librte_ethdev/rte_flow.c
index b2afba08..cff4b520 100644
--- a/lib/librte_ethdev/rte_flow.c
+++ b/lib/librte_ethdev/rte_flow.c
@@ -84,7 +84,7 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = {
MK_FLOW_ACTION(FLAG, 0),
MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
MK_FLOW_ACTION(DROP, 0),
- MK_FLOW_ACTION(COUNT, 0),
+ MK_FLOW_ACTION(COUNT, sizeof(struct rte_flow_action_count)),
MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)),
MK_FLOW_ACTION(PF, 0),
MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
@@ -526,3 +526,110 @@ store:
}
return 0;
}
+
+/**
+ * Expand RSS flows into several possible flows according to the RSS hash
+ * fields requested and the driver capabilities.
+ */
+int __rte_experimental
+rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
+ const struct rte_flow_item *pattern, uint64_t types,
+ const struct rte_flow_expand_node graph[],
+ int graph_root_index)
+{
+ const int elt_n = 8;
+ const struct rte_flow_item *item;
+ const struct rte_flow_expand_node *node = &graph[graph_root_index];
+ const int *next_node;
+ const int *stack[elt_n];
+ int stack_pos = 0;
+ struct rte_flow_item flow_items[elt_n];
+ unsigned int i;
+ size_t lsize;
+ size_t user_pattern_size = 0;
+ void *addr = NULL;
+
+ lsize = offsetof(struct rte_flow_expand_rss, entry) +
+ elt_n * sizeof(buf->entry[0]);
+ if (lsize <= size) {
+ buf->entry[0].priority = 0;
+ buf->entry[0].pattern = (void *)&buf->entry[elt_n];
+ buf->entries = 0;
+ addr = buf->entry[0].pattern;
+ }
+ for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+ const struct rte_flow_expand_node *next = NULL;
+
+ for (i = 0; node->next && node->next[i]; ++i) {
+ next = &graph[node->next[i]];
+ if (next->type == item->type)
+ break;
+ }
+ if (next)
+ node = next;
+ user_pattern_size += sizeof(*item);
+ }
+ user_pattern_size += sizeof(*item); /* Handle END item. */
+ lsize += user_pattern_size;
+ /* Copy the user pattern in the first entry of the buffer. */
+ if (lsize <= size) {
+ rte_memcpy(addr, pattern, user_pattern_size);
+ addr = (void *)(((uintptr_t)addr) + user_pattern_size);
+ buf->entries = 1;
+ }
+ /* Start expanding. */
+ memset(flow_items, 0, sizeof(flow_items));
+ user_pattern_size -= sizeof(*item);
+ next_node = node->next;
+ stack[stack_pos] = next_node;
+ node = next_node ? &graph[*next_node] : NULL;
+ while (node) {
+ flow_items[stack_pos].type = node->type;
+ if (node->rss_types & types) {
+ /*
+			 * Compute the number of pattern items to copy from
+			 * the expansion and copy them.
+			 * When stack_pos is 0, the stack holds 1 element,
+			 * plus the additional END item.
+ */
+ int elt = stack_pos + 2;
+
+ flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
+ lsize += elt * sizeof(*item) + user_pattern_size;
+ if (lsize <= size) {
+ size_t n = elt * sizeof(*item);
+
+ buf->entry[buf->entries].priority =
+ stack_pos + 1;
+ buf->entry[buf->entries].pattern = addr;
+ buf->entries++;
+ rte_memcpy(addr, buf->entry[0].pattern,
+ user_pattern_size);
+ addr = (void *)(((uintptr_t)addr) +
+ user_pattern_size);
+ rte_memcpy(addr, flow_items, n);
+ addr = (void *)(((uintptr_t)addr) + n);
+ }
+ }
+ /* Go deeper. */
+ if (node->next) {
+ next_node = node->next;
+			/* reserve room for the trailing END item */
+			if (stack_pos++ == elt_n - 2) {
+ rte_errno = E2BIG;
+ return -rte_errno;
+ }
+ stack[stack_pos] = next_node;
+ } else if (*(next_node + 1)) {
+ /* Follow up with the next possibility. */
+ ++next_node;
+ } else {
+ /* Move to the next path. */
+ if (stack_pos)
+ next_node = stack[--stack_pos];
+ next_node++;
+ stack[stack_pos] = next_node;
+ }
+ node = *next_node ? &graph[*next_node] : NULL;
+	}
+ return lsize;
+}
diff --git a/lib/librte_ethdev/rte_flow_driver.h b/lib/librte_ethdev/rte_flow_driver.h
index 1c90c600..688f7230 100644
--- a/lib/librte_ethdev/rte_flow_driver.h
+++ b/lib/librte_ethdev/rte_flow_driver.h
@@ -114,6 +114,69 @@ struct rte_flow_ops {
const struct rte_flow_ops *
rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
+/** Helper macro to build input graph for rte_flow_expand_rss(). */
+#define RTE_FLOW_EXPAND_RSS_NEXT(...) \
+ (const int []){ \
+ __VA_ARGS__, 0, \
+ }
+
+/** Node object of input graph for rte_flow_expand_rss(). */
+struct rte_flow_expand_node {
+ const int *const next;
+ /**<
+ * List of next node indexes. Index 0 is interpreted as a terminator.
+ */
+ const enum rte_flow_item_type type;
+ /**< Pattern item type of current node. */
+ uint64_t rss_types;
+ /**<
+ * RSS types bit-field associated with this node
+ * (see ETH_RSS_* definitions).
+ */
+};
+
+/** Object returned by rte_flow_expand_rss(). */
+struct rte_flow_expand_rss {
+ uint32_t entries;
+	/**< Number of valid entries in @p entry[]. */
+ struct {
+ struct rte_flow_item *pattern; /**< Expanded pattern array. */
+ uint32_t priority; /**< Priority offset for each expansion. */
+ } entry[];
+};
+
+/**
+ * Expand RSS flows into several possible flows according to the RSS hash
+ * fields requested and the driver capabilities.
+ *
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * @param[out] buf
+ * Buffer to store the result expansion.
+ * @param[in] size
+ * Buffer size in bytes. If 0, @p buf can be NULL.
+ * @param[in] pattern
+ * User flow pattern.
+ * @param[in] types
+ * RSS types to expand (see ETH_RSS_* definitions).
+ * @param[in] graph
+ * Input graph to expand @p pattern according to @p types.
+ * @param[in] graph_root_index
+ * Index of root node in @p graph, typically 0.
+ *
+ * @return
+ * A positive value representing the size of @p buf in bytes regardless of
+ * @p size on success, a negative errno value otherwise and rte_errno is
+ * set, the following errors are defined:
+ *
+ *   -E2BIG: @p graph is too deep.
+ */
+int __rte_experimental
+rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
+ const struct rte_flow_item *pattern, uint64_t types,
+ const struct rte_flow_expand_node graph[],
+ int graph_root_index);
+
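A hedged sketch of building an input graph and expanding a pattern with it
(node indices, the 'pattern' item array and the buffer sizing are
illustrative; error and alignment handling omitted):

	enum { NODE_ETH, NODE_IPV4, NODE_UDP, NODE_TCP };

	static const struct rte_flow_expand_node graph[] = {
		[NODE_ETH] = {
			.next = RTE_FLOW_EXPAND_RSS_NEXT(NODE_IPV4),
			.type = RTE_FLOW_ITEM_TYPE_ETH,
		},
		[NODE_IPV4] = {
			.next = RTE_FLOW_EXPAND_RSS_NEXT(NODE_UDP, NODE_TCP),
			.type = RTE_FLOW_ITEM_TYPE_IPV4,
			.rss_types = ETH_RSS_IPV4,
		},
		[NODE_UDP] = {
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
		},
		[NODE_TCP] = {
			.type = RTE_FLOW_ITEM_TYPE_TCP,
			.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
		},
	};
	uint8_t raw[4096];
	struct rte_flow_expand_rss *buf = (void *)raw;
	/* 'pattern' is the application's item array, END-terminated */
	int ret = rte_flow_expand_rss(buf, sizeof(raw), pattern,
				      ETH_RSS_NONFRAG_IPV4_UDP,
				      graph, NODE_ETH);

On success, buf->entry[0..buf->entries - 1] hold the expanded patterns in
increasing priority offset order.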
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_ethdev/rte_tm.h b/lib/librte_ethdev/rte_tm.h
index 72554038..955f02ff 100644
--- a/lib/librte_ethdev/rte_tm.h
+++ b/lib/librte_ethdev/rte_tm.h
@@ -1569,6 +1569,10 @@ rte_tm_hierarchy_commit(uint16_t port_id,
/**
* Traffic manager node parent update
*
+ * This function may be used to move a node and its children to a different
+ * parent. Additionally, if the new parent is the same as the current parent,
+ * this function will update the priority/weight of an existing node.
+ *
* Restriction for root node: its parent cannot be changed.
*
* This function can only be called after the rte_tm_hierarchy_commit()
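For the priority/weight update case, a hedged usage sketch (node and parent
IDs are assumed to come from the application's earlier hierarchy setup):

	struct rte_tm_error error;

	/* same parent_node_id as before: only priority/weight change */
	if (rte_tm_node_parent_update(port_id, node_id, parent_node_id,
				      new_priority, new_weight, &error) != 0)
		printf("parent update: %s\n",
		       error.message ? error.message : "unknown error");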
diff --git a/lib/librte_eventdev/Makefile b/lib/librte_eventdev/Makefile
index b3e25464..47f599a6 100644
--- a/lib/librte_eventdev/Makefile
+++ b/lib/librte_eventdev/Makefile
@@ -8,14 +8,19 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_eventdev.a
# library version
-LIBABIVER := 4
+LIBABIVER := 5
# build flags
CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+CFLAGS += -DLINUX
+else
+CFLAGS += -DBSD
+endif
LDLIBS += -lrte_eal -lrte_ring -lrte_ethdev -lrte_hash -lrte_mempool -lrte_timer
-LDLIBS += -lrte_mbuf -lrte_cryptodev
+LDLIBS += -lrte_mbuf -lrte_cryptodev -lpthread
# library source files
SRCS-y += rte_eventdev.c
diff --git a/lib/librte_eventdev/meson.build b/lib/librte_eventdev/meson.build
index bd138bd5..3cbaf298 100644
--- a/lib/librte_eventdev/meson.build
+++ b/lib/librte_eventdev/meson.build
@@ -1,8 +1,15 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-version = 4
+version = 5
allow_experimental_apis = true
+
+if host_machine.system() == 'linux'
+ cflags += '-DLINUX'
+else
+ cflags += '-DBSD'
+endif
+
sources = files('rte_eventdev.c',
'rte_event_ring.c',
'rte_event_eth_rx_adapter.c',
diff --git a/lib/librte_eventdev/rte_event_crypto_adapter.c b/lib/librte_eventdev/rte_event_crypto_adapter.c
index ba63a87b..11b28ca9 100644
--- a/lib/librte_eventdev/rte_event_crypto_adapter.c
+++ b/lib/librte_eventdev/rte_event_crypto_adapter.c
@@ -342,7 +342,7 @@ eca_enq_to_cryptodev(struct rte_event_crypto_adapter *adapter,
if (crypto_op == NULL)
continue;
if (crypto_op->sess_type == RTE_CRYPTO_OP_WITH_SESSION) {
- m_data = rte_cryptodev_sym_session_get_private_data(
+ m_data = rte_cryptodev_sym_session_get_user_data(
crypto_op->sym->session);
if (m_data == NULL) {
rte_pktmbuf_free(crypto_op->sym->m_src);
@@ -512,7 +512,7 @@ eca_ops_enqueue_burst(struct rte_event_crypto_adapter *adapter,
for (i = 0; i < num; i++) {
struct rte_event *ev = &events[nb_ev++];
if (ops[i]->sess_type == RTE_CRYPTO_OP_WITH_SESSION) {
- m_data = rte_cryptodev_sym_session_get_private_data(
+ m_data = rte_cryptodev_sym_session_get_user_data(
ops[i]->sym->session);
} else if (ops[i]->sess_type == RTE_CRYPTO_OP_SESSIONLESS &&
ops[i]->private_data_offset) {
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
index 6f705095..f5e5a0b5 100644
--- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
@@ -2,6 +2,11 @@
* Copyright(c) 2017 Intel Corporation.
* All rights reserved.
*/
+#if defined(LINUX)
+#include <sys/epoll.h>
+#endif
+#include <unistd.h>
+
#include <rte_cycles.h>
#include <rte_common.h>
#include <rte_dev.h>
@@ -11,6 +16,7 @@
#include <rte_malloc.h>
#include <rte_service_component.h>
#include <rte_thash.h>
+#include <rte_interrupts.h>
#include "rte_eventdev.h"
#include "rte_eventdev_pmd.h"
@@ -24,6 +30,22 @@
#define ETH_RX_ADAPTER_MEM_NAME_LEN 32
#define RSS_KEY_SIZE 40
+/* value written to intr thread pipe to signal thread exit */
+#define ETH_BRIDGE_INTR_THREAD_EXIT 1
+/* Sentinel value to detect an uninitialized file descriptor */
+#define INIT_FD -1
+
+/*
+ * Used to store port and queue ID of interrupting Rx queue
+ */
+union queue_data {
+ RTE_STD_C11
+ void *ptr;
+ struct {
+ uint16_t port;
+ uint16_t queue;
+ };
+};
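Because the union is pointer-sized, the <port, queue> pair travels through
the epoll user data with no allocation; a sketch of the registration side,
mirroring rxa_config_intr() further below ('epfd' assumed):

	union queue_data qd;

	qd.port = eth_dev_id;
	qd.queue = rx_queue_id;
	rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id, epfd,
				  RTE_INTR_EVENT_ADD, qd.ptr);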
/*
* There is an instance of this struct per polled Rx queue added to the
@@ -75,6 +97,30 @@ struct rte_event_eth_rx_adapter {
uint16_t enq_block_count;
/* Block start ts */
uint64_t rx_enq_block_start_ts;
+ /* epoll fd used to wait for Rx interrupts */
+ int epd;
+	/* Number of interrupt driven queues */
+ uint32_t num_rx_intr;
+ /* Used to send <dev id, queue id> of interrupting Rx queues from
+ * the interrupt thread to the Rx thread
+ */
+ struct rte_ring *intr_ring;
+ /* Rx Queue data (dev id, queue id) for the last non-empty
+ * queue polled
+ */
+ union queue_data qd;
+ /* queue_data is valid */
+ int qd_valid;
+ /* Interrupt ring lock, synchronizes Rx thread
+ * and interrupt thread
+ */
+ rte_spinlock_t intr_ring_lock;
+ /* event array passed to rte_poll_wait */
+ struct rte_epoll_event *epoll_events;
+ /* Count of interrupt vectors in use */
+ uint32_t num_intr_vec;
+ /* Thread blocked on Rx interrupts */
+ pthread_t rx_intr_thread;
/* Configuration callback for rte_service configuration */
rte_event_eth_rx_adapter_conf_cb conf_cb;
/* Configuration callback argument */
@@ -91,12 +137,20 @@ struct rte_event_eth_rx_adapter {
int socket_id;
/* Per adapter EAL service */
uint32_t service_id;
+ /* Adapter started flag */
+ uint8_t rxa_started;
+ /* Adapter ID */
+ uint8_t id;
} __rte_cache_aligned;
/* Per eth device */
struct eth_device_info {
struct rte_eth_dev *dev;
struct eth_rx_queue_info *rx_queue;
+ /* Rx callback */
+ rte_event_eth_rx_adapter_cb_fn cb_fn;
+ /* Rx callback argument */
+ void *cb_arg;
/* Set if ethdev->eventdev packet transfer uses a
* hardware mechanism
*/
@@ -107,15 +161,42 @@ struct eth_device_info {
* rx_adapter_stop callback needs to be invoked
*/
uint8_t dev_rx_started;
- /* If nb_dev_queues > 0, the start callback will
+ /* Number of queues added for this device */
+ uint16_t nb_dev_queues;
+ /* Number of poll based queues
+ * If nb_rx_poll > 0, the start callback will
* be invoked if not already invoked
*/
- uint16_t nb_dev_queues;
+ uint16_t nb_rx_poll;
+ /* Number of interrupt based queues
+ * If nb_rx_intr > 0, the start callback will
+ * be invoked if not already invoked.
+ */
+ uint16_t nb_rx_intr;
+ /* Number of queues that use the shared interrupt */
+ uint16_t nb_shared_intr;
+ /* sum(wrr(q)) for all queues within the device
+ * useful when deleting all device queues
+ */
+ uint32_t wrr_len;
+ /* Intr based queue index to start polling from, this is used
+ * if the number of shared interrupts is non-zero
+ */
+ uint16_t next_q_idx;
+ /* Intr based queue indices */
+ uint16_t *intr_queue;
+	/* Device generates a per Rx queue interrupt for
+	 * queue indices < RTE_MAX_RXTX_INTR_VEC_ID - 1
+ */
+ int multi_intr_cap;
+ /* shared interrupt enabled */
+ int shared_intr_enabled;
};
/* Per Rx queue */
struct eth_rx_queue_info {
int queue_enabled; /* True if added */
+ int intr_enabled;
uint16_t wt; /* Polling weight */
uint8_t event_queue_id; /* Event queue to enqueue packets to */
uint8_t sched_type; /* Sched type for events */
@@ -127,30 +208,30 @@ struct eth_rx_queue_info {
static struct rte_event_eth_rx_adapter **event_eth_rx_adapter;
static inline int
-valid_id(uint8_t id)
+rxa_validate_id(uint8_t id)
{
return id < RTE_EVENT_ETH_RX_ADAPTER_MAX_INSTANCE;
}
#define RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, retval) do { \
- if (!valid_id(id)) { \
+ if (!rxa_validate_id(id)) { \
RTE_EDEV_LOG_ERR("Invalid eth Rx adapter id = %d\n", id); \
return retval; \
} \
} while (0)
static inline int
-sw_rx_adapter_queue_count(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_sw_adapter_queue_count(struct rte_event_eth_rx_adapter *rx_adapter)
{
- return rx_adapter->num_rx_polled;
+ return rx_adapter->num_rx_polled + rx_adapter->num_rx_intr;
}
/* Greatest common divisor */
-static uint16_t gcd_u16(uint16_t a, uint16_t b)
+static uint16_t rxa_gcd_u16(uint16_t a, uint16_t b)
{
uint16_t r = a % b;
- return r ? gcd_u16(b, r) : b;
+ return r ? rxa_gcd_u16(b, r) : b;
}
/* Returns the next queue in the polling sequence
@@ -158,7 +239,7 @@ static uint16_t gcd_u16(uint16_t a, uint16_t b)
* http://kb.linuxvirtualserver.org/wiki/Weighted_Round-Robin_Scheduling
*/
static int
-wrr_next(struct rte_event_eth_rx_adapter *rx_adapter,
+rxa_wrr_next(struct rte_event_eth_rx_adapter *rx_adapter,
unsigned int n, int *cw,
struct eth_rx_poll_entry *eth_rx_poll, uint16_t max_wt,
uint16_t gcd, int prev)
@@ -186,13 +267,298 @@ wrr_next(struct rte_event_eth_rx_adapter *rx_adapter,
}
}
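For reference, the interleaved WRR cited above can be sketched standalone
(this is the generic LVS algorithm, not the adapter's exact helper, which
additionally indexes into its poll table):

	/* With weights {4, 2, 1}, gcd 1 and max_wt 4, repeated calls
	 * yield the sequence 0 0 0 1 0 1 2, then repeat.
	 */
	static int
	wrr_next_sketch(const uint16_t w[], int n, int *cw,
			uint16_t max_wt, uint16_t gcd, int prev)
	{
		int i = prev;

		while (1) {
			i = (i + 1) % n;
			if (i == 0) {
				*cw -= gcd;
				if (*cw <= 0)
					*cw = max_wt;
			}
			if (w[i] >= *cw)
				return i;
		}
	}

Callers start with prev = -1 and *cw = -1 (as the patch does below) so the
first pass resets the current weight to max_wt.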
-/* Precalculate WRR polling sequence for all queues in rx_adapter */
+static inline int
+rxa_shared_intr(struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ int multi_intr_cap;
+
+ if (dev_info->dev->intr_handle == NULL)
+ return 0;
+
+ multi_intr_cap = rte_intr_cap_multiple(dev_info->dev->intr_handle);
+ return !multi_intr_cap ||
+ rx_queue_id >= RTE_MAX_RXTX_INTR_VEC_ID - 1;
+}
+
+static inline int
+rxa_intr_queue(struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ struct eth_rx_queue_info *queue_info;
+
+ queue_info = &dev_info->rx_queue[rx_queue_id];
+ return dev_info->rx_queue &&
+ !dev_info->internal_event_port &&
+ queue_info->queue_enabled && queue_info->wt == 0;
+}
+
+static inline int
+rxa_polled_queue(struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ struct eth_rx_queue_info *queue_info;
+
+ queue_info = &dev_info->rx_queue[rx_queue_id];
+ return !dev_info->internal_event_port &&
+ dev_info->rx_queue &&
+ queue_info->queue_enabled && queue_info->wt != 0;
+}
+
+/* Calculate change in number of vectors after Rx queue ID is added/deleted */
static int
-eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_nb_intr_vect(struct eth_device_info *dev_info, int rx_queue_id, int add)
+{
+ uint16_t i;
+ int n, s;
+ uint16_t nbq;
+
+ nbq = dev_info->dev->data->nb_rx_queues;
+ n = 0; /* non shared count */
+ s = 0; /* shared count */
+
+ if (rx_queue_id == -1) {
+ for (i = 0; i < nbq; i++) {
+ if (!rxa_shared_intr(dev_info, i))
+ n += add ? !rxa_intr_queue(dev_info, i) :
+ rxa_intr_queue(dev_info, i);
+ else
+ s += add ? !rxa_intr_queue(dev_info, i) :
+ rxa_intr_queue(dev_info, i);
+ }
+
+ if (s > 0) {
+ if ((add && dev_info->nb_shared_intr == 0) ||
+ (!add && dev_info->nb_shared_intr))
+ n += 1;
+ }
+ } else {
+ if (!rxa_shared_intr(dev_info, rx_queue_id))
+ n = add ? !rxa_intr_queue(dev_info, rx_queue_id) :
+ rxa_intr_queue(dev_info, rx_queue_id);
+ else
+ n = add ? !dev_info->nb_shared_intr :
+ dev_info->nb_shared_intr == 1;
+ }
+
+ return add ? n : -n;
+}
+
+/* Calculate nb_rx_intr after deleting interrupt mode rx queues
+ */
+static void
+rxa_calc_nb_post_intr_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_intr)
+{
+ uint32_t intr_diff;
+
+ if (rx_queue_id == -1)
+ intr_diff = dev_info->nb_rx_intr;
+ else
+ intr_diff = rxa_intr_queue(dev_info, rx_queue_id);
+
+ *nb_rx_intr = rx_adapter->num_rx_intr - intr_diff;
+}
+
+/* Calculate nb_rx_* after adding interrupt mode rx queues, newly added
+ * interrupt queues could currently be poll mode Rx queues
+ */
+static void
+rxa_calc_nb_post_add_intr(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ uint32_t intr_diff;
+ uint32_t poll_diff;
+ uint32_t wrr_len_diff;
+
+ if (rx_queue_id == -1) {
+ intr_diff = dev_info->dev->data->nb_rx_queues -
+ dev_info->nb_rx_intr;
+ poll_diff = dev_info->nb_rx_poll;
+ wrr_len_diff = dev_info->wrr_len;
+ } else {
+ intr_diff = !rxa_intr_queue(dev_info, rx_queue_id);
+ poll_diff = rxa_polled_queue(dev_info, rx_queue_id);
+ wrr_len_diff = poll_diff ? dev_info->rx_queue[rx_queue_id].wt :
+ 0;
+ }
+
+ *nb_rx_intr = rx_adapter->num_rx_intr + intr_diff;
+ *nb_rx_poll = rx_adapter->num_rx_polled - poll_diff;
+ *nb_wrr = rx_adapter->wrr_len - wrr_len_diff;
+}
+
+/* Calculate size of the eth_rx_poll and wrr_sched arrays
+ * after deleting poll mode rx queues
+ */
+static void
+rxa_calc_nb_post_poll_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_wrr)
+{
+ uint32_t poll_diff;
+ uint32_t wrr_len_diff;
+
+ if (rx_queue_id == -1) {
+ poll_diff = dev_info->nb_rx_poll;
+ wrr_len_diff = dev_info->wrr_len;
+ } else {
+ poll_diff = rxa_polled_queue(dev_info, rx_queue_id);
+ wrr_len_diff = poll_diff ? dev_info->rx_queue[rx_queue_id].wt :
+ 0;
+ }
+
+ *nb_rx_poll = rx_adapter->num_rx_polled - poll_diff;
+ *nb_wrr = rx_adapter->wrr_len - wrr_len_diff;
+}
+
+/* Calculate nb_rx_* after adding poll mode rx queues
+ */
+static void
+rxa_calc_nb_post_add_poll(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint16_t wt,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ uint32_t intr_diff;
+ uint32_t poll_diff;
+ uint32_t wrr_len_diff;
+
+ if (rx_queue_id == -1) {
+ intr_diff = dev_info->nb_rx_intr;
+ poll_diff = dev_info->dev->data->nb_rx_queues -
+ dev_info->nb_rx_poll;
+ wrr_len_diff = wt*dev_info->dev->data->nb_rx_queues
+ - dev_info->wrr_len;
+ } else {
+ intr_diff = rxa_intr_queue(dev_info, rx_queue_id);
+ poll_diff = !rxa_polled_queue(dev_info, rx_queue_id);
+ wrr_len_diff = rxa_polled_queue(dev_info, rx_queue_id) ?
+ wt - dev_info->rx_queue[rx_queue_id].wt :
+ wt;
+ }
+
+ *nb_rx_poll = rx_adapter->num_rx_polled + poll_diff;
+ *nb_rx_intr = rx_adapter->num_rx_intr - intr_diff;
+ *nb_wrr = rx_adapter->wrr_len + wrr_len_diff;
+}
+
+/* Calculate nb_rx_* after adding rx_queue_id */
+static void
+rxa_calc_nb_post_add(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint16_t wt,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ if (wt != 0)
+ rxa_calc_nb_post_add_poll(rx_adapter, dev_info, rx_queue_id,
+ wt, nb_rx_poll, nb_rx_intr, nb_wrr);
+ else
+ rxa_calc_nb_post_add_intr(rx_adapter, dev_info, rx_queue_id,
+ nb_rx_poll, nb_rx_intr, nb_wrr);
+}
+
+/* Calculate nb_rx_* after deleting rx_queue_id */
+static void
+rxa_calc_nb_post_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id,
+ uint32_t *nb_rx_poll,
+ uint32_t *nb_rx_intr,
+ uint32_t *nb_wrr)
+{
+ rxa_calc_nb_post_poll_del(rx_adapter, dev_info, rx_queue_id, nb_rx_poll,
+ nb_wrr);
+ rxa_calc_nb_post_intr_del(rx_adapter, dev_info, rx_queue_id,
+ nb_rx_intr);
+}
+
+/*
+ * Allocate the rx_poll array
+ */
+static struct eth_rx_poll_entry *
+rxa_alloc_poll(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint32_t num_rx_polled)
+{
+ size_t len;
+
+ len = RTE_ALIGN(num_rx_polled * sizeof(*rx_adapter->eth_rx_poll),
+ RTE_CACHE_LINE_SIZE);
+ return rte_zmalloc_socket(rx_adapter->mem_name,
+ len,
+ RTE_CACHE_LINE_SIZE,
+ rx_adapter->socket_id);
+}
+
+/*
+ * Allocate the WRR array
+ */
+static uint32_t *
+rxa_alloc_wrr(struct rte_event_eth_rx_adapter *rx_adapter, int nb_wrr)
+{
+ size_t len;
+
+ len = RTE_ALIGN(nb_wrr * sizeof(*rx_adapter->wrr_sched),
+ RTE_CACHE_LINE_SIZE);
+ return rte_zmalloc_socket(rx_adapter->mem_name,
+ len,
+ RTE_CACHE_LINE_SIZE,
+ rx_adapter->socket_id);
+}
+
+static int
+rxa_alloc_poll_arrays(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint32_t nb_poll,
+ uint32_t nb_wrr,
+ struct eth_rx_poll_entry **rx_poll,
+ uint32_t **wrr_sched)
+{
+
+ if (nb_poll == 0) {
+ *rx_poll = NULL;
+ *wrr_sched = NULL;
+ return 0;
+ }
+
+ *rx_poll = rxa_alloc_poll(rx_adapter, nb_poll);
+ if (*rx_poll == NULL) {
+ *wrr_sched = NULL;
+ return -ENOMEM;
+ }
+
+ *wrr_sched = rxa_alloc_wrr(rx_adapter, nb_wrr);
+ if (*wrr_sched == NULL) {
+ rte_free(*rx_poll);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/* Precalculate WRR polling sequence for all queues in rx_adapter */
+static void
+rxa_calc_wrr_sequence(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_rx_poll_entry *rx_poll,
+ uint32_t *rx_wrr)
{
uint16_t d;
uint16_t q;
unsigned int i;
+ int prev = -1;
+ int cw = -1;
/* Initialize variables for calculation of wrr schedule */
uint16_t max_wrr_pos = 0;
@@ -200,79 +566,52 @@ eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter)
uint16_t max_wt = 0;
uint16_t gcd = 0;
- struct eth_rx_poll_entry *rx_poll = NULL;
- uint32_t *rx_wrr = NULL;
+ if (rx_poll == NULL)
+ return;
- if (rx_adapter->num_rx_polled) {
- size_t len = RTE_ALIGN(rx_adapter->num_rx_polled *
- sizeof(*rx_adapter->eth_rx_poll),
- RTE_CACHE_LINE_SIZE);
- rx_poll = rte_zmalloc_socket(rx_adapter->mem_name,
- len,
- RTE_CACHE_LINE_SIZE,
- rx_adapter->socket_id);
- if (rx_poll == NULL)
- return -ENOMEM;
+ /* Generate array of all queues to poll, the size of this
+ * array is poll_q
+ */
+ RTE_ETH_FOREACH_DEV(d) {
+ uint16_t nb_rx_queues;
+ struct eth_device_info *dev_info =
+ &rx_adapter->eth_devices[d];
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ if (dev_info->rx_queue == NULL)
+ continue;
+ if (dev_info->internal_event_port)
+ continue;
+ dev_info->wrr_len = 0;
+ for (q = 0; q < nb_rx_queues; q++) {
+ struct eth_rx_queue_info *queue_info =
+ &dev_info->rx_queue[q];
+ uint16_t wt;
- /* Generate array of all queues to poll, the size of this
- * array is poll_q
- */
- RTE_ETH_FOREACH_DEV(d) {
- uint16_t nb_rx_queues;
- struct eth_device_info *dev_info =
- &rx_adapter->eth_devices[d];
- nb_rx_queues = dev_info->dev->data->nb_rx_queues;
- if (dev_info->rx_queue == NULL)
+ if (!rxa_polled_queue(dev_info, q))
continue;
- for (q = 0; q < nb_rx_queues; q++) {
- struct eth_rx_queue_info *queue_info =
- &dev_info->rx_queue[q];
- if (queue_info->queue_enabled == 0)
- continue;
-
- uint16_t wt = queue_info->wt;
- rx_poll[poll_q].eth_dev_id = d;
- rx_poll[poll_q].eth_rx_qid = q;
- max_wrr_pos += wt;
- max_wt = RTE_MAX(max_wt, wt);
- gcd = (gcd) ? gcd_u16(gcd, wt) : wt;
- poll_q++;
- }
- }
-
- len = RTE_ALIGN(max_wrr_pos * sizeof(*rx_wrr),
- RTE_CACHE_LINE_SIZE);
- rx_wrr = rte_zmalloc_socket(rx_adapter->mem_name,
- len,
- RTE_CACHE_LINE_SIZE,
- rx_adapter->socket_id);
- if (rx_wrr == NULL) {
- rte_free(rx_poll);
- return -ENOMEM;
- }
-
- /* Generate polling sequence based on weights */
- int prev = -1;
- int cw = -1;
- for (i = 0; i < max_wrr_pos; i++) {
- rx_wrr[i] = wrr_next(rx_adapter, poll_q, &cw,
- rx_poll, max_wt, gcd, prev);
- prev = rx_wrr[i];
+ wt = queue_info->wt;
+ rx_poll[poll_q].eth_dev_id = d;
+ rx_poll[poll_q].eth_rx_qid = q;
+ max_wrr_pos += wt;
+ dev_info->wrr_len += wt;
+ max_wt = RTE_MAX(max_wt, wt);
+ gcd = (gcd) ? rxa_gcd_u16(gcd, wt) : wt;
+ poll_q++;
}
}
- rte_free(rx_adapter->eth_rx_poll);
- rte_free(rx_adapter->wrr_sched);
-
- rx_adapter->eth_rx_poll = rx_poll;
- rx_adapter->wrr_sched = rx_wrr;
- rx_adapter->wrr_len = max_wrr_pos;
-
- return 0;
+ /* Generate polling sequence based on weights */
+ prev = -1;
+ cw = -1;
+ for (i = 0; i < max_wrr_pos; i++) {
+ rx_wrr[i] = rxa_wrr_next(rx_adapter, poll_q, &cw,
+ rx_poll, max_wt, gcd, prev);
+ prev = rx_wrr[i];
+ }
}
static inline void
-mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr,
+rxa_mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr,
struct ipv6_hdr **ipv6_hdr)
{
struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
@@ -311,7 +650,7 @@ mtoip(struct rte_mbuf *m, struct ipv4_hdr **ipv4_hdr,
/* Calculate RSS hash for IPv4/6 */
static inline uint32_t
-do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
+rxa_do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
{
uint32_t input_len;
void *tuple;
@@ -320,7 +659,7 @@ do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
struct ipv4_hdr *ipv4_hdr;
struct ipv6_hdr *ipv6_hdr;
- mtoip(m, &ipv4_hdr, &ipv6_hdr);
+ rxa_mtoip(m, &ipv4_hdr, &ipv6_hdr);
if (ipv4_hdr) {
ipv4_tuple.src_addr = rte_be_to_cpu_32(ipv4_hdr->src_addr);
@@ -339,13 +678,13 @@ do_softrss(struct rte_mbuf *m, const uint8_t *rss_key_be)
}
static inline int
-rx_enq_blocked(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_enq_blocked(struct rte_event_eth_rx_adapter *rx_adapter)
{
return !!rx_adapter->enq_block_count;
}
static inline void
-rx_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter)
{
if (rx_adapter->rx_enq_block_start_ts)
return;
@@ -358,13 +697,13 @@ rx_enq_block_start_ts(struct rte_event_eth_rx_adapter *rx_adapter)
}
static inline void
-rx_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter,
+rxa_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter,
struct rte_event_eth_rx_adapter_stats *stats)
{
if (unlikely(!stats->rx_enq_start_ts))
stats->rx_enq_start_ts = rte_get_tsc_cycles();
- if (likely(!rx_enq_blocked(rx_adapter)))
+ if (likely(!rxa_enq_blocked(rx_adapter)))
return;
rx_adapter->enq_block_count = 0;
@@ -380,8 +719,8 @@ rx_enq_block_end_ts(struct rte_event_eth_rx_adapter *rx_adapter,
* this function
*/
static inline void
-buf_event_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
- struct rte_event *ev)
+rxa_buffer_event(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct rte_event *ev)
{
struct rte_eth_event_enqueue_buffer *buf =
&rx_adapter->event_enqueue_buffer;
@@ -390,7 +729,7 @@ buf_event_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
/* Enqueue buffered events to event device */
static inline uint16_t
-flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
{
struct rte_eth_event_enqueue_buffer *buf =
&rx_adapter->event_enqueue_buffer;
@@ -407,8 +746,8 @@ flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
stats->rx_enq_retry++;
}
- n ? rx_enq_block_end_ts(rx_adapter, stats) :
- rx_enq_block_start_ts(rx_adapter);
+ n ? rxa_enq_block_end_ts(rx_adapter, stats) :
+ rxa_enq_block_start_ts(rx_adapter);
buf->count -= n;
stats->rx_enq_count += n;
@@ -417,18 +756,19 @@ flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
}
static inline void
-fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
- uint8_t dev_id,
- uint16_t rx_queue_id,
- struct rte_mbuf **mbufs,
- uint16_t num)
+rxa_buffer_mbufs(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint16_t eth_dev_id,
+ uint16_t rx_queue_id,
+ struct rte_mbuf **mbufs,
+ uint16_t num)
{
uint32_t i;
- struct eth_device_info *eth_device_info =
- &rx_adapter->eth_devices[dev_id];
+ struct eth_device_info *dev_info =
+ &rx_adapter->eth_devices[eth_dev_id];
struct eth_rx_queue_info *eth_rx_queue_info =
- &eth_device_info->rx_queue[rx_queue_id];
-
+ &dev_info->rx_queue[rx_queue_id];
+ struct rte_eth_event_enqueue_buffer *buf =
+ &rx_adapter->event_enqueue_buffer;
int32_t qid = eth_rx_queue_info->event_queue_id;
uint8_t sched_type = eth_rx_queue_info->sched_type;
uint8_t priority = eth_rx_queue_info->priority;
@@ -439,6 +779,8 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
uint32_t rss;
int do_rss;
uint64_t ts;
+ struct rte_mbuf *cb_mbufs[BATCH_SIZE];
+ uint16_t nb_cb;
/* 0xffff ffff if PKT_RX_RSS_HASH is set, otherwise 0 */
rss_mask = ~(((m->ol_flags & PKT_RX_RSS_HASH) != 0) - 1);
@@ -454,12 +796,26 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
}
}
+
+ nb_cb = dev_info->cb_fn ? dev_info->cb_fn(eth_dev_id, rx_queue_id,
+ ETH_EVENT_BUFFER_SIZE,
+ buf->count, mbufs,
+ num,
+ dev_info->cb_arg,
+ cb_mbufs) :
+ num;
+ if (nb_cb < num) {
+ mbufs = cb_mbufs;
+ num = nb_cb;
+ }
+
for (i = 0; i < num; i++) {
m = mbufs[i];
struct rte_event *ev = &events[i];
rss = do_rss ?
- do_softrss(m, rx_adapter->rss_key_be) : m->hash.rss;
+ rxa_do_softrss(m, rx_adapter->rss_key_be) :
+ m->hash.rss;
flow_id =
eth_rx_queue_info->flow_id &
eth_rx_queue_info->flow_id_mask;
@@ -473,8 +829,275 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
ev->priority = priority;
ev->mbuf = m;
- buf_event_enqueue(rx_adapter, ev);
+ rxa_buffer_event(rx_adapter, ev);
+ }
+}
+
+/* Enqueue packets from <port, q> to event buffer */
+static inline uint32_t
+rxa_eth_rx(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint16_t port_id,
+ uint16_t queue_id,
+ uint32_t rx_count,
+ uint32_t max_rx,
+ int *rxq_empty)
+{
+ struct rte_mbuf *mbufs[BATCH_SIZE];
+ struct rte_eth_event_enqueue_buffer *buf =
+ &rx_adapter->event_enqueue_buffer;
+ struct rte_event_eth_rx_adapter_stats *stats =
+ &rx_adapter->stats;
+ uint16_t n;
+ uint32_t nb_rx = 0;
+
+ if (rxq_empty)
+ *rxq_empty = 0;
+ /* Don't do a batch dequeue from the rx queue if there isn't
+ * enough space in the enqueue buffer.
+ */
+ while (BATCH_SIZE <= (RTE_DIM(buf->events) - buf->count)) {
+ if (buf->count >= BATCH_SIZE)
+ rxa_flush_event_buffer(rx_adapter);
+
+ stats->rx_poll_count++;
+ n = rte_eth_rx_burst(port_id, queue_id, mbufs, BATCH_SIZE);
+ if (unlikely(!n)) {
+ if (rxq_empty)
+ *rxq_empty = 1;
+ break;
+ }
+ rxa_buffer_mbufs(rx_adapter, port_id, queue_id, mbufs, n);
+ nb_rx += n;
+ if (rx_count + nb_rx > max_rx)
+ break;
}
+
+ if (buf->count >= BATCH_SIZE)
+ rxa_flush_event_buffer(rx_adapter);
+
+ return nb_rx;
+}
+
+static inline void
+rxa_intr_ring_enqueue(struct rte_event_eth_rx_adapter *rx_adapter,
+ void *data)
+{
+ uint16_t port_id;
+ uint16_t queue;
+ int err;
+ union queue_data qd;
+ struct eth_device_info *dev_info;
+ struct eth_rx_queue_info *queue_info;
+ int *intr_enabled;
+
+ qd.ptr = data;
+ port_id = qd.port;
+ queue = qd.queue;
+
+ dev_info = &rx_adapter->eth_devices[port_id];
+ queue_info = &dev_info->rx_queue[queue];
+ rte_spinlock_lock(&rx_adapter->intr_ring_lock);
+ if (rxa_shared_intr(dev_info, queue))
+ intr_enabled = &dev_info->shared_intr_enabled;
+ else
+ intr_enabled = &queue_info->intr_enabled;
+
+ if (*intr_enabled) {
+ *intr_enabled = 0;
+ err = rte_ring_enqueue(rx_adapter->intr_ring, data);
+ /* Entry should always be available.
+ * The ring size equals the maximum number of interrupt
+ * vectors supported (an interrupt vector is shared in
+ * case of shared interrupts)
+ */
+ if (err)
+ RTE_EDEV_LOG_ERR("Failed to enqueue interrupt"
+ " to ring: %s", strerror(err));
+ else
+ rte_eth_dev_rx_intr_disable(port_id, queue);
+ }
+ rte_spinlock_unlock(&rx_adapter->intr_ring_lock);
+}
+
+static int
+rxa_intr_ring_check_avail(struct rte_event_eth_rx_adapter *rx_adapter,
+ uint32_t num_intr_vec)
+{
+ if (rx_adapter->num_intr_vec + num_intr_vec >
+ RTE_EVENT_ETH_INTR_RING_SIZE) {
+ RTE_EDEV_LOG_ERR("Exceeded intr ring slots current"
+ " %d needed %d limit %d", rx_adapter->num_intr_vec,
+ num_intr_vec, RTE_EVENT_ETH_INTR_RING_SIZE);
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+/* Delete entries for (dev, queue) from the interrupt ring */
+static void
+rxa_intr_ring_del_entries(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ uint16_t rx_queue_id)
+{
+ int i, n;
+ union queue_data qd;
+
+ rte_spinlock_lock(&rx_adapter->intr_ring_lock);
+
+ n = rte_ring_count(rx_adapter->intr_ring);
+ for (i = 0; i < n; i++) {
+ rte_ring_dequeue(rx_adapter->intr_ring, &qd.ptr);
+ if (!rxa_shared_intr(dev_info, rx_queue_id)) {
+ if (qd.port == dev_info->dev->data->port_id &&
+ qd.queue == rx_queue_id)
+ continue;
+ } else {
+ if (qd.port == dev_info->dev->data->port_id)
+ continue;
+ }
+ rte_ring_enqueue(rx_adapter->intr_ring, qd.ptr);
+ }
+
+ rte_spinlock_unlock(&rx_adapter->intr_ring_lock);
+}
+
+/* pthread callback handling interrupt mode receive queues
+ * After receiving an Rx interrupt, it enqueues the port id and queue id of the
+ * interrupting queue to the adapter's ring buffer for interrupt events.
+ * These events are picked up by rxa_intr_ring_dequeue() which is invoked from
+ * the adapter service function.
+ */
+static void *
+rxa_intr_thread(void *arg)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter = arg;
+ struct rte_epoll_event *epoll_events = rx_adapter->epoll_events;
+ int n, i;
+
+ while (1) {
+ n = rte_epoll_wait(rx_adapter->epd, epoll_events,
+ RTE_EVENT_ETH_INTR_RING_SIZE, -1);
+ if (unlikely(n < 0))
+ RTE_EDEV_LOG_ERR("rte_epoll_wait returned error %d",
+ n);
+ for (i = 0; i < n; i++) {
+ rxa_intr_ring_enqueue(rx_adapter,
+ epoll_events[i].epdata.data);
+ }
+ }
+
+ return NULL;
+}
+
+/* Dequeue <port, q> from interrupt ring and enqueue received
+ * mbufs to eventdev
+ */
+static inline uint32_t
+rxa_intr_ring_dequeue(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ uint32_t n;
+ uint32_t nb_rx = 0;
+ int rxq_empty;
+ struct rte_eth_event_enqueue_buffer *buf;
+ rte_spinlock_t *ring_lock;
+ uint8_t max_done = 0;
+
+ if (rx_adapter->num_rx_intr == 0)
+ return 0;
+
+ if (rte_ring_count(rx_adapter->intr_ring) == 0
+ && !rx_adapter->qd_valid)
+ return 0;
+
+ buf = &rx_adapter->event_enqueue_buffer;
+ ring_lock = &rx_adapter->intr_ring_lock;
+
+ if (buf->count >= BATCH_SIZE)
+ rxa_flush_event_buffer(rx_adapter);
+
+ while (BATCH_SIZE <= (RTE_DIM(buf->events) - buf->count)) {
+ struct eth_device_info *dev_info;
+ uint16_t port;
+ uint16_t queue;
+ union queue_data qd = rx_adapter->qd;
+ int err;
+
+ if (!rx_adapter->qd_valid) {
+ struct eth_rx_queue_info *queue_info;
+
+ rte_spinlock_lock(ring_lock);
+ err = rte_ring_dequeue(rx_adapter->intr_ring, &qd.ptr);
+ if (err) {
+ rte_spinlock_unlock(ring_lock);
+ break;
+ }
+
+ port = qd.port;
+ queue = qd.queue;
+ rx_adapter->qd = qd;
+ rx_adapter->qd_valid = 1;
+ dev_info = &rx_adapter->eth_devices[port];
+ if (rxa_shared_intr(dev_info, queue))
+ dev_info->shared_intr_enabled = 1;
+ else {
+ queue_info = &dev_info->rx_queue[queue];
+ queue_info->intr_enabled = 1;
+ }
+ rte_eth_dev_rx_intr_enable(port, queue);
+ rte_spinlock_unlock(ring_lock);
+ } else {
+ port = qd.port;
+ queue = qd.queue;
+
+ dev_info = &rx_adapter->eth_devices[port];
+ }
+
+ if (rxa_shared_intr(dev_info, queue)) {
+ uint16_t i;
+ uint16_t nb_queues;
+
+ nb_queues = dev_info->dev->data->nb_rx_queues;
+ n = 0;
+ for (i = dev_info->next_q_idx; i < nb_queues; i++) {
+ uint8_t enq_buffer_full;
+
+ if (!rxa_intr_queue(dev_info, i))
+ continue;
+ n = rxa_eth_rx(rx_adapter, port, i, nb_rx,
+ rx_adapter->max_nb_rx,
+ &rxq_empty);
+ nb_rx += n;
+
+ enq_buffer_full = !rxq_empty && n == 0;
+ max_done = nb_rx > rx_adapter->max_nb_rx;
+
+ if (enq_buffer_full || max_done) {
+ dev_info->next_q_idx = i;
+ goto done;
+ }
+ }
+
+ rx_adapter->qd_valid = 0;
+
+ /* Reinitialize for next interrupt */
+ dev_info->next_q_idx = dev_info->multi_intr_cap ?
+ RTE_MAX_RXTX_INTR_VEC_ID - 1 :
+ 0;
+ } else {
+ n = rxa_eth_rx(rx_adapter, port, queue, nb_rx,
+ rx_adapter->max_nb_rx,
+ &rxq_empty);
+ rx_adapter->qd_valid = !rxq_empty;
+ nb_rx += n;
+ if (nb_rx > rx_adapter->max_nb_rx)
+ break;
+ }
+ }
+
+done:
+ rx_adapter->stats.rx_intr_packets += nb_rx;
+ return nb_rx;
}
/*
@@ -491,12 +1114,10 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
* it.
*/
static inline uint32_t
-eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
+rxa_poll(struct rte_event_eth_rx_adapter *rx_adapter)
{
uint32_t num_queue;
- uint16_t n;
uint32_t nb_rx = 0;
- struct rte_mbuf *mbufs[BATCH_SIZE];
struct rte_eth_event_enqueue_buffer *buf;
uint32_t wrr_pos;
uint32_t max_nb_rx;
@@ -504,7 +1125,7 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
wrr_pos = rx_adapter->wrr_pos;
max_nb_rx = rx_adapter->max_nb_rx;
buf = &rx_adapter->event_enqueue_buffer;
- struct rte_event_eth_rx_adapter_stats *stats = &rx_adapter->stats;
+ stats = &rx_adapter->stats;
/* Iterate through a WRR sequence */
for (num_queue = 0; num_queue < rx_adapter->wrr_len; num_queue++) {
@@ -516,45 +1137,42 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
* enough space in the enqueue buffer.
*/
if (buf->count >= BATCH_SIZE)
- flush_event_buffer(rx_adapter);
- if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count))
- break;
-
- stats->rx_poll_count++;
- n = rte_eth_rx_burst(d, qid, mbufs, BATCH_SIZE);
+ rxa_flush_event_buffer(rx_adapter);
+ if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count)) {
+ rx_adapter->wrr_pos = wrr_pos;
+ return nb_rx;
+ }
- if (n) {
- stats->rx_packets += n;
- /* The check before rte_eth_rx_burst() ensures that
- * all n mbufs can be buffered
- */
- fill_event_buffer(rx_adapter, d, qid, mbufs, n);
- nb_rx += n;
- if (nb_rx > max_nb_rx) {
- rx_adapter->wrr_pos =
+ nb_rx += rxa_eth_rx(rx_adapter, d, qid, nb_rx, max_nb_rx,
+ NULL);
+ if (nb_rx > max_nb_rx) {
+ rx_adapter->wrr_pos =
(wrr_pos + 1) % rx_adapter->wrr_len;
- return nb_rx;
- }
+ break;
}
if (++wrr_pos == rx_adapter->wrr_len)
wrr_pos = 0;
}
-
return nb_rx;
}
static int
-event_eth_rx_adapter_service_func(void *args)
+rxa_service_func(void *args)
{
struct rte_event_eth_rx_adapter *rx_adapter = args;
- struct rte_eth_event_enqueue_buffer *buf;
+ struct rte_event_eth_rx_adapter_stats *stats;
- buf = &rx_adapter->event_enqueue_buffer;
if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0)
return 0;
- if (eth_rx_poll(rx_adapter) == 0 && buf->count)
- flush_event_buffer(rx_adapter);
+ if (!rx_adapter->rxa_started) {
+		rte_spinlock_unlock(&rx_adapter->rx_lock);
+		return 0;
+ }
+
+ stats = &rx_adapter->stats;
+ stats->rx_packets += rxa_intr_ring_dequeue(rx_adapter);
+ stats->rx_packets += rxa_poll(rx_adapter);
rte_spinlock_unlock(&rx_adapter->rx_lock);
return 0;
}
@@ -586,14 +1204,14 @@ rte_event_eth_rx_adapter_init(void)
}
static inline struct rte_event_eth_rx_adapter *
-id_to_rx_adapter(uint8_t id)
+rxa_id_to_adapter(uint8_t id)
{
return event_eth_rx_adapter ?
event_eth_rx_adapter[id] : NULL;
}
static int
-default_conf_cb(uint8_t id, uint8_t dev_id,
+rxa_default_conf_cb(uint8_t id, uint8_t dev_id,
struct rte_event_eth_rx_adapter_conf *conf, void *arg)
{
int ret;
@@ -602,7 +1220,7 @@ default_conf_cb(uint8_t id, uint8_t dev_id,
int started;
uint8_t port_id;
struct rte_event_port_conf *port_conf = arg;
- struct rte_event_eth_rx_adapter *rx_adapter = id_to_rx_adapter(id);
+ struct rte_event_eth_rx_adapter *rx_adapter = rxa_id_to_adapter(id);
dev = &rte_eventdevs[rx_adapter->eventdev_id];
dev_conf = dev->data->dev_conf;
@@ -639,7 +1257,351 @@ default_conf_cb(uint8_t id, uint8_t dev_id,
}
static int
-init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
+rxa_epoll_create1(void)
+{
+#if defined(LINUX)
+ int fd;
+ fd = epoll_create1(EPOLL_CLOEXEC);
+ return fd < 0 ? -errno : fd;
+#elif defined(BSD)
+ return -ENOTSUP;
+#endif
+}
+
+static int
+rxa_init_epd(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ if (rx_adapter->epd != INIT_FD)
+ return 0;
+
+ rx_adapter->epd = rxa_epoll_create1();
+ if (rx_adapter->epd < 0) {
+ int err = rx_adapter->epd;
+ rx_adapter->epd = INIT_FD;
+ RTE_EDEV_LOG_ERR("epoll_create1() failed, err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+rxa_create_intr_thread(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ int err;
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ if (rx_adapter->intr_ring)
+ return 0;
+
+ rx_adapter->intr_ring = rte_ring_create("intr_ring",
+ RTE_EVENT_ETH_INTR_RING_SIZE,
+ rte_socket_id(), 0);
+ if (!rx_adapter->intr_ring)
+ return -ENOMEM;
+
+ rx_adapter->epoll_events = rte_zmalloc_socket(rx_adapter->mem_name,
+ RTE_EVENT_ETH_INTR_RING_SIZE *
+ sizeof(struct rte_epoll_event),
+ RTE_CACHE_LINE_SIZE,
+ rx_adapter->socket_id);
+ if (!rx_adapter->epoll_events) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ rte_spinlock_init(&rx_adapter->intr_ring_lock);
+
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+ "rx-intr-thread-%d", rx_adapter->id);
+
+ err = rte_ctrl_thread_create(&rx_adapter->rx_intr_thread, thread_name,
+ NULL, rxa_intr_thread, rx_adapter);
+ if (!err) {
+ rte_thread_setname(rx_adapter->rx_intr_thread, thread_name);
+ return 0;
+ }
+
+ RTE_EDEV_LOG_ERR("Failed to create interrupt thread err = %d\n", err);
+error:
+ rte_ring_free(rx_adapter->intr_ring);
+ rx_adapter->intr_ring = NULL;
+ rx_adapter->epoll_events = NULL;
+ return err;
+}
+
+static int
+rxa_destroy_intr_thread(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ int err;
+
+ err = pthread_cancel(rx_adapter->rx_intr_thread);
+ if (err)
+ RTE_EDEV_LOG_ERR("Can't cancel interrupt thread err = %d\n",
+ err);
+
+ err = pthread_join(rx_adapter->rx_intr_thread, NULL);
+ if (err)
+ RTE_EDEV_LOG_ERR("Can't join interrupt thread err = %d\n", err);
+
+ rte_free(rx_adapter->epoll_events);
+ rte_ring_free(rx_adapter->intr_ring);
+ rx_adapter->intr_ring = NULL;
+ rx_adapter->epoll_events = NULL;
+ return 0;
+}
+
+static int
+rxa_free_intr_resources(struct rte_event_eth_rx_adapter *rx_adapter)
+{
+ int ret;
+
+ if (rx_adapter->num_rx_intr == 0)
+ return 0;
+
+ ret = rxa_destroy_intr_thread(rx_adapter);
+ if (ret)
+ return ret;
+
+ close(rx_adapter->epd);
+ rx_adapter->epd = INIT_FD;
+
+ return ret;
+}
+
+static int
+rxa_disable_intr(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ uint16_t rx_queue_id)
+{
+ int err;
+ uint16_t eth_dev_id = dev_info->dev->data->port_id;
+ int sintr = rxa_shared_intr(dev_info, rx_queue_id);
+
+ err = rte_eth_dev_rx_intr_disable(eth_dev_id, rx_queue_id);
+ if (err) {
+ RTE_EDEV_LOG_ERR("Could not disable interrupt for Rx queue %u",
+ rx_queue_id);
+ return err;
+ }
+
+ err = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id,
+ rx_adapter->epd,
+ RTE_INTR_EVENT_DEL,
+ 0);
+ if (err)
+ RTE_EDEV_LOG_ERR("Interrupt event deletion failed %d", err);
+
+	if (sintr)
+		dev_info->shared_intr_enabled = 0;
+	else
+		dev_info->rx_queue[rx_queue_id].intr_enabled = 0;
+ return err;
+}
+
+static int
+rxa_del_intr_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id)
+{
+ int err;
+ int i;
+ int s;
+
+ if (dev_info->nb_rx_intr == 0)
+ return 0;
+
+ err = 0;
+ if (rx_queue_id == -1) {
+ s = dev_info->nb_shared_intr;
+ for (i = 0; i < dev_info->nb_rx_intr; i++) {
+ int sintr;
+ uint16_t q;
+
+ q = dev_info->intr_queue[i];
+ sintr = rxa_shared_intr(dev_info, q);
+ s -= sintr;
+
+ if (!sintr || s == 0) {
+
+ err = rxa_disable_intr(rx_adapter, dev_info,
+ q);
+ if (err)
+ return err;
+ rxa_intr_ring_del_entries(rx_adapter, dev_info,
+ q);
+ }
+ }
+ } else {
+ if (!rxa_intr_queue(dev_info, rx_queue_id))
+ return 0;
+ if (!rxa_shared_intr(dev_info, rx_queue_id) ||
+ dev_info->nb_shared_intr == 1) {
+ err = rxa_disable_intr(rx_adapter, dev_info,
+ rx_queue_id);
+ if (err)
+ return err;
+ rxa_intr_ring_del_entries(rx_adapter, dev_info,
+ rx_queue_id);
+ }
+
+ for (i = 0; i < dev_info->nb_rx_intr; i++) {
+ if (dev_info->intr_queue[i] == rx_queue_id) {
+ for (; i < dev_info->nb_rx_intr - 1; i++)
+ dev_info->intr_queue[i] =
+ dev_info->intr_queue[i + 1];
+ break;
+ }
+ }
+ }
+
+ return err;
+}
+
+static int
+rxa_config_intr(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ uint16_t rx_queue_id)
+{
+ int err, err1;
+ uint16_t eth_dev_id = dev_info->dev->data->port_id;
+ union queue_data qd;
+ int init_fd;
+ uint16_t *intr_queue;
+ int sintr = rxa_shared_intr(dev_info, rx_queue_id);
+
+ if (rxa_intr_queue(dev_info, rx_queue_id))
+ return 0;
+
+ intr_queue = dev_info->intr_queue;
+ if (dev_info->intr_queue == NULL) {
+ size_t len =
+ dev_info->dev->data->nb_rx_queues * sizeof(uint16_t);
+ dev_info->intr_queue =
+ rte_zmalloc_socket(
+ rx_adapter->mem_name,
+ len,
+ 0,
+ rx_adapter->socket_id);
+ if (dev_info->intr_queue == NULL)
+ return -ENOMEM;
+ }
+
+ init_fd = rx_adapter->epd;
+ err = rxa_init_epd(rx_adapter);
+ if (err)
+ goto err_free_queue;
+
+ qd.port = eth_dev_id;
+ qd.queue = rx_queue_id;
+
+ err = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id,
+ rx_adapter->epd,
+ RTE_INTR_EVENT_ADD,
+ qd.ptr);
+ if (err) {
+ RTE_EDEV_LOG_ERR("Failed to add interrupt event for"
+ " Rx Queue %u err %d", rx_queue_id, err);
+ goto err_del_fd;
+ }
+
+ err = rte_eth_dev_rx_intr_enable(eth_dev_id, rx_queue_id);
+ if (err) {
+ RTE_EDEV_LOG_ERR("Could not enable interrupt for"
+ " Rx Queue %u err %d", rx_queue_id, err);
+
+ goto err_del_event;
+ }
+
+ err = rxa_create_intr_thread(rx_adapter);
+ if (!err) {
+ if (sintr)
+ dev_info->shared_intr_enabled = 1;
+ else
+ dev_info->rx_queue[rx_queue_id].intr_enabled = 1;
+ return 0;
+ }
+
+
+ err = rte_eth_dev_rx_intr_disable(eth_dev_id, rx_queue_id);
+ if (err)
+ RTE_EDEV_LOG_ERR("Could not disable interrupt for"
+ " Rx Queue %u err %d", rx_queue_id, err);
+err_del_event:
+ err1 = rte_eth_dev_rx_intr_ctl_q(eth_dev_id, rx_queue_id,
+ rx_adapter->epd,
+ RTE_INTR_EVENT_DEL,
+ 0);
+ if (err1) {
+ RTE_EDEV_LOG_ERR("Could not delete event for"
+ " Rx Queue %u err %d", rx_queue_id, err1);
+ }
+err_del_fd:
+ if (init_fd == INIT_FD) {
+ close(rx_adapter->epd);
+		rx_adapter->epd = INIT_FD;
+ }
+err_free_queue:
+ if (intr_queue == NULL)
+ rte_free(dev_info->intr_queue);
+
+ return err;
+}
+
+static int
+rxa_add_intr_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int rx_queue_id)
+
+{
+ int i, j, err;
+ int si = -1;
+ int shared_done = (dev_info->nb_shared_intr > 0);
+
+ if (rx_queue_id != -1) {
+ if (rxa_shared_intr(dev_info, rx_queue_id) && shared_done)
+ return 0;
+ return rxa_config_intr(rx_adapter, dev_info, rx_queue_id);
+ }
+
+ err = 0;
+ for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++) {
+
+ if (rxa_shared_intr(dev_info, i) && shared_done)
+ continue;
+
+ err = rxa_config_intr(rx_adapter, dev_info, i);
+
+ shared_done = err == 0 && rxa_shared_intr(dev_info, i);
+ if (shared_done) {
+ si = i;
+ dev_info->shared_intr_enabled = 1;
+ }
+ if (err)
+ break;
+ }
+
+ if (err == 0)
+ return 0;
+
+ shared_done = (dev_info->nb_shared_intr > 0);
+ for (j = 0; j < i; j++) {
+ if (rxa_intr_queue(dev_info, j))
+ continue;
+ if (rxa_shared_intr(dev_info, j) && si != j)
+ continue;
+ err = rxa_disable_intr(rx_adapter, dev_info, j);
+ if (err)
+ break;
+
+ }
+
+ return err;
+}
+
+
+static int
+rxa_init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
{
int ret;
struct rte_service_spec service;
@@ -652,7 +1614,7 @@ init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
snprintf(service.name, ETH_RX_ADAPTER_SERVICE_NAME_LEN,
"rte_event_eth_rx_adapter_%d", id);
service.socket_id = rx_adapter->socket_id;
- service.callback = event_eth_rx_adapter_service_func;
+ service.callback = rxa_service_func;
service.callback_userdata = rx_adapter;
/* Service function handles locking for queue add/del updates */
service.capabilities = RTE_SERVICE_CAP_MT_SAFE;
@@ -673,6 +1635,7 @@ init_service(struct rte_event_eth_rx_adapter *rx_adapter, uint8_t id)
rx_adapter->event_port_id = rx_adapter_conf.event_port_id;
rx_adapter->max_nb_rx = rx_adapter_conf.max_nb_rx;
rx_adapter->service_inited = 1;
+ rx_adapter->epd = INIT_FD;
return 0;
err_done:
@@ -680,9 +1643,8 @@ err_done:
return ret;
}
-
static void
-update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter,
+rxa_update_queue(struct rte_event_eth_rx_adapter *rx_adapter,
struct eth_device_info *dev_info,
int32_t rx_queue_id,
uint8_t add)
@@ -696,7 +1658,7 @@ update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter,
if (rx_queue_id == -1) {
for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
- update_queue_info(rx_adapter, dev_info, i, add);
+ rxa_update_queue(rx_adapter, dev_info, i, add);
} else {
queue_info = &dev_info->rx_queue[rx_queue_id];
enabled = queue_info->queue_enabled;
@@ -711,31 +1673,65 @@ update_queue_info(struct rte_event_eth_rx_adapter *rx_adapter,
}
}
-static int
-event_eth_rx_adapter_queue_del(struct rte_event_eth_rx_adapter *rx_adapter,
- struct eth_device_info *dev_info,
- uint16_t rx_queue_id)
+static void
+rxa_sw_del(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int32_t rx_queue_id)
{
- struct eth_rx_queue_info *queue_info;
+ int pollq;
+ int intrq;
+ int sintrq;
+
if (rx_adapter->nb_queues == 0)
- return 0;
+ return;
- queue_info = &dev_info->rx_queue[rx_queue_id];
- rx_adapter->num_rx_polled -= queue_info->queue_enabled;
- update_queue_info(rx_adapter, dev_info, rx_queue_id, 0);
- return 0;
+ if (rx_queue_id == -1) {
+ uint16_t nb_rx_queues;
+ uint16_t i;
+
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ for (i = 0; i < nb_rx_queues; i++)
+ rxa_sw_del(rx_adapter, dev_info, i);
+ return;
+ }
+
+ pollq = rxa_polled_queue(dev_info, rx_queue_id);
+ intrq = rxa_intr_queue(dev_info, rx_queue_id);
+ sintrq = rxa_shared_intr(dev_info, rx_queue_id);
+ rxa_update_queue(rx_adapter, dev_info, rx_queue_id, 0);
+ rx_adapter->num_rx_polled -= pollq;
+ dev_info->nb_rx_poll -= pollq;
+ rx_adapter->num_rx_intr -= intrq;
+ dev_info->nb_rx_intr -= intrq;
+ dev_info->nb_shared_intr -= intrq && sintrq;
}
static void
-event_eth_rx_adapter_queue_add(struct rte_event_eth_rx_adapter *rx_adapter,
- struct eth_device_info *dev_info,
- uint16_t rx_queue_id,
- const struct rte_event_eth_rx_adapter_queue_conf *conf)
-
+rxa_add_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+ struct eth_device_info *dev_info,
+ int32_t rx_queue_id,
+ const struct rte_event_eth_rx_adapter_queue_conf *conf)
{
struct eth_rx_queue_info *queue_info;
const struct rte_event *ev = &conf->ev;
+ int pollq;
+ int intrq;
+ int sintrq;
+
+ if (rx_queue_id == -1) {
+ uint16_t nb_rx_queues;
+ uint16_t i;
+
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ for (i = 0; i < nb_rx_queues; i++)
+ rxa_add_queue(rx_adapter, dev_info, i, conf);
+ return;
+ }
+
+ pollq = rxa_polled_queue(dev_info, rx_queue_id);
+ intrq = rxa_intr_queue(dev_info, rx_queue_id);
+ sintrq = rxa_shared_intr(dev_info, rx_queue_id);
queue_info = &dev_info->rx_queue[rx_queue_id];
queue_info->event_queue_id = ev->queue_id;
@@ -749,69 +1745,162 @@ event_eth_rx_adapter_queue_add(struct rte_event_eth_rx_adapter *rx_adapter,
queue_info->flow_id_mask = ~0;
}
- /* The same queue can be added more than once */
- rx_adapter->num_rx_polled += !queue_info->queue_enabled;
- update_queue_info(rx_adapter, dev_info, rx_queue_id, 1);
+ rxa_update_queue(rx_adapter, dev_info, rx_queue_id, 1);
+ if (rxa_polled_queue(dev_info, rx_queue_id)) {
+ rx_adapter->num_rx_polled += !pollq;
+ dev_info->nb_rx_poll += !pollq;
+ rx_adapter->num_rx_intr -= intrq;
+ dev_info->nb_rx_intr -= intrq;
+ dev_info->nb_shared_intr -= intrq && sintrq;
+ }
+
+ if (rxa_intr_queue(dev_info, rx_queue_id)) {
+ rx_adapter->num_rx_polled -= pollq;
+ dev_info->nb_rx_poll -= pollq;
+ rx_adapter->num_rx_intr += !intrq;
+ dev_info->nb_rx_intr += !intrq;
+ dev_info->nb_shared_intr += !intrq && sintrq;
+ if (dev_info->nb_shared_intr == 1) {
+ if (dev_info->multi_intr_cap)
+ dev_info->next_q_idx =
+ RTE_MAX_RXTX_INTR_VEC_ID - 1;
+ else
+ dev_info->next_q_idx = 0;
+ }
+ }
}
-static int add_rx_queue(struct rte_event_eth_rx_adapter *rx_adapter,
+static int rxa_sw_add(struct rte_event_eth_rx_adapter *rx_adapter,
uint16_t eth_dev_id,
int rx_queue_id,
const struct rte_event_eth_rx_adapter_queue_conf *queue_conf)
{
struct eth_device_info *dev_info = &rx_adapter->eth_devices[eth_dev_id];
struct rte_event_eth_rx_adapter_queue_conf temp_conf;
- uint32_t i;
int ret;
+ struct eth_rx_poll_entry *rx_poll;
+ struct eth_rx_queue_info *rx_queue;
+ uint32_t *rx_wrr;
+ uint16_t nb_rx_queues;
+ uint32_t nb_rx_poll, nb_wrr;
+ uint32_t nb_rx_intr;
+ int num_intr_vec;
+ uint16_t wt;
if (queue_conf->servicing_weight == 0) {
-
struct rte_eth_dev_data *data = dev_info->dev->data;
- if (data->dev_conf.intr_conf.rxq) {
- RTE_EDEV_LOG_ERR("Interrupt driven queues"
- " not supported");
- return -ENOTSUP;
- }
- temp_conf = *queue_conf;
- /* If Rx interrupts are disabled set wt = 1 */
- temp_conf.servicing_weight = 1;
+ temp_conf = *queue_conf;
+ if (!data->dev_conf.intr_conf.rxq) {
+ /* If Rx interrupts are disabled, set wt = 1 */
+ temp_conf.servicing_weight = 1;
+ }
queue_conf = &temp_conf;
}
+ nb_rx_queues = dev_info->dev->data->nb_rx_queues;
+ rx_queue = dev_info->rx_queue;
+ wt = queue_conf->servicing_weight;
+
if (dev_info->rx_queue == NULL) {
dev_info->rx_queue =
rte_zmalloc_socket(rx_adapter->mem_name,
- dev_info->dev->data->nb_rx_queues *
+ nb_rx_queues *
sizeof(struct eth_rx_queue_info), 0,
rx_adapter->socket_id);
if (dev_info->rx_queue == NULL)
return -ENOMEM;
}
+ rx_wrr = NULL;
+ rx_poll = NULL;
- if (rx_queue_id == -1) {
- for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
- event_eth_rx_adapter_queue_add(rx_adapter,
- dev_info, i,
- queue_conf);
+ rxa_calc_nb_post_add(rx_adapter, dev_info, rx_queue_id,
+ queue_conf->servicing_weight,
+ &nb_rx_poll, &nb_rx_intr, &nb_wrr);
+
+ if (dev_info->dev->intr_handle)
+ dev_info->multi_intr_cap =
+ rte_intr_cap_multiple(dev_info->dev->intr_handle);
+
+ ret = rxa_alloc_poll_arrays(rx_adapter, nb_rx_poll, nb_wrr,
+ &rx_poll, &rx_wrr);
+ if (ret)
+ goto err_free_rxqueue;
+
+ if (wt == 0) {
+ num_intr_vec = rxa_nb_intr_vect(dev_info, rx_queue_id, 1);
+
+ ret = rxa_intr_ring_check_avail(rx_adapter, num_intr_vec);
+ if (ret)
+ goto err_free_rxqueue;
+
+ ret = rxa_add_intr_queue(rx_adapter, dev_info, rx_queue_id);
+ if (ret)
+ goto err_free_rxqueue;
} else {
- event_eth_rx_adapter_queue_add(rx_adapter, dev_info,
- (uint16_t)rx_queue_id,
- queue_conf);
+
+ num_intr_vec = 0;
+ if (rx_adapter->num_rx_intr > nb_rx_intr) {
+ num_intr_vec = rxa_nb_intr_vect(dev_info,
+ rx_queue_id, 0);
+ /* Interrupt-based queues are being converted to
+ * poll-mode queues; delete the interrupt configuration
+ * for those queues.
+ */
+ ret = rxa_del_intr_queue(rx_adapter,
+ dev_info, rx_queue_id);
+ if (ret)
+ goto err_free_rxqueue;
+ }
}
- ret = eth_poll_wrr_calc(rx_adapter);
- if (ret) {
- event_eth_rx_adapter_queue_del(rx_adapter,
- dev_info, rx_queue_id);
- return ret;
+ if (nb_rx_intr == 0) {
+ ret = rxa_free_intr_resources(rx_adapter);
+ if (ret)
+ goto err_free_rxqueue;
}
- return ret;
+ if (wt == 0) {
+ uint16_t i;
+
+ if (rx_queue_id == -1) {
+ for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
+ dev_info->intr_queue[i] = i;
+ } else {
+ if (!rxa_intr_queue(dev_info, rx_queue_id))
+ dev_info->intr_queue[nb_rx_intr - 1] =
+ rx_queue_id;
+ }
+ }
+
+ rxa_add_queue(rx_adapter, dev_info, rx_queue_id, queue_conf);
+ rxa_calc_wrr_sequence(rx_adapter, rx_poll, rx_wrr);
+
+ rte_free(rx_adapter->eth_rx_poll);
+ rte_free(rx_adapter->wrr_sched);
+
+ rx_adapter->eth_rx_poll = rx_poll;
+ rx_adapter->wrr_sched = rx_wrr;
+ rx_adapter->wrr_len = nb_wrr;
+ rx_adapter->num_intr_vec += num_intr_vec;
+ return 0;
+
+err_free_rxqueue:
+ if (rx_queue == NULL) {
+ rte_free(dev_info->rx_queue);
+ dev_info->rx_queue = NULL;
+ }
+
+ rte_free(rx_poll);
+ rte_free(rx_wrr);
+
+ return ret;
}
static int
-rx_adapter_ctrl(uint8_t id, int start)
+rxa_ctrl(uint8_t id, int start)
{
struct rte_event_eth_rx_adapter *rx_adapter;
struct rte_eventdev *dev;
@@ -821,7 +1910,7 @@ rx_adapter_ctrl(uint8_t id, int start)
int stop = !start;
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
@@ -845,8 +1934,12 @@ rx_adapter_ctrl(uint8_t id, int start)
&rte_eth_devices[i]);
}
- if (use_service)
+ if (use_service) {
+ rte_spinlock_lock(&rx_adapter->rx_lock);
+ rx_adapter->rxa_started = start;
rte_service_runstate_set(rx_adapter->service_id, start);
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+ }
return 0;
}
@@ -880,7 +1973,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
return ret;
}
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter != NULL) {
RTE_EDEV_LOG_ERR("Eth Rx adapter exists id = %" PRIu8, id);
return -EEXIST;
@@ -902,6 +1995,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
rx_adapter->socket_id = socket_id;
rx_adapter->conf_cb = conf_cb;
rx_adapter->conf_arg = conf_arg;
+ rx_adapter->id = id;
strcpy(rx_adapter->mem_name, mem_name);
rx_adapter->eth_devices = rte_zmalloc_socket(rx_adapter->mem_name,
/* FIXME: incompatible with hotplug */
@@ -922,7 +2016,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
rx_adapter->eth_devices[i].dev = &rte_eth_devices[i];
event_eth_rx_adapter[id] = rx_adapter;
- if (conf_cb == default_conf_cb)
+ if (conf_cb == rxa_default_conf_cb)
rx_adapter->default_cb_arg = 1;
return 0;
}
@@ -943,7 +2037,7 @@ rte_event_eth_rx_adapter_create(uint8_t id, uint8_t dev_id,
return -ENOMEM;
*pc = *port_config;
ret = rte_event_eth_rx_adapter_create_ext(id, dev_id,
- default_conf_cb,
+ rxa_default_conf_cb,
pc);
if (ret)
rte_free(pc);
@@ -957,7 +2051,7 @@ rte_event_eth_rx_adapter_free(uint8_t id)
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
@@ -987,12 +2081,11 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
struct rte_event_eth_rx_adapter *rx_adapter;
struct rte_eventdev *dev;
struct eth_device_info *dev_info;
- int start_service;
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if ((rx_adapter == NULL) || (queue_conf == NULL))
return -EINVAL;
@@ -1030,7 +2123,6 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
return -EINVAL;
}
- start_service = 0;
dev_info = &rx_adapter->eth_devices[eth_dev_id];
if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) {
@@ -1050,28 +2142,29 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
&rte_eth_devices[eth_dev_id],
rx_queue_id, queue_conf);
if (ret == 0) {
- update_queue_info(rx_adapter,
+ dev_info->internal_event_port = 1;
+ rxa_update_queue(rx_adapter,
&rx_adapter->eth_devices[eth_dev_id],
rx_queue_id,
1);
}
} else {
rte_spinlock_lock(&rx_adapter->rx_lock);
- ret = init_service(rx_adapter, id);
- if (ret == 0)
- ret = add_rx_queue(rx_adapter, eth_dev_id, rx_queue_id,
+ dev_info->internal_event_port = 0;
+ ret = rxa_init_service(rx_adapter, id);
+ if (ret == 0) {
+ uint32_t service_id = rx_adapter->service_id;
+ ret = rxa_sw_add(rx_adapter, eth_dev_id, rx_queue_id,
queue_conf);
+ rte_service_component_runstate_set(service_id,
+ rxa_sw_adapter_queue_count(rx_adapter));
+ }
rte_spinlock_unlock(&rx_adapter->rx_lock);
- if (ret == 0)
- start_service = !!sw_rx_adapter_queue_count(rx_adapter);
}
if (ret)
return ret;
- if (start_service)
- rte_service_component_runstate_set(rx_adapter->service_id, 1);
-
return 0;
}
@@ -1084,12 +2177,17 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint16_t eth_dev_id,
struct rte_event_eth_rx_adapter *rx_adapter;
struct eth_device_info *dev_info;
uint32_t cap;
- uint16_t i;
+ uint32_t nb_rx_poll = 0;
+ uint32_t nb_wrr = 0;
+ uint32_t nb_rx_intr;
+ struct eth_rx_poll_entry *rx_poll = NULL;
+ uint32_t *rx_wrr = NULL;
+ int num_intr_vec;
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
@@ -1116,7 +2214,7 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint16_t eth_dev_id,
&rte_eth_devices[eth_dev_id],
rx_queue_id);
if (ret == 0) {
- update_queue_info(rx_adapter,
+ rxa_update_queue(rx_adapter,
&rx_adapter->eth_devices[eth_dev_id],
rx_queue_id,
0);
@@ -1126,48 +2224,78 @@ rte_event_eth_rx_adapter_queue_del(uint8_t id, uint16_t eth_dev_id,
}
}
} else {
- int rc;
+ rxa_calc_nb_post_del(rx_adapter, dev_info, rx_queue_id,
+ &nb_rx_poll, &nb_rx_intr, &nb_wrr);
+
+ ret = rxa_alloc_poll_arrays(rx_adapter, nb_rx_poll, nb_wrr,
+ &rx_poll, &rx_wrr);
+ if (ret)
+ return ret;
+
rte_spinlock_lock(&rx_adapter->rx_lock);
- if (rx_queue_id == -1) {
- for (i = 0; i < dev_info->dev->data->nb_rx_queues; i++)
- event_eth_rx_adapter_queue_del(rx_adapter,
- dev_info,
- i);
- } else {
- event_eth_rx_adapter_queue_del(rx_adapter,
- dev_info,
- (uint16_t)rx_queue_id);
+
+ num_intr_vec = 0;
+ if (rx_adapter->num_rx_intr > nb_rx_intr) {
+
+ num_intr_vec = rxa_nb_intr_vect(dev_info,
+ rx_queue_id, 0);
+ ret = rxa_del_intr_queue(rx_adapter, dev_info,
+ rx_queue_id);
+ if (ret)
+ goto unlock_ret;
+ }
+
+ if (nb_rx_intr == 0) {
+ ret = rxa_free_intr_resources(rx_adapter);
+ if (ret)
+ goto unlock_ret;
+ }
+
+ rxa_sw_del(rx_adapter, dev_info, rx_queue_id);
+ rxa_calc_wrr_sequence(rx_adapter, rx_poll, rx_wrr);
+
+ rte_free(rx_adapter->eth_rx_poll);
+ rte_free(rx_adapter->wrr_sched);
+
+ if (nb_rx_intr == 0) {
+ rte_free(dev_info->intr_queue);
+ dev_info->intr_queue = NULL;
}
- rc = eth_poll_wrr_calc(rx_adapter);
- if (rc)
- RTE_EDEV_LOG_ERR("WRR recalculation failed %" PRId32,
- rc);
+ rx_adapter->eth_rx_poll = rx_poll;
+ rx_adapter->wrr_sched = rx_wrr;
+ rx_adapter->wrr_len = nb_wrr;
+ rx_adapter->num_intr_vec += num_intr_vec;
if (dev_info->nb_dev_queues == 0) {
rte_free(dev_info->rx_queue);
dev_info->rx_queue = NULL;
}
-
+unlock_ret:
rte_spinlock_unlock(&rx_adapter->rx_lock);
+ if (ret) {
+ rte_free(rx_poll);
+ rte_free(rx_wrr);
+ return ret;
+ }
+
rte_service_component_runstate_set(rx_adapter->service_id,
- sw_rx_adapter_queue_count(rx_adapter));
+ rxa_sw_adapter_queue_count(rx_adapter));
}
return ret;
}
-
int
rte_event_eth_rx_adapter_start(uint8_t id)
{
- return rx_adapter_ctrl(id, 1);
+ return rxa_ctrl(id, 1);
}
int
rte_event_eth_rx_adapter_stop(uint8_t id)
{
- return rx_adapter_ctrl(id, 0);
+ return rxa_ctrl(id, 0);
}
int
@@ -1184,7 +2312,7 @@ rte_event_eth_rx_adapter_stats_get(uint8_t id,
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL || stats == NULL)
return -EINVAL;
@@ -1222,7 +2350,7 @@ rte_event_eth_rx_adapter_stats_reset(uint8_t id)
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL)
return -EINVAL;
@@ -1247,7 +2375,7 @@ rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id)
RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
- rx_adapter = id_to_rx_adapter(id);
+ rx_adapter = rxa_id_to_adapter(id);
if (rx_adapter == NULL || service_id == NULL)
return -EINVAL;
@@ -1256,3 +2384,47 @@ rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id)
return rx_adapter->service_inited ? 0 : -ESRCH;
}
+
+int rte_event_eth_rx_adapter_cb_register(uint8_t id,
+ uint16_t eth_dev_id,
+ rte_event_eth_rx_adapter_cb_fn cb_fn,
+ void *cb_arg)
+{
+ struct rte_event_eth_rx_adapter *rx_adapter;
+ struct eth_device_info *dev_info;
+ uint32_t cap;
+ int ret;
+
+ RTE_EVENT_ETH_RX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+ RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
+
+ rx_adapter = rxa_id_to_adapter(id);
+ if (rx_adapter == NULL)
+ return -EINVAL;
+
+ dev_info = &rx_adapter->eth_devices[eth_dev_id];
+ if (dev_info->rx_queue == NULL)
+ return -EINVAL;
+
+ ret = rte_event_eth_rx_adapter_caps_get(rx_adapter->eventdev_id,
+ eth_dev_id,
+ &cap);
+ if (ret) {
+ RTE_EDEV_LOG_ERR("Failed to get adapter caps edev %" PRIu8
+ "eth port %" PRIu16, id, eth_dev_id);
+ return ret;
+ }
+
+ if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT) {
+ RTE_EDEV_LOG_ERR("Rx callback not supported for eth port %"
+ PRIu16, eth_dev_id);
+ return -EINVAL;
+ }
+
+ rte_spinlock_lock(&rx_adapter->rx_lock);
+ dev_info->cb_fn = cb_fn;
+ dev_info->cb_arg = cb_arg;
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+
+ return 0;
+}
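
For orientation, a minimal sketch of how an application drives the SW adapter path reworked above; the adapter/eventdev/port ids, the port_conf numbers, and the service lcore are placeholders, not values taken from this patch:

	#include <string.h>
	#include <rte_event_eth_rx_adapter.h>
	#include <rte_service.h>

	static int
	setup_rx_adapter(void)
	{
		struct rte_event_port_conf pconf = {
			.new_event_threshold = 4096,
			.dequeue_depth = 32,
			.enqueue_depth = 32,
		};
		struct rte_event_eth_rx_adapter_queue_conf qconf;
		uint32_t service_id;
		int ret;

		memset(&qconf, 0, sizeof(qconf));
		qconf.ev.queue_id = 0;
		qconf.ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
		qconf.servicing_weight = 1;	/* 0 would request interrupt mode */

		ret = rte_event_eth_rx_adapter_create(0, 0, &pconf);
		if (ret)
			return ret;
		/* rx_queue_id of -1 adds all Rx queues of the port */
		ret = rte_event_eth_rx_adapter_queue_add(0, 0, -1, &qconf);
		if (ret)
			return ret;
		/* SW transfers run as a service; lcore 1 is assumed to be a
		 * started service core.
		 */
		if (rte_event_eth_rx_adapter_service_id_get(0, &service_id) == 0)
			rte_service_map_lcore_set(service_id, 1, 1);
		return rte_event_eth_rx_adapter_start(0);
	}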
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.h b/lib/librte_eventdev/rte_event_eth_rx_adapter.h
index 307b2b50..332ee216 100644
--- a/lib/librte_eventdev/rte_event_eth_rx_adapter.h
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.h
@@ -63,9 +63,22 @@
* rte_event_eth_rx_adapter_service_id_get() function can be used to retrieve
* the service function ID of the adapter in this case.
*
+ * For SW based packet transfers, i.e., when the
+ * RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT is not set in the adapter's
+ * capabilities flags for a particular Ethernet device, the service function
+ * temporarily enqueues mbufs to an event buffer before batch enqueueing these
+ * to the event device. If the buffer fills up, the service function stops
+ * dequeueing packets from the Ethernet device. The application may want to
+ * monitor the buffer fill level and instruct the service function to
+ * selectively buffer packets. The application may also use some other
+ * criteria to decide which packets should enter the event device even when
+ * the event buffer fill level is low. The
+ * rte_event_eth_rx_adapter_cb_register() function allows the
+ * application to register a callback that selects which packets to enqueue
+ * to the event device.
+ *
* Note:
- * 1) Interrupt driven receive queues are currently unimplemented.
- * 2) Devices created after an instance of rte_event_eth_rx_adapter_create
+ * 1) Devices created after an instance of rte_event_eth_rx_adapter_create
* should be added to a new instance of the rx adapter.
*/
@@ -199,12 +212,55 @@ struct rte_event_eth_rx_adapter_stats {
* block cycles can be used to compute the percentage of
* cycles the service is blocked by the event device.
*/
+ uint64_t rx_intr_packets;
+ /**< Received packet count for interrupt mode Rx queues */
};
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
+ * Callback function invoked by the SW adapter before it continues
+ * to process packets. The callback is passed the size of the enqueue
+ * buffer in the SW adapter and the occupancy of the buffer. The
+ * callback can use these values to decide which mbufs should be
+ * enqueued to the event device. If the return value of the callback
+ * is less than nb_mbuf, the SW adapter enqueues that many mbufs from
+ * enq_buf[] to the event device.
+ *
+ * @param eth_dev_id
+ * Port identifier of the Ethernet device.
+ * @param queue_id
+ * Receive queue index.
+ * @param enqueue_buf_size
+ * Total enqueue buffer size.
+ * @param enqueue_buf_count
+ * mbuf count in enqueue buffer.
+ * @param mbuf
+ * mbuf array.
+ * @param nb_mbuf
+ * mbuf count.
+ * @param cb_arg
+ * Callback argument.
+ * @param[out] enq_buf
+ * The adapter enqueues enq_buf[] if the return value of the
+ * callback is less than nb_mbuf.
+ * @return
+ * The number of mbufs that should be enqueued to the event device.
+ */
+typedef uint16_t (*rte_event_eth_rx_adapter_cb_fn)(uint16_t eth_dev_id,
+ uint16_t queue_id,
+ uint32_t enqueue_buf_size,
+ uint32_t enqueue_buf_count,
+ struct rte_mbuf **mbuf,
+ uint16_t nb_mbuf,
+ void *cb_arg,
+ struct rte_mbuf **enq_buf);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
* Create a new ethernet Rx event adapter with the specified identifier.
*
* @param id
@@ -425,6 +481,32 @@ int rte_event_eth_rx_adapter_stats_reset(uint8_t id);
*/
int rte_event_eth_rx_adapter_service_id_get(uint8_t id, uint32_t *service_id);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a callback to process Rx packets; this is supported only for
+ * SW based packet transfers.
+ * @see rte_event_eth_rx_adapter_cb_fn
+ *
+ * @param id
+ * Adapter identifier.
+ * @param eth_dev_id
+ * Port identifier of Ethernet device.
+ * @param cb_fn
+ * Callback function.
+ * @param cb_arg
+ * Callback argument.
+ * @return
+ * - 0: Success
+ * - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_eth_rx_adapter_cb_register(uint8_t id,
+ uint16_t eth_dev_id,
+ rte_event_eth_rx_adapter_cb_fn cb_fn,
+ void *cb_arg);
+
#ifdef __cplusplus
}
#endif
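
A hedged sketch of a callback matching the typedef added above; the back-pressure policy and the assumption that the callback frees the mbufs it filters out are illustrative choices, not behaviour specified by this patch:

	#include <rte_common.h>
	#include <rte_mbuf.h>
	#include <rte_event_eth_rx_adapter.h>

	static uint16_t
	rx_adapter_cb(uint16_t eth_dev_id, uint16_t queue_id,
		      uint32_t enqueue_buf_size, uint32_t enqueue_buf_count,
		      struct rte_mbuf **mbuf, uint16_t nb_mbuf,
		      void *cb_arg, struct rte_mbuf **enq_buf)
	{
		uint32_t room = enqueue_buf_size - enqueue_buf_count;
		uint16_t i, n = nb_mbuf;

		RTE_SET_USED(eth_dev_id);
		RTE_SET_USED(queue_id);
		RTE_SET_USED(cb_arg);

		/* Pass through only as many mbufs as the buffer can take */
		if (room < n)
			n = (uint16_t)room;
		for (i = 0; i < n; i++)
			enq_buf[i] = mbuf[i];
		/* Assumption: the callback owns, and frees, what it drops */
		for (; i < nb_mbuf; i++)
			rte_pktmbuf_free(mbuf[i]);
		return n;
	}

	/* Registered per eth port, e.g.:
	 * rte_event_eth_rx_adapter_cb_register(id, eth_dev_id,
	 *					rx_adapter_cb, NULL);
	 */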
diff --git a/lib/librte_eventdev/rte_event_ring.c b/lib/librte_eventdev/rte_event_ring.c
index eb67751d..16d02a95 100644
--- a/lib/librte_eventdev/rte_event_ring.c
+++ b/lib/librte_eventdev/rte_event_ring.c
@@ -82,11 +82,16 @@ rte_event_ring_create(const char *name, unsigned int count, int socket_id,
mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags);
if (mz != NULL) {
r = mz->addr;
- /*
- * no need to check return value here, we already checked the
- * arguments above
- */
- rte_event_ring_init(r, name, requested_count, flags);
+ /* Check return value in case rte_event_ring_init() fails on size */
+ int err = rte_event_ring_init(r, name, requested_count, flags);
+ if (err) {
+ RTE_LOG(ERR, RING, "Ring init failed\n");
+ if (rte_memzone_free(mz) != 0)
+ RTE_LOG(ERR, RING, "Cannot free memzone\n");
+ rte_free(te);
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+ return NULL;
+ }
te->data = (void *) r;
r->r.memzone = mz;
diff --git a/lib/librte_eventdev/rte_event_timer_adapter.c b/lib/librte_eventdev/rte_event_timer_adapter.c
index 6f1d672c..79070d48 100644
--- a/lib/librte_eventdev/rte_event_timer_adapter.c
+++ b/lib/librte_eventdev/rte_event_timer_adapter.c
@@ -1282,9 +1282,7 @@ static const struct rte_event_timer_adapter_ops sw_event_adapter_timer_ops = {
.cancel_burst = sw_event_timer_cancel_burst,
};
-RTE_INIT(event_timer_adapter_init_log);
-static void
-event_timer_adapter_init_log(void)
+RTE_INIT(event_timer_adapter_init_log)
{
evtim_logtype = rte_log_register("lib.eventdev.adapter.timer");
if (evtim_logtype >= 0)
diff --git a/lib/librte_eventdev/rte_eventdev.c b/lib/librte_eventdev/rte_eventdev.c
index 7ca9fd14..801810ed 100644
--- a/lib/librte_eventdev/rte_eventdev.c
+++ b/lib/librte_eventdev/rte_eventdev.c
@@ -57,16 +57,21 @@ int
rte_event_dev_get_dev_id(const char *name)
{
int i;
+ uint8_t cmp;
if (!name)
return -EINVAL;
- for (i = 0; i < rte_eventdev_globals->nb_devs; i++)
- if ((strcmp(rte_event_devices[i].data->name, name)
- == 0) &&
- (rte_event_devices[i].attached ==
- RTE_EVENTDEV_ATTACHED))
+ for (i = 0; i < rte_eventdev_globals->nb_devs; i++) {
+ cmp = (strncmp(rte_event_devices[i].data->name, name,
+ RTE_EVENTDEV_NAME_MAX_LEN) == 0) ||
+ (rte_event_devices[i].dev ? (strncmp(
+ rte_event_devices[i].dev->driver->name, name,
+ RTE_EVENTDEV_NAME_MAX_LEN) == 0) : 0);
+ if (cmp && (rte_event_devices[i].attached ==
+ RTE_EVENTDEV_ATTACHED))
return i;
+ }
return -ENODEV;
}
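
After this change the lookup matches either the device name or, for attached devices, the underlying driver name, bounded by RTE_EVENTDEV_NAME_MAX_LEN; a hedged example (both names are hypothetical):

	int dev_id = rte_event_dev_get_dev_id("event_sw0");	/* device name */
	if (dev_id < 0)
		dev_id = rte_event_dev_get_dev_id("event_sw");	/* driver name */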
diff --git a/lib/librte_eventdev/rte_eventdev_version.map b/lib/librte_eventdev/rte_eventdev_version.map
index c3f18d6d..12835e9f 100644
--- a/lib/librte_eventdev/rte_eventdev_version.map
+++ b/lib/librte_eventdev/rte_eventdev_version.map
@@ -83,6 +83,19 @@ DPDK_18.05 {
EXPERIMENTAL {
global:
+ rte_event_crypto_adapter_caps_get;
+ rte_event_crypto_adapter_create;
+ rte_event_crypto_adapter_create_ext;
+ rte_event_crypto_adapter_event_port_get;
+ rte_event_crypto_adapter_free;
+ rte_event_crypto_adapter_queue_pair_add;
+ rte_event_crypto_adapter_queue_pair_del;
+ rte_event_crypto_adapter_service_id_get;
+ rte_event_crypto_adapter_start;
+ rte_event_crypto_adapter_stats_get;
+ rte_event_crypto_adapter_stats_reset;
+ rte_event_crypto_adapter_stop;
+ rte_event_eth_rx_adapter_cb_register;
rte_event_timer_adapter_caps_get;
rte_event_timer_adapter_create;
rte_event_timer_adapter_create_ext;
@@ -97,16 +110,4 @@ EXPERIMENTAL {
rte_event_timer_arm_burst;
rte_event_timer_arm_tmo_tick_burst;
rte_event_timer_cancel_burst;
- rte_event_crypto_adapter_caps_get;
- rte_event_crypto_adapter_create;
- rte_event_crypto_adapter_create_ext;
- rte_event_crypto_adapter_event_port_get;
- rte_event_crypto_adapter_free;
- rte_event_crypto_adapter_queue_pair_add;
- rte_event_crypto_adapter_queue_pair_del;
- rte_event_crypto_adapter_service_id_get;
- rte_event_crypto_adapter_start;
- rte_event_crypto_adapter_stats_get;
- rte_event_crypto_adapter_stats_reset;
- rte_event_crypto_adapter_stop;
};
diff --git a/lib/librte_flow_classify/rte_flow_classify.c b/lib/librte_flow_classify/rte_flow_classify.c
index 591d98e2..4c3469da 100644
--- a/lib/librte_flow_classify/rte_flow_classify.c
+++ b/lib/librte_flow_classify/rte_flow_classify.c
@@ -673,10 +673,7 @@ rte_flow_classifier_query(struct rte_flow_classifier *cls,
return ret;
}
-RTE_INIT(librte_flow_classify_init_log);
-
-static void
-librte_flow_classify_init_log(void)
+RTE_INIT(librte_flow_classify_init_log)
{
librte_flow_classify_logtype =
rte_log_register("lib.flow_classify");
diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
index 3648ec09..1fac53a8 100644
--- a/lib/librte_gso/Makefile
+++ b/lib/librte_gso/Makefile
@@ -19,6 +19,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_udp4.c
# install this header file
SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
index 5ca59745..6cd764ff 100644
--- a/lib/librte_gso/gso_common.h
+++ b/lib/librte_gso/gso_common.h
@@ -31,6 +31,9 @@
(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
PKT_TX_TUNNEL_GRE))
+#define IS_IPV4_UDP(flag) (((flag) & (PKT_TX_UDP_SEG | PKT_TX_IPV4)) == \
+ (PKT_TX_UDP_SEG | PKT_TX_IPV4))
+
/**
* Internal function which updates the UDP header of a packet, following
* segmentation. This is required to update the header's datagram length field.
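
The new predicate mirrors the IS_IPV4_TCP/IS_IPV4_GRE checks defined alongside it; a minimal sketch of the flag combination it tests:

	static inline int
	pkt_wants_udp4_gso(const struct rte_mbuf *m)
	{
		/* True only when both PKT_TX_UDP_SEG and PKT_TX_IPV4 are set */
		return IS_IPV4_UDP(m->ol_flags);
	}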
diff --git a/lib/librte_gso/gso_udp4.c b/lib/librte_gso/gso_udp4.c
new file mode 100644
index 00000000..927dee12
--- /dev/null
+++ b/lib/librte_gso/gso_udp4.c
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include "gso_common.h"
+#include "gso_udp4.h"
+
+#define IPV4_HDR_MF_BIT (1U << 13)
+
+static inline void
+update_ipv4_udp_headers(struct rte_mbuf *pkt, struct rte_mbuf **segs,
+ uint16_t nb_segs)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ uint16_t frag_offset = 0, is_mf;
+ uint16_t l2_hdrlen = pkt->l2_len, l3_hdrlen = pkt->l3_len;
+ uint16_t tail_idx = nb_segs - 1, length, i;
+
+ /*
+ * Update IP header fields for output segments. Specifically,
+ * keep the same IP id, update fragment offset and total
+ * length.
+ */
+ for (i = 0; i < nb_segs; i++) {
+ ipv4_hdr = rte_pktmbuf_mtod_offset(segs[i], struct ipv4_hdr *,
+ l2_hdrlen);
+ length = segs[i]->pkt_len - l2_hdrlen;
+ ipv4_hdr->total_length = rte_cpu_to_be_16(length);
+
+ is_mf = i < tail_idx ? IPV4_HDR_MF_BIT : 0;
+ ipv4_hdr->fragment_offset =
+ rte_cpu_to_be_16(frag_offset | is_mf);
+ frag_offset += ((length - l3_hdrlen) >> 3);
+ }
+}
+
+int
+gso_udp4_segment(struct rte_mbuf *pkt,
+ uint16_t gso_size,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ uint16_t pyld_unit_size, hdr_offset;
+ uint16_t frag_off;
+ int ret;
+
+ /* Don't process already-fragmented packets */
+ ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *,
+ pkt->l2_len);
+ frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+ if (unlikely(IS_FRAGMENTED(frag_off))) {
+ pkts_out[0] = pkt;
+ return 1;
+ }
+
+ /*
+ * UDP fragmentation is the same as IP fragmentation.
+ * Except for the first one, output packets carry only l2
+ * and l3 headers.
+ */
+ hdr_offset = pkt->l2_len + pkt->l3_len;
+
+ /* Don't process packets with no payload data. */
+ if (unlikely(hdr_offset + pkt->l4_len >= pkt->pkt_len)) {
+ pkts_out[0] = pkt;
+ return 1;
+ }
+
+ pyld_unit_size = gso_size - hdr_offset;
+
+ /* Segment the payload */
+ ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
+ indirect_pool, pkts_out, nb_pkts_out);
+ if (ret > 1)
+ update_ipv4_udp_headers(pkt, pkts_out, ret);
+
+ return ret;
+}
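
A worked example of the offset arithmetic above, with hypothetical sizes: for l2_len = 14 (Ethernet) and l3_len = 20 (IPv4), hdr_offset is 34, so gso_size = 1434 yields pyld_unit_size = 1400 bytes, i.e. 175 eight-byte units; intermediate fragments must carry a multiple of 8 payload bytes, which the caller's choice of gso_size is assumed to satisfy. The 8-byte UDP header travels inside the first fragment's payload, which is why later segments replicate only the l2/l3 headers, and update_ipv4_udp_headers() then writes fragment offsets 0, 175, 350, ... with the MF bit set on every segment except the last.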
diff --git a/lib/librte_gso/gso_udp4.h b/lib/librte_gso/gso_udp4.h
new file mode 100644
index 00000000..b2a2908e
--- /dev/null
+++ b/lib/librte_gso/gso_udp4.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _GSO_UDP4_H_
+#define _GSO_UDP4_H_
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/**
+ * Segment a UDP/IPv4 packet. This function doesn't check if the input
+ * packet has correct checksums, and doesn't update checksums for output
+ * GSO segments. Furthermore, it doesn't process already-fragmented IP
+ * packets.
+ *
+ * @param pkt
+ * The packet mbuf to segment.
+ * @param gso_size
+ * The max length of a GSO segment, measured in bytes.
+ * @param direct_pool
+ * MBUF pool used for allocating direct buffers for output segments.
+ * @param indirect_pool
+ * MBUF pool used for allocating indirect buffers for output segments.
+ * @param pkts_out
+ * Pointer array used to store the MBUF addresses of output GSO
+ * segments, when the function succeeds. If the memory space in
+ * pkts_out is insufficient, it fails and returns -EINVAL.
+ * @param nb_pkts_out
+ * The max number of items that 'pkts_out' can hold.
+ *
+ * @return
+ * - The number of GSO segments filled in pkts_out on success.
+ * - Return -ENOMEM if it runs out of memory in the MBUF pools.
+ * - Return -EINVAL for invalid parameters.
+ */
+int gso_udp4_segment(struct rte_mbuf *pkt,
+ uint16_t gso_size,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out);
+#endif
diff --git a/lib/librte_gso/meson.build b/lib/librte_gso/meson.build
index 056534fb..ad8dd858 100644
--- a/lib/librte_gso/meson.build
+++ b/lib/librte_gso/meson.build
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-sources = files('gso_common.c', 'gso_tcp4.c',
+sources = files('gso_common.c', 'gso_tcp4.c', 'gso_udp4.c',
'gso_tunnel_tcp4.c', 'rte_gso.c')
headers = files('rte_gso.h')
deps += ['ethdev']
diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
index a44e3d43..751b5b62 100644
--- a/lib/librte_gso/rte_gso.c
+++ b/lib/librte_gso/rte_gso.c
@@ -11,6 +11,17 @@
#include "gso_common.h"
#include "gso_tcp4.h"
#include "gso_tunnel_tcp4.h"
+#include "gso_udp4.h"
+
+#define ILLEGAL_UDP_GSO_CTX(ctx) \
+ ((((ctx)->gso_types & DEV_TX_OFFLOAD_UDP_TSO) == 0) || \
+ (ctx)->gso_size < RTE_GSO_UDP_SEG_SIZE_MIN)
+
+#define ILLEGAL_TCP_GSO_CTX(ctx) \
+ ((((ctx)->gso_types & (DEV_TX_OFFLOAD_TCP_TSO | \
+ DEV_TX_OFFLOAD_VXLAN_TNL_TSO | \
+ DEV_TX_OFFLOAD_GRE_TNL_TSO)) == 0) || \
+ (ctx)->gso_size < RTE_GSO_SEG_SIZE_MIN)
int
rte_gso_segment(struct rte_mbuf *pkt,
@@ -27,14 +38,12 @@ rte_gso_segment(struct rte_mbuf *pkt,
if (pkt == NULL || pkts_out == NULL || gso_ctx == NULL ||
nb_pkts_out < 1 ||
- gso_ctx->gso_size < RTE_GSO_SEG_SIZE_MIN ||
- ((gso_ctx->gso_types & (DEV_TX_OFFLOAD_TCP_TSO |
- DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
- DEV_TX_OFFLOAD_GRE_TNL_TSO)) == 0))
+ (ILLEGAL_UDP_GSO_CTX(gso_ctx) &&
+ ILLEGAL_TCP_GSO_CTX(gso_ctx)))
return -EINVAL;
if (gso_ctx->gso_size >= pkt->pkt_len) {
- pkt->ol_flags &= (~PKT_TX_TCP_SEG);
+ pkt->ol_flags &= (~(PKT_TX_TCP_SEG | PKT_TX_UDP_SEG));
pkts_out[0] = pkt;
return 1;
}
@@ -59,6 +68,11 @@ rte_gso_segment(struct rte_mbuf *pkt,
ret = gso_tcp4_segment(pkt, gso_size, ipid_delta,
direct_pool, indirect_pool,
pkts_out, nb_pkts_out);
+ } else if (IS_IPV4_UDP(pkt->ol_flags) &&
+ (gso_ctx->gso_types & DEV_TX_OFFLOAD_UDP_TSO)) {
+ pkt->ol_flags &= (~PKT_TX_UDP_SEG);
+ ret = gso_udp4_segment(pkt, gso_size, direct_pool,
+ indirect_pool, pkts_out, nb_pkts_out);
} else {
/* unsupported packet, skip */
pkts_out[0] = pkt;
diff --git a/lib/librte_gso/rte_gso.h b/lib/librte_gso/rte_gso.h
index f4abd61c..a626a11e 100644
--- a/lib/librte_gso/rte_gso.h
+++ b/lib/librte_gso/rte_gso.h
@@ -17,10 +17,14 @@ extern "C" {
#include <stdint.h>
#include <rte_mbuf.h>
-/* Minimum GSO segment size. */
+/* Minimum GSO segment size for TCP based packets. */
#define RTE_GSO_SEG_SIZE_MIN (sizeof(struct ether_hdr) + \
sizeof(struct ipv4_hdr) + sizeof(struct tcp_hdr) + 1)
+/* Minimum GSO segment size for UDP based packets. */
+#define RTE_GSO_UDP_SEG_SIZE_MIN (sizeof(struct ether_hdr) + \
+ sizeof(struct ipv4_hdr) + sizeof(struct udp_hdr) + 1)
+
/* GSO flags for rte_gso_ctx. */
#define RTE_GSO_FLAG_IPID_FIXED (1ULL << 0)
/**< Use fixed IP ids for output GSO segments. Setting
diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build
index e139e1d7..efc06ede 100644
--- a/lib/librte_hash/meson.build
+++ b/lib/librte_hash/meson.build
@@ -6,7 +6,6 @@ headers = files('rte_cmp_arm64.h',
'rte_cmp_x86.h',
'rte_crc_arm64.h',
'rte_cuckoo_hash.h',
- 'rte_cuckoo_hash_x86.h',
'rte_fbk_hash.h',
'rte_hash_crc.h',
'rte_hash.h',
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index a07543a2..f7b86c8c 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -31,9 +31,6 @@
#include "rte_hash.h"
#include "rte_cuckoo_hash.h"
-#if defined(RTE_ARCH_X86)
-#include "rte_cuckoo_hash_x86.h"
-#endif
TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
@@ -93,8 +90,10 @@ rte_hash_create(const struct rte_hash_parameters *params)
void *buckets = NULL;
char ring_name[RTE_RING_NAMESIZE];
unsigned num_key_slots;
- unsigned hw_trans_mem_support = 0;
unsigned i;
+ unsigned int hw_trans_mem_support = 0, multi_writer_support = 0;
+ unsigned int readwrite_concur_support = 0;
+
rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
@@ -107,7 +106,6 @@ rte_hash_create(const struct rte_hash_parameters *params)
/* Check for valid parameters */
if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
(params->entries < RTE_HASH_BUCKET_ENTRIES) ||
- !rte_is_power_of_2(RTE_HASH_BUCKET_ENTRIES) ||
(params->key_len == 0)) {
rte_errno = EINVAL;
RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
@@ -118,21 +116,29 @@ rte_hash_create(const struct rte_hash_parameters *params)
if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
hw_trans_mem_support = 1;
+ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)
+ multi_writer_support = 1;
+
+ if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
+ readwrite_concur_support = 1;
+ multi_writer_support = 1;
+ }
+
/* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
- if (hw_trans_mem_support)
+ if (multi_writer_support)
/*
* Increase number of slots by total number of indices
* that can be stored in the lcore caches
* except for the first cache
*/
num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
- LCORE_CACHE_SIZE + 1;
+ (LCORE_CACHE_SIZE - 1) + 1;
else
num_key_slots = params->entries + 1;
snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
/* Create ring (Dummy slot index is not enqueued) */
- r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots - 1),
+ r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
params->socket_id, 0);
if (r == NULL) {
RTE_LOG(ERR, HASH, "memory allocation failed\n");
@@ -233,7 +239,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
h->cmp_jump_table_idx = KEY_OTHER_BYTES;
#endif
- if (hw_trans_mem_support) {
+ if (multi_writer_support) {
h->local_free_slots = rte_zmalloc_socket(NULL,
sizeof(struct lcore_cache) * RTE_MAX_LCORE,
RTE_CACHE_LINE_SIZE, params->socket_id);
@@ -261,6 +267,8 @@ rte_hash_create(const struct rte_hash_parameters *params)
h->key_store = k;
h->free_slots = r;
h->hw_trans_mem_support = hw_trans_mem_support;
+ h->multi_writer_support = multi_writer_support;
+ h->readwrite_concur_support = readwrite_concur_support;
#if defined(RTE_ARCH_X86)
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
@@ -271,24 +279,20 @@ rte_hash_create(const struct rte_hash_parameters *params)
#endif
h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
- /* Turn on multi-writer only with explicit flat from user and TM
+ /* Turn on multi-writer only with explicit flag from user and TM
* support.
*/
- if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
- if (h->hw_trans_mem_support) {
- h->add_key = ADD_KEY_MULTIWRITER_TM;
- } else {
- h->add_key = ADD_KEY_MULTIWRITER;
- h->multiwriter_lock = rte_malloc(NULL,
- sizeof(rte_spinlock_t),
- LCORE_CACHE_SIZE);
- rte_spinlock_init(h->multiwriter_lock);
- }
- } else
- h->add_key = ADD_KEY_SINGLEWRITER;
+ if (h->multi_writer_support) {
+ h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
+ RTE_CACHE_LINE_SIZE);
+ if (h->readwrite_lock == NULL)
+ goto err_unlock;
+
+ rte_rwlock_init(h->readwrite_lock);
+ }
/* Populate free slots ring. Entry zero is reserved for key misses. */
- for (i = 1; i < params->entries + 1; i++)
+ for (i = 1; i < num_key_slots; i++)
rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
te->data = (void *) h;
@@ -335,11 +339,10 @@ rte_hash_free(struct rte_hash *h)
rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
- if (h->hw_trans_mem_support)
+ if (h->multi_writer_support) {
rte_free(h->local_free_slots);
-
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_free(h->multiwriter_lock);
+ rte_free(h->readwrite_lock);
+ }
rte_ring_free(h->free_slots);
rte_free(h->key_store);
rte_free(h->buckets);
@@ -366,15 +369,78 @@ rte_hash_secondary_hash(const hash_sig_t primary_hash)
return primary_hash ^ ((tag + 1) * alt_bits_xor);
}
+int32_t
+rte_hash_count(const struct rte_hash *h)
+{
+ uint32_t tot_ring_cnt, cached_cnt = 0;
+ uint32_t i, ret;
+
+ if (h == NULL)
+ return -EINVAL;
+
+ if (h->multi_writer_support) {
+ tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
+ (LCORE_CACHE_SIZE - 1);
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ cached_cnt += h->local_free_slots[i].len;
+
+ ret = tot_ring_cnt - rte_ring_count(h->free_slots) -
+ cached_cnt;
+ } else {
+ tot_ring_cnt = h->entries;
+ ret = tot_ring_cnt - rte_ring_count(h->free_slots);
+ }
+ return ret;
+}
+
+/* Read write locks implemented using rte_rwlock */
+static inline void
+__hash_rw_writer_lock(const struct rte_hash *h)
+{
+ if (h->multi_writer_support && h->hw_trans_mem_support)
+ rte_rwlock_write_lock_tm(h->readwrite_lock);
+ else if (h->multi_writer_support)
+ rte_rwlock_write_lock(h->readwrite_lock);
+}
+
+
+static inline void
+__hash_rw_reader_lock(const struct rte_hash *h)
+{
+ if (h->readwrite_concur_support && h->hw_trans_mem_support)
+ rte_rwlock_read_lock_tm(h->readwrite_lock);
+ else if (h->readwrite_concur_support)
+ rte_rwlock_read_lock(h->readwrite_lock);
+}
+
+static inline void
+__hash_rw_writer_unlock(const struct rte_hash *h)
+{
+ if (h->multi_writer_support && h->hw_trans_mem_support)
+ rte_rwlock_write_unlock_tm(h->readwrite_lock);
+ else if (h->multi_writer_support)
+ rte_rwlock_write_unlock(h->readwrite_lock);
+}
+
+static inline void
+__hash_rw_reader_unlock(const struct rte_hash *h)
+{
+ if (h->readwrite_concur_support && h->hw_trans_mem_support)
+ rte_rwlock_read_unlock_tm(h->readwrite_lock);
+ else if (h->readwrite_concur_support)
+ rte_rwlock_read_unlock(h->readwrite_lock);
+}
+
void
rte_hash_reset(struct rte_hash *h)
{
void *ptr;
- unsigned i;
+ uint32_t tot_ring_cnt, i;
if (h == NULL)
return;
+ __hash_rw_writer_lock(h);
memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
@@ -383,97 +449,260 @@ rte_hash_reset(struct rte_hash *h)
rte_pause();
/* Repopulate the free slots ring. Entry zero is reserved for key misses */
- for (i = 1; i < h->entries + 1; i++)
+ if (h->multi_writer_support)
+ tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
+ (LCORE_CACHE_SIZE - 1);
+ else
+ tot_ring_cnt = h->entries;
+
+ for (i = 1; i < tot_ring_cnt + 1; i++)
rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
- if (h->hw_trans_mem_support) {
+ if (h->multi_writer_support) {
/* Reset local caches per lcore */
for (i = 0; i < RTE_MAX_LCORE; i++)
h->local_free_slots[i].len = 0;
}
+ __hash_rw_writer_unlock(h);
}
-/* Search for an entry that can be pushed to its alternative location */
-static inline int
-make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt,
- unsigned int *nr_pushes)
+/*
+ * Function called to enqueue back an index in the cache/ring,
+ * as the slot has not been used and can be reused in the
+ * next addition attempt.
+ */
+static inline void
+enqueue_slot_back(const struct rte_hash *h,
+ struct lcore_cache *cached_free_slots,
+ void *slot_id)
{
- unsigned i, j;
- int ret;
- uint32_t next_bucket_idx;
- struct rte_hash_bucket *next_bkt[RTE_HASH_BUCKET_ENTRIES];
+ if (h->multi_writer_support) {
+ cached_free_slots->objs[cached_free_slots->len] = slot_id;
+ cached_free_slots->len++;
+ } else
+ rte_ring_sp_enqueue(h->free_slots, slot_id);
+}
+
+/* Search a bucket for the key and, if found, update its data */
+static inline int32_t
+search_and_update(const struct rte_hash *h, void *data, const void *key,
+ struct rte_hash_bucket *bkt, hash_sig_t sig, hash_sig_t alt_hash)
+{
+ int i;
+ struct rte_hash_key *k, *keys = h->key_store;
- /*
- * Push existing item (search for bucket with space in
- * alternative locations) to its alternative location
- */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- /* Search for space in alternative locations */
- next_bucket_idx = bkt->sig_alt[i] & h->bucket_bitmask;
- next_bkt[i] = &h->buckets[next_bucket_idx];
- for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) {
- if (next_bkt[i]->key_idx[j] == EMPTY_SLOT)
- break;
+ if (bkt->sig_current[i] == sig &&
+ bkt->sig_alt[i] == alt_hash) {
+ k = (struct rte_hash_key *) ((char *)keys +
+ bkt->key_idx[i] * h->key_entry_size);
+ if (rte_hash_cmp_eq(key, k->key, h) == 0) {
+ /* Update data */
+ k->pdata = data;
+ /*
+ * Return index where key is stored,
+ * subtracting the first dummy index
+ */
+ return bkt->key_idx[i] - 1;
+ }
}
-
- if (j != RTE_HASH_BUCKET_ENTRIES)
- break;
}
+ return -1;
+}
- /* Alternative location has spare room (end of recursive function) */
- if (i != RTE_HASH_BUCKET_ENTRIES) {
- next_bkt[i]->sig_alt[j] = bkt->sig_current[i];
- next_bkt[i]->sig_current[j] = bkt->sig_alt[i];
- next_bkt[i]->key_idx[j] = bkt->key_idx[i];
- return i;
+/* Only tries to insert at one bucket (@prim_bkt) without trying to push
+ * buckets around.
+ * Return 1 if it matches an existing key, 0 on success, or -1 if no
+ * empty entry is available.
+ */
+static inline int32_t
+rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
+ struct rte_hash_bucket *prim_bkt,
+ struct rte_hash_bucket *sec_bkt,
+ const struct rte_hash_key *key, void *data,
+ hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
+ int32_t *ret_val)
+{
+ unsigned int i;
+ struct rte_hash_bucket *cur_bkt = prim_bkt;
+ int32_t ret;
+
+ __hash_rw_writer_lock(h);
+ /* Check if key was inserted after last check but before this
+ * protected region in case of inserting duplicated keys.
+ */
+ ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
+ }
+ ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
}
- /* Pick entry that has not been pushed yet */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++)
- if (bkt->flag[i] == 0)
+ /* Insert new entry if there is room in the primary
+ * bucket.
+ */
+ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+ /* Check if slot is available */
+ if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
+ prim_bkt->sig_current[i] = sig;
+ prim_bkt->sig_alt[i] = alt_hash;
+ prim_bkt->key_idx[i] = new_idx;
break;
+ }
+ }
+ __hash_rw_writer_unlock(h);
- /* All entries have been pushed, so entry cannot be added */
- if (i == RTE_HASH_BUCKET_ENTRIES || ++(*nr_pushes) > RTE_HASH_MAX_PUSHES)
- return -ENOSPC;
+ if (i != RTE_HASH_BUCKET_ENTRIES)
+ return 0;
- /* Set flag to indicate that this entry is going to be pushed */
- bkt->flag[i] = 1;
+ /* no empty entry */
+ return -1;
+}
- /* Need room in alternative bucket to insert the pushed entry */
- ret = make_space_bucket(h, next_bkt[i], nr_pushes);
- /*
- * After recursive function.
- * Clear flags and insert the pushed entry
- * in its alternative location if successful,
- * or return error
+/* Shift buckets along the provided cuckoo path (@leaf and @leaf_slot) and
+ * fill the path head with the new entry (sig, alt_hash, new_idx).
+ * Return 1 if a matching key was found, -1 if the cuckoo path was
+ * invalidated (failure), or 0 on success.
+ */
+static inline int
+rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
+ struct rte_hash_bucket *bkt,
+ struct rte_hash_bucket *alt_bkt,
+ const struct rte_hash_key *key, void *data,
+ struct queue_node *leaf, uint32_t leaf_slot,
+ hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
+ int32_t *ret_val)
+{
+ uint32_t prev_alt_bkt_idx;
+ struct rte_hash_bucket *cur_bkt = bkt;
+ struct queue_node *prev_node, *curr_node = leaf;
+ struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
+ uint32_t prev_slot, curr_slot = leaf_slot;
+ int32_t ret;
+
+ __hash_rw_writer_lock(h);
+
+ /* In case empty slot was gone before entering protected region */
+ if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT) {
+ __hash_rw_writer_unlock(h);
+ return -1;
+ }
+
+ /* Check if key was inserted after last check but before this
+ * protected region.
*/
- bkt->flag[i] = 0;
- if (ret >= 0) {
- next_bkt[i]->sig_alt[ret] = bkt->sig_current[i];
- next_bkt[i]->sig_current[ret] = bkt->sig_alt[i];
- next_bkt[i]->key_idx[ret] = bkt->key_idx[i];
- return i;
- } else
- return ret;
+ ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
+ }
+
+ ret = search_and_update(h, data, key, alt_bkt, alt_hash, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ *ret_val = ret;
+ return 1;
+ }
+
+ while (likely(curr_node->prev != NULL)) {
+ prev_node = curr_node->prev;
+ prev_bkt = prev_node->bkt;
+ prev_slot = curr_node->prev_slot;
+
+ prev_alt_bkt_idx =
+ prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask;
+
+ if (unlikely(&h->buckets[prev_alt_bkt_idx]
+ != curr_bkt)) {
+ /* Revert the slot to empty, otherwise keys would be duplicated */
+ curr_bkt->key_idx[curr_slot] = EMPTY_SLOT;
+ __hash_rw_writer_unlock(h);
+ return -1;
+ }
+
+ /* Need to swap current/alt sig to allow later
+ * Cuckoo insert to move elements back to its
+ * primary bucket if available
+ */
+ curr_bkt->sig_alt[curr_slot] =
+ prev_bkt->sig_current[prev_slot];
+ curr_bkt->sig_current[curr_slot] =
+ prev_bkt->sig_alt[prev_slot];
+ curr_bkt->key_idx[curr_slot] =
+ prev_bkt->key_idx[prev_slot];
+
+ curr_slot = prev_slot;
+ curr_node = prev_node;
+ curr_bkt = curr_node->bkt;
+ }
+
+ curr_bkt->sig_current[curr_slot] = sig;
+ curr_bkt->sig_alt[curr_slot] = alt_hash;
+ curr_bkt->key_idx[curr_slot] = new_idx;
+
+ __hash_rw_writer_unlock(h);
+
+ return 0;
}
/*
- * Function called to enqueue back an index in the cache/ring,
- * as slot has not being used and it can be used in the
- * next addition attempt.
+ * Make space for a new key, using a BFS cuckoo search and multi-writer
+ * safe cuckoo displacement.
*/
-static inline void
-enqueue_slot_back(const struct rte_hash *h,
- struct lcore_cache *cached_free_slots,
- void *slot_id)
+static inline int
+rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
+ struct rte_hash_bucket *bkt,
+ struct rte_hash_bucket *sec_bkt,
+ const struct rte_hash_key *key, void *data,
+ hash_sig_t sig, hash_sig_t alt_hash,
+ uint32_t new_idx, int32_t *ret_val)
{
- if (h->hw_trans_mem_support) {
- cached_free_slots->objs[cached_free_slots->len] = slot_id;
- cached_free_slots->len++;
- } else
- rte_ring_sp_enqueue(h->free_slots, slot_id);
+ unsigned int i;
+ struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
+ struct queue_node *tail, *head;
+ struct rte_hash_bucket *curr_bkt, *alt_bkt;
+
+ tail = queue;
+ head = queue + 1;
+ tail->bkt = bkt;
+ tail->prev = NULL;
+ tail->prev_slot = -1;
+
+ /* Cuckoo BFS search */
+ while (likely(tail != head && head <
+ queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
+ RTE_HASH_BUCKET_ENTRIES)) {
+ curr_bkt = tail->bkt;
+ for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+ if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
+ int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
+ bkt, sec_bkt, key, data,
+ tail, i, sig, alt_hash,
+ new_idx, ret_val);
+ if (likely(ret != -1))
+ return ret;
+ }
+
+ /* Enqueue new node and keep prev node info */
+ alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
+ & h->bucket_bitmask]);
+ head->bkt = alt_bkt;
+ head->prev = tail;
+ head->prev_slot = i;
+ head++;
+ }
+ tail++;
+ }
+
+ return -ENOSPC;
}
static inline int32_t
@@ -482,19 +711,15 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
{
hash_sig_t alt_hash;
uint32_t prim_bucket_idx, sec_bucket_idx;
- unsigned i;
struct rte_hash_bucket *prim_bkt, *sec_bkt;
- struct rte_hash_key *new_k, *k, *keys = h->key_store;
+ struct rte_hash_key *new_k, *keys = h->key_store;
void *slot_id = NULL;
uint32_t new_idx;
int ret;
unsigned n_slots;
unsigned lcore_id;
struct lcore_cache *cached_free_slots = NULL;
- unsigned int nr_pushes = 0;
-
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_lock(h->multiwriter_lock);
+ int32_t ret_val;
prim_bucket_idx = sig & h->bucket_bitmask;
prim_bkt = &h->buckets[prim_bucket_idx];
@@ -505,8 +730,24 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
sec_bkt = &h->buckets[sec_bucket_idx];
rte_prefetch0(sec_bkt);
- /* Get a new slot for storing the new key */
- if (h->hw_trans_mem_support) {
+ /* Check if key is already inserted in primary location */
+ __hash_rw_writer_lock(h);
+ ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
+ }
+
+ /* Check if key is already inserted in secondary location */
+ ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
+ }
+ __hash_rw_writer_unlock(h);
+
+ /* Did not find a match, so get a new slot for storing the new key */
+ if (h->multi_writer_support) {
lcore_id = rte_lcore_id();
cached_free_slots = &h->local_free_slots[lcore_id];
/* Try to get a free slot from the local cache */
@@ -516,8 +757,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
cached_free_slots->objs,
LCORE_CACHE_SIZE, NULL);
if (n_slots == 0) {
- ret = -ENOSPC;
- goto failure;
+ return -ENOSPC;
}
cached_free_slots->len += n_slots;
@@ -528,124 +768,50 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
slot_id = cached_free_slots->objs[cached_free_slots->len];
} else {
if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) {
- ret = -ENOSPC;
- goto failure;
+ return -ENOSPC;
}
}
new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size);
- rte_prefetch0(new_k);
new_idx = (uint32_t)((uintptr_t) slot_id);
-
- /* Check if key is already inserted in primary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (prim_bkt->sig_current[i] == sig &&
- prim_bkt->sig_alt[i] == alt_hash) {
- k = (struct rte_hash_key *) ((char *)keys +
- prim_bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- /* Enqueue index of free slot back in the ring. */
- enqueue_slot_back(h, cached_free_slots, slot_id);
- /* Update data */
- k->pdata = data;
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- ret = prim_bkt->key_idx[i] - 1;
- goto failure;
- }
- }
- }
-
- /* Check if key is already inserted in secondary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (sec_bkt->sig_alt[i] == sig &&
- sec_bkt->sig_current[i] == alt_hash) {
- k = (struct rte_hash_key *) ((char *)keys +
- sec_bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- /* Enqueue index of free slot back in the ring. */
- enqueue_slot_back(h, cached_free_slots, slot_id);
- /* Update data */
- k->pdata = data;
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- ret = sec_bkt->key_idx[i] - 1;
- goto failure;
- }
- }
- }
-
/* Copy key */
rte_memcpy(new_k->key, key, h->key_len);
new_k->pdata = data;
-#if defined(RTE_ARCH_X86) /* currently only x86 support HTM */
- if (h->add_key == ADD_KEY_MULTIWRITER_TM) {
- ret = rte_hash_cuckoo_insert_mw_tm(prim_bkt,
- sig, alt_hash, new_idx);
- if (ret >= 0)
- return new_idx - 1;
- /* Primary bucket full, need to make space for new entry */
- ret = rte_hash_cuckoo_make_space_mw_tm(h, prim_bkt, sig,
- alt_hash, new_idx);
+ /* Find an empty slot and insert */
+ ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
+ sig, alt_hash, new_idx, &ret_val);
+ if (ret == 0)
+ return new_idx - 1;
+ else if (ret == 1) {
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret_val;
+ }
- if (ret >= 0)
- return new_idx - 1;
+ /* Primary bucket full, need to make space for new entry */
+ ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
+ sig, alt_hash, new_idx, &ret_val);
+ if (ret == 0)
+ return new_idx - 1;
+ else if (ret == 1) {
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret_val;
+ }
- /* Also search secondary bucket to get better occupancy */
- ret = rte_hash_cuckoo_make_space_mw_tm(h, sec_bkt, sig,
- alt_hash, new_idx);
+ /* Also search secondary bucket to get better occupancy */
+ ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
+ alt_hash, sig, new_idx, &ret_val);
- if (ret >= 0)
- return new_idx - 1;
+ if (ret == 0)
+ return new_idx - 1;
+ else if (ret == 1) {
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret_val;
} else {
-#endif
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- /* Check if slot is available */
- if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
- prim_bkt->sig_current[i] = sig;
- prim_bkt->sig_alt[i] = alt_hash;
- prim_bkt->key_idx[i] = new_idx;
- break;
- }
- }
-
- if (i != RTE_HASH_BUCKET_ENTRIES) {
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_unlock(h->multiwriter_lock);
- return new_idx - 1;
- }
-
- /* Primary bucket full, need to make space for new entry
- * After recursive function.
- * Insert the new entry in the position of the pushed entry
- * if successful or return error and
- * store the new slot back in the ring
- */
- ret = make_space_bucket(h, prim_bkt, &nr_pushes);
- if (ret >= 0) {
- prim_bkt->sig_current[ret] = sig;
- prim_bkt->sig_alt[ret] = alt_hash;
- prim_bkt->key_idx[ret] = new_idx;
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_unlock(h->multiwriter_lock);
- return new_idx - 1;
- }
-#if defined(RTE_ARCH_X86)
+ enqueue_slot_back(h, cached_free_slots, slot_id);
+ return ret;
}
-#endif
- /* Error in addition, store new slot back in the ring and return error */
- enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx));
-
-failure:
- if (h->add_key == ADD_KEY_MULTIWRITER)
- rte_spinlock_unlock(h->multiwriter_lock);
- return ret;
}
int32_t
@@ -690,20 +856,15 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
else
return ret;
}
+
+/* Search one bucket to find the matching key */
static inline int32_t
-__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
- hash_sig_t sig, void **data)
+search_one_bucket(const struct rte_hash *h, const void *key, hash_sig_t sig,
+ void **data, const struct rte_hash_bucket *bkt)
{
- uint32_t bucket_idx;
- hash_sig_t alt_hash;
- unsigned i;
- struct rte_hash_bucket *bkt;
+ int i;
struct rte_hash_key *k, *keys = h->key_store;
- bucket_idx = sig & h->bucket_bitmask;
- bkt = &h->buckets[bucket_idx];
-
- /* Check if key is in primary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
if (bkt->sig_current[i] == sig &&
bkt->key_idx[i] != EMPTY_SLOT) {
@@ -720,30 +881,41 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
}
}
}
+ return -1;
+}
+
+static inline int32_t
+__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
+ hash_sig_t sig, void **data)
+{
+ uint32_t bucket_idx;
+ hash_sig_t alt_hash;
+ struct rte_hash_bucket *bkt;
+ int ret;
+
+ bucket_idx = sig & h->bucket_bitmask;
+ bkt = &h->buckets[bucket_idx];
+ __hash_rw_reader_lock(h);
+
+ /* Check if key is in primary location */
+ ret = search_one_bucket(h, key, sig, data, bkt);
+ if (ret != -1) {
+ __hash_rw_reader_unlock(h);
+ return ret;
+ }
/* Calculate secondary hash */
alt_hash = rte_hash_secondary_hash(sig);
bucket_idx = alt_hash & h->bucket_bitmask;
bkt = &h->buckets[bucket_idx];
/* Check if key is in secondary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (bkt->sig_current[i] == alt_hash &&
- bkt->sig_alt[i] == sig) {
- k = (struct rte_hash_key *) ((char *)keys +
- bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- if (data != NULL)
- *data = k->pdata;
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- return bkt->key_idx[i] - 1;
- }
- }
+ ret = search_one_bucket(h, key, alt_hash, data, bkt);
+ if (ret != -1) {
+ __hash_rw_reader_unlock(h);
+ return ret;
}
-
+ __hash_rw_reader_unlock(h);
return -ENOENT;
}
@@ -785,7 +957,7 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
bkt->sig_current[i] = NULL_SIGNATURE;
bkt->sig_alt[i] = NULL_SIGNATURE;
- if (h->hw_trans_mem_support) {
+ if (h->multi_writer_support) {
lcore_id = rte_lcore_id();
cached_free_slots = &h->local_free_slots[lcore_id];
/* Cache full, need to free it. */
@@ -806,20 +978,15 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
}
}
+/* Search one bucket and remove the matching key */
static inline int32_t
-__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
- hash_sig_t sig)
+search_and_remove(const struct rte_hash *h, const void *key,
+ struct rte_hash_bucket *bkt, hash_sig_t sig)
{
- uint32_t bucket_idx;
- hash_sig_t alt_hash;
- unsigned i;
- struct rte_hash_bucket *bkt;
struct rte_hash_key *k, *keys = h->key_store;
+ unsigned int i;
int32_t ret;
- bucket_idx = sig & h->bucket_bitmask;
- bkt = &h->buckets[bucket_idx];
-
/* Check if key is in primary location */
for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
if (bkt->sig_current[i] == sig &&
@@ -839,32 +1006,42 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
}
}
}
+ return -1;
+}
+
+static inline int32_t
+__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
+ hash_sig_t sig)
+{
+ uint32_t bucket_idx;
+ hash_sig_t alt_hash;
+ struct rte_hash_bucket *bkt;
+ int32_t ret;
+
+ bucket_idx = sig & h->bucket_bitmask;
+ bkt = &h->buckets[bucket_idx];
+
+ __hash_rw_writer_lock(h);
+ /* look for key in primary bucket */
+ ret = search_and_remove(h, key, bkt, sig);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
+ }
/* Calculate secondary hash */
alt_hash = rte_hash_secondary_hash(sig);
bucket_idx = alt_hash & h->bucket_bitmask;
bkt = &h->buckets[bucket_idx];
- /* Check if key is in secondary location */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (bkt->sig_current[i] == alt_hash &&
- bkt->key_idx[i] != EMPTY_SLOT) {
- k = (struct rte_hash_key *) ((char *)keys +
- bkt->key_idx[i] * h->key_entry_size);
- if (rte_hash_cmp_eq(key, k->key, h) == 0) {
- remove_entry(h, bkt, i);
-
- /*
- * Return index where key is stored,
- * subtracting the first dummy index
- */
- ret = bkt->key_idx[i] - 1;
- bkt->key_idx[i] = EMPTY_SLOT;
- return ret;
- }
- }
+ /* look for key in secondary bucket */
+ ret = search_and_remove(h, key, bkt, alt_hash);
+ if (ret != -1) {
+ __hash_rw_writer_unlock(h);
+ return ret;
}
+ __hash_rw_writer_unlock(h);
return -ENOENT;
}
@@ -1006,6 +1183,7 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
rte_prefetch0(secondary_bkt[i]);
}
+ __hash_rw_reader_lock(h);
/* Compare signatures and prefetch key slot of first hit */
for (i = 0; i < num_keys; i++) {
compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
@@ -1088,6 +1266,8 @@ next_key:
continue;
}
+ __hash_rw_reader_unlock(h);
+
if (hit_mask != NULL)
*hit_mask = hits;
}
@@ -1146,7 +1326,7 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
idx = *next % RTE_HASH_BUCKET_ENTRIES;
}
-
+ __hash_rw_reader_lock(h);
/* Get position of entry in key table */
position = h->buckets[bucket_idx].key_idx[idx];
next_key = (struct rte_hash_key *) ((char *)h->key_store +
@@ -1155,6 +1335,8 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
*key = next_key->key;
*data = next_key->pdata;
+ __hash_rw_reader_unlock(h);
+
/* Increment iterator */
(*next)++;
diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
index 7a54e555..b43f467d 100644
--- a/lib/librte_hash/rte_cuckoo_hash.h
+++ b/lib/librte_hash/rte_cuckoo_hash.h
@@ -88,15 +88,14 @@ const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
#endif
-enum add_key_case {
- ADD_KEY_SINGLEWRITER = 0,
- ADD_KEY_MULTIWRITER,
- ADD_KEY_MULTIWRITER_TM,
-};
/** Number of items per bucket. */
#define RTE_HASH_BUCKET_ENTRIES 8
+#if !RTE_IS_POWER_OF_2(RTE_HASH_BUCKET_ENTRIES)
+#error RTE_HASH_BUCKET_ENTRIES must be a power of 2
+#endif
+
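
The new compile-time guard matters because the iterator and bulk-lookup paths divide and take the remainder by RTE_HASH_BUCKET_ENTRIES; with a power of two the compiler can reduce both to a shift and a mask. An illustrative sketch, not part of the patch:

    /* with RTE_HASH_BUCKET_ENTRIES == 8, as defined above: */
    uint32_t bucket_idx = next / RTE_HASH_BUCKET_ENTRIES; /* next >> 3 */
    uint32_t slot_idx   = next % RTE_HASH_BUCKET_ENTRIES; /* next & 0x7 */
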
#define NULL_SIGNATURE 0
#define EMPTY_SLOT 0
@@ -155,18 +154,20 @@ struct rte_hash {
struct rte_ring *free_slots;
/**< Ring that stores all indexes of the free slots in the key table */
- uint8_t hw_trans_mem_support;
- /**< Hardware transactional memory support */
+
struct lcore_cache *local_free_slots;
/**< Local cache per lcore, storing some indexes of the free slots */
- enum add_key_case add_key; /**< Multi-writer hash add behavior */
-
- rte_spinlock_t *multiwriter_lock; /**< Multi-writer spinlock for w/o TM */
/* Fields used in lookup */
uint32_t key_len __rte_cache_aligned;
/**< Length of hash key. */
+ uint8_t hw_trans_mem_support;
+ /**< If hardware transactional memory is used. */
+ uint8_t multi_writer_support;
+ /**< If multi-writer support is enabled. */
+ uint8_t readwrite_concur_support;
+ /**< If read-write concurrency support is enabled */
rte_hash_function hash_func; /**< Function used to calculate hash. */
uint32_t hash_func_init_val; /**< Init value used by hash_func. */
rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
@@ -184,6 +185,7 @@ struct rte_hash {
/**< Table with buckets storing all the hash values and key indexes
* to the key table.
*/
+ rte_rwlock_t *readwrite_lock; /**< Read-write lock thread-safety. */
} __rte_cache_aligned;
struct queue_node {
diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h
deleted file mode 100644
index 2c5b017e..00000000
--- a/lib/librte_hash/rte_cuckoo_hash_x86.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2016 Intel Corporation
- */
-
-/* rte_cuckoo_hash_x86.h
- * This file holds all x86 specific Cuckoo Hash functions
- */
-
-/* Only tries to insert at one bucket (@prim_bkt) without trying to push
- * buckets around
- */
-static inline unsigned
-rte_hash_cuckoo_insert_mw_tm(struct rte_hash_bucket *prim_bkt,
- hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
-{
- unsigned i, status;
- unsigned try = 0;
-
- while (try < RTE_HASH_TSX_MAX_RETRY) {
- status = rte_xbegin();
- if (likely(status == RTE_XBEGIN_STARTED)) {
- /* Insert new entry if there is room in the primary
- * bucket.
- */
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- /* Check if slot is available */
- if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
- prim_bkt->sig_current[i] = sig;
- prim_bkt->sig_alt[i] = alt_hash;
- prim_bkt->key_idx[i] = new_idx;
- break;
- }
- }
- rte_xend();
-
- if (i != RTE_HASH_BUCKET_ENTRIES)
- return 0;
-
- break; /* break off try loop if transaction commits */
- } else {
- /* If we abort we give up this cuckoo path. */
- try++;
- rte_pause();
- }
- }
-
- return -1;
-}
-
-/* Shift buckets along provided cuckoo_path (@leaf and @leaf_slot) and fill
- * the path head with new entry (sig, alt_hash, new_idx)
- */
-static inline int
-rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
- struct queue_node *leaf, uint32_t leaf_slot,
- hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx)
-{
- unsigned try = 0;
- unsigned status;
- uint32_t prev_alt_bkt_idx;
-
- struct queue_node *prev_node, *curr_node = leaf;
- struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
- uint32_t prev_slot, curr_slot = leaf_slot;
-
- while (try < RTE_HASH_TSX_MAX_RETRY) {
- status = rte_xbegin();
- if (likely(status == RTE_XBEGIN_STARTED)) {
- while (likely(curr_node->prev != NULL)) {
- prev_node = curr_node->prev;
- prev_bkt = prev_node->bkt;
- prev_slot = curr_node->prev_slot;
-
- prev_alt_bkt_idx
- = prev_bkt->sig_alt[prev_slot]
- & h->bucket_bitmask;
-
- if (unlikely(&h->buckets[prev_alt_bkt_idx]
- != curr_bkt)) {
- rte_xabort(RTE_XABORT_CUCKOO_PATH_INVALIDED);
- }
-
- /* Need to swap current/alt sig to allow later
- * Cuckoo insert to move elements back to its
- * primary bucket if available
- */
- curr_bkt->sig_alt[curr_slot] =
- prev_bkt->sig_current[prev_slot];
- curr_bkt->sig_current[curr_slot] =
- prev_bkt->sig_alt[prev_slot];
- curr_bkt->key_idx[curr_slot]
- = prev_bkt->key_idx[prev_slot];
-
- curr_slot = prev_slot;
- curr_node = prev_node;
- curr_bkt = curr_node->bkt;
- }
-
- curr_bkt->sig_current[curr_slot] = sig;
- curr_bkt->sig_alt[curr_slot] = alt_hash;
- curr_bkt->key_idx[curr_slot] = new_idx;
-
- rte_xend();
-
- return 0;
- }
-
- /* If we abort we give up this cuckoo path, since most likely it's
- * no longer valid as TSX detected data conflict
- */
- try++;
- rte_pause();
- }
-
- return -1;
-}
-
-/*
- * Make space for new key, using bfs Cuckoo Search and Multi-Writer safe
- * Cuckoo
- */
-static inline int
-rte_hash_cuckoo_make_space_mw_tm(const struct rte_hash *h,
- struct rte_hash_bucket *bkt,
- hash_sig_t sig, hash_sig_t alt_hash,
- uint32_t new_idx)
-{
- unsigned i;
- struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
- struct queue_node *tail, *head;
- struct rte_hash_bucket *curr_bkt, *alt_bkt;
-
- tail = queue;
- head = queue + 1;
- tail->bkt = bkt;
- tail->prev = NULL;
- tail->prev_slot = -1;
-
- /* Cuckoo bfs Search */
- while (likely(tail != head && head <
- queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
- RTE_HASH_BUCKET_ENTRIES)) {
- curr_bkt = tail->bkt;
- for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
- if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
- if (likely(rte_hash_cuckoo_move_insert_mw_tm(h,
- tail, i, sig,
- alt_hash, new_idx) == 0))
- return 0;
- }
-
- /* Enqueue new node and keep prev node info */
- alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
- & h->bucket_bitmask]);
- head->bkt = alt_bkt;
- head->prev = tail;
- head->prev_slot = i;
- head++;
- }
- tail++;
- }
-
- return -ENOSPC;
-}
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index f71ca9fb..9e7d9315 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -34,6 +34,9 @@ extern "C" {
/** Default behavior of insertion, single writer/multi writer */
#define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02
+/** Flag to support reader writer concurrency */
+#define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04
+
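
A minimal creation sketch using the new flag; the table name, sizes, and hash function below are illustrative assumptions, not part of this patch:

    #include <rte_hash.h>
    #include <rte_jhash.h>

    struct rte_hash_parameters params = {
        .name = "rw_table",              /* hypothetical name */
        .entries = 1024,
        .key_len = sizeof(uint32_t),
        .hash_func = rte_jhash,
        .hash_func_init_val = 0,
        .socket_id = 0,
        .extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY |
                      RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD,
    };
    struct rte_hash *h = rte_hash_create(&params);
    /* h == NULL on failure, with rte_errno set */
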
/** Signature of key that is stored internally. */
typedef uint32_t hash_sig_t;
@@ -124,9 +127,22 @@ void
rte_hash_reset(struct rte_hash *h);
/**
+ * Return the number of keys in the hash table.
+ * @param h
+ * Hash table to query.
+ * @return
+ * - -EINVAL if parameters are invalid
+ * - The number of keys currently stored in the table.
+ */
+int32_t
+rte_hash_count(const struct rte_hash *h);
+
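
Usage is a single call; a short sketch assuming a valid table handle h (printf is illustrative only):

    int32_t used = rte_hash_count(h);

    if (used < 0)
        ; /* -EINVAL: h was NULL */
    else
        printf("keys currently stored: %d\n", used);
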
+/**
* Add a key-value pair to an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -146,7 +162,9 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data);
* Add a key-value pair with a pre-computed hash value
* to an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -167,7 +185,9 @@ rte_hash_add_key_with_hash_data(const struct rte_hash *h, const void *key,
/**
* Add a key to an existing hash table. This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -185,7 +205,9 @@ rte_hash_add_key(const struct rte_hash *h, const void *key);
/**
* Add a key to an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to add the key to.
@@ -205,7 +227,9 @@ rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
/**
* Remove a key from an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to remove the key from.
@@ -224,7 +248,9 @@ rte_hash_del_key(const struct rte_hash *h, const void *key);
/**
* Remove a key from an existing hash table.
* This operation is not multi-thread safe
- * and should only be called from one thread.
+ * and should only be called from one thread by default.
+ * Thread safety can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to remove the key from.
@@ -244,7 +270,9 @@ rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
/**
* Find a key in the hash table given the position.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to get the key from.
@@ -254,8 +282,8 @@ rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
* Output containing a pointer to the key
* @return
* - 0 if retrieved successfully
- * - EINVAL if the parameters are invalid.
- * - ENOENT if no valid key is found in the given position.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if no valid key is found in the given position.
*/
int
rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
@@ -263,7 +291,9 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
/**
* Find a key-value pair in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -272,9 +302,11 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
* @param data
* Output with pointer to data returned from the hash table.
* @return
- * 0 if successful lookup
- * - EINVAL if the parameters are invalid.
- * - ENOENT if the key is not found.
+ * - A positive value that can be used by the caller as an offset into an
+ * array of user data. This value is unique for this key, and is the same
+ * value that was returned when the key was added.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if the key is not found.
*/
int
rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
@@ -282,7 +314,9 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
/**
* Find a key-value pair with a pre-computed hash value
* to an existing hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -293,9 +327,11 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
* @param data
* Output with pointer to data returned from the hash table.
* @return
- * 0 if successful lookup
- * - EINVAL if the parameters are invalid.
- * - ENOENT if the key is not found.
+ * - A positive value that can be used by the caller as an offset into an
+ * array of user data. This value is unique for this key, and is the same
+ * value that was returned when the key was added.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if the key is not found.
*/
int
rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key,
@@ -303,7 +339,9 @@ rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key,
/**
* Find a key in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -321,7 +359,9 @@ rte_hash_lookup(const struct rte_hash *h, const void *key);
/**
* Find a key in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -356,7 +396,9 @@ rte_hash_hash(const struct rte_hash *h, const void *key);
/**
* Find multiple keys in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
@@ -377,7 +419,9 @@ rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
/**
* Find multiple keys in the hash table.
- * This operation is multi-thread safe.
+ * This operation is multi-thread safe with regard to other lookup threads.
+ * Read-write concurrency can be enabled by setting a flag at
+ * table creation.
*
* @param h
* Hash table to look in.
diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
index 52a2576f..e216ac8e 100644
--- a/lib/librte_hash/rte_hash_version.map
+++ b/lib/librte_hash/rte_hash_version.map
@@ -45,3 +45,11 @@ DPDK_16.07 {
rte_hash_get_key_with_position;
} DPDK_2.2;
+
+
+DPDK_18.08 {
+ global:
+
+ rte_hash_count;
+
+} DPDK_16.07;
diff --git a/lib/librte_kni/meson.build b/lib/librte_kni/meson.build
index c4b21961..a738a033 100644
--- a/lib/librte_kni/meson.build
+++ b/lib/librte_kni/meson.build
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
-if host_machine.system() != 'linux'
+if host_machine.system() != 'linux' or cc.sizeof('void *') == 4
build = false
endif
version = 2
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 8a8f6c1c..65f6a2b0 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -715,6 +715,9 @@ rte_kni_get(const char *name)
struct rte_kni_memzone_slot *it;
struct rte_kni *kni;
+ if (name == NULL || name[0] == '\0')
+ return NULL;
+
/* Note: could be improved perf-wise if necessary */
for (i = 0; i < kni_memzone_pool.max_ifaces; i++) {
it = &kni_memzone_pool.slots[i];
diff --git a/lib/librte_kvargs/Makefile b/lib/librte_kvargs/Makefile
index 39d5ac33..87593954 100644
--- a/lib/librte_kvargs/Makefile
+++ b/lib/librte_kvargs/Makefile
@@ -7,7 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_kvargs.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
-LDLIBS += -lrte_eal
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
EXPORT_MAP := rte_kvargs_version.map
diff --git a/lib/librte_kvargs/meson.build b/lib/librte_kvargs/meson.build
index 0c5b9cb2..acd3e543 100644
--- a/lib/librte_kvargs/meson.build
+++ b/lib/librte_kvargs/meson.build
@@ -1,6 +1,11 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2017 Intel Corporation
+includes = [global_inc]
+includes += include_directories('../librte_eal/common/include')
+
version = 1
sources = files('rte_kvargs.c')
headers = files('rte_kvargs.h')
+
+deps += 'compat'
diff --git a/lib/librte_kvargs/rte_kvargs.c b/lib/librte_kvargs/rte_kvargs.c
index d92a5f9d..a28f7694 100644
--- a/lib/librte_kvargs/rte_kvargs.c
+++ b/lib/librte_kvargs/rte_kvargs.c
@@ -6,7 +6,6 @@
#include <string.h>
#include <stdlib.h>
-#include <rte_log.h>
#include <rte_string_fns.h>
#include "rte_kvargs.h"
@@ -28,29 +27,22 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params)
* to pass to rte_strsplit
*/
kvlist->str = strdup(params);
- if (kvlist->str == NULL) {
- RTE_LOG(ERR, PMD, "Cannot parse arguments: not enough memory\n");
+ if (kvlist->str == NULL)
return -1;
- }
/* browse each key/value pair and add it in kvlist */
str = kvlist->str;
while ((str = strtok_r(str, RTE_KVARGS_PAIRS_DELIM, &ctx1)) != NULL) {
i = kvlist->count;
- if (i >= RTE_KVARGS_MAX) {
- RTE_LOG(ERR, PMD, "Cannot parse arguments: list full\n");
+ if (i >= RTE_KVARGS_MAX)
return -1;
- }
kvlist->pairs[i].key = strtok_r(str, RTE_KVARGS_KV_DELIM, &ctx2);
kvlist->pairs[i].value = strtok_r(NULL, RTE_KVARGS_KV_DELIM, &ctx2);
- if (kvlist->pairs[i].key == NULL || kvlist->pairs[i].value == NULL) {
- RTE_LOG(ERR, PMD,
- "Cannot parse arguments: wrong key or value\n"
- "params=<%s>\n", params);
+ if (kvlist->pairs[i].key == NULL ||
+ kvlist->pairs[i].value == NULL)
return -1;
- }
kvlist->count++;
str = NULL;
@@ -89,12 +81,8 @@ check_for_valid_keys(struct rte_kvargs *kvlist,
for (i = 0; i < kvlist->count; i++) {
pair = &kvlist->pairs[i];
ret = is_valid_key(valid, pair->key);
- if (!ret) {
- RTE_LOG(ERR, PMD,
- "Error parsing device, invalid key <%s>\n",
- pair->key);
+ if (!ret)
return -1;
- }
}
return 0;
}
@@ -180,3 +168,38 @@ rte_kvargs_parse(const char *args, const char * const valid_keys[])
return kvlist;
}
+
+__rte_experimental
+struct rte_kvargs *
+rte_kvargs_parse_delim(const char *args, const char * const valid_keys[],
+ const char *valid_ends)
+{
+ struct rte_kvargs *kvlist = NULL;
+ char *copy;
+ size_t len;
+
+ if (valid_ends == NULL)
+ return rte_kvargs_parse(args, valid_keys);
+
+ copy = strdup(args);
+ if (copy == NULL)
+ return NULL;
+
+ len = strcspn(copy, valid_ends);
+ copy[len] = '\0';
+
+ kvlist = rte_kvargs_parse(copy, valid_keys);
+
+ free(copy);
+ return kvlist;
+}
+
+__rte_experimental
+int
+rte_kvargs_strcmp(const char *key __rte_unused,
+ const char *value, void *opaque)
+{
+ const char *str = opaque;
+
+ return -abs(strcmp(str, value));
+}
diff --git a/lib/librte_kvargs/rte_kvargs.h b/lib/librte_kvargs/rte_kvargs.h
index 51b8120b..fc041956 100644
--- a/lib/librte_kvargs/rte_kvargs.h
+++ b/lib/librte_kvargs/rte_kvargs.h
@@ -25,6 +25,8 @@
extern "C" {
#endif
+#include <rte_compat.h>
+
/** Maximum number of key/value associations */
#define RTE_KVARGS_MAX 32
@@ -72,6 +74,36 @@ struct rte_kvargs *rte_kvargs_parse(const char *args,
const char *const valid_keys[]);
/**
+ * Allocate a rte_kvargs and store key/value associations from a string.
+ * This version will consider any byte from valid_ends as a possible
+ * terminating character, and will not parse beyond the first
+ * occurrence of any of them.
+ *
+ * The function allocates and fills an rte_kvargs structure from a given
+ * string whose format is key1=value1,key2=value2,...
+ *
+ * The structure can be freed with rte_kvargs_free().
+ *
+ * @param args
+ * The input string containing the key/value associations
+ *
+ * @param valid_keys
+ * A list of valid keys (table of const char *, the last must be NULL).
+ * This argument is ignored if NULL
+ *
+ * @param valid_ends
+ * Acceptable terminating characters.
+ * If NULL, the behavior is the same as ``rte_kvargs_parse``.
+ *
+ * @return
+ * - A pointer to an allocated rte_kvargs structure on success
+ * - NULL on error
+ */
+__rte_experimental
+struct rte_kvargs *rte_kvargs_parse_delim(const char *args,
+ const char *const valid_keys[],
+ const char *valid_ends);
+
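
A usage sketch for the new delimited parser; the argument string, key names, and delimiter below are hypothetical:

    static const char * const keys[] = { "iface", "queues", NULL };
    struct rte_kvargs *kv;

    /* stop at the first '/', so "extra" is never parsed */
    kv = rte_kvargs_parse_delim("iface=net0,queues=4/extra", keys, "/");
    if (kv != NULL) {
        /* kv holds iface=net0 and queues=4 */
        rte_kvargs_free(kv);
    }
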
+/**
* Free a rte_kvargs structure
*
* Free a rte_kvargs structure previously allocated with
@@ -121,6 +153,32 @@ int rte_kvargs_process(const struct rte_kvargs *kvlist,
unsigned rte_kvargs_count(const struct rte_kvargs *kvlist,
const char *key_match);
+/**
+ * Generic kvarg handler for string comparison.
+ *
+ * This function can be used for a generic string comparison processing
+ * on a list of kvargs.
+ *
+ * @param key
+ * kvarg pair key.
+ *
+ * @param value
+ * kvarg pair value.
+ *
+ * @param opaque
+ * Opaque pointer to a string.
+ *
+ * @return
+ * 0 if the strings match.
+ * !0 otherwise or on error.
+ *
+ * Unlike strcmp, comparison ordering is not kept.
+ * In order for rte_kvargs_process to stop processing on a match error,
+ * a negative value is returned even where strcmp would have returned a
+ * positive one.
+ */
+__rte_experimental
+int rte_kvargs_strcmp(const char *key, const char *value, void *opaque);
+
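
A sketch of the intended pairing with rte_kvargs_process(); kvlist is assumed to come from rte_kvargs_parse(), and the key name and expected value are assumptions:

    char expected[] = "eth";

    /* returns 0 only if every "class" value in kvlist equals "eth" */
    int ret = rte_kvargs_process(kvlist, "class",
                                 rte_kvargs_strcmp, expected);
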
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_kvargs/rte_kvargs_version.map b/lib/librte_kvargs/rte_kvargs_version.map
index 2030ec46..8f4b4e3f 100644
--- a/lib/librte_kvargs/rte_kvargs_version.map
+++ b/lib/librte_kvargs/rte_kvargs_version.map
@@ -8,3 +8,11 @@ DPDK_2.0 {
local: *;
};
+
+EXPERIMENTAL {
+ global:
+
+ rte_kvargs_parse_delim;
+ rte_kvargs_strcmp;
+
+} DPDK_2.0;
diff --git a/lib/librte_latencystats/rte_latencystats.c b/lib/librte_latencystats/rte_latencystats.c
index 46c69bf0..1fdec68e 100644
--- a/lib/librte_latencystats/rte_latencystats.c
+++ b/lib/librte_latencystats/rte_latencystats.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2017 Intel Corporation
+ * Copyright(c) 2018 Intel Corporation
*/
#include <unistd.h>
@@ -265,6 +265,7 @@ rte_latencystats_uninit(void)
uint16_t qid;
int ret = 0;
struct rxtx_cbs *cbs = NULL;
+ const struct rte_memzone *mz = NULL;
/** De register Rx/Tx callbacks */
RTE_ETH_FOREACH_DEV(pid) {
@@ -288,6 +289,11 @@ rte_latencystats_uninit(void)
}
}
+ /* free up the memzone */
+ mz = rte_memzone_lookup(MZ_RTE_LATENCY_STATS);
+ if (mz)
+ rte_memzone_free(mz);
+
return 0;
}
diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile
index 8749a00f..e2b98a25 100644
--- a/lib/librte_mbuf/Makefile
+++ b/lib/librte_mbuf/Makefile
@@ -6,7 +6,6 @@ include $(RTE_SDK)/mk/rte.vars.mk
# library name
LIB = librte_mbuf.a
-CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
LDLIBS += -lrte_eal -lrte_mempool
diff --git a/lib/librte_mbuf/meson.build b/lib/librte_mbuf/meson.build
index 869c17c1..45ffb0db 100644
--- a/lib/librte_mbuf/meson.build
+++ b/lib/librte_mbuf/meson.build
@@ -2,7 +2,6 @@
# Copyright(c) 2017 Intel Corporation
version = 3
-allow_experimental_apis = true
sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
deps += ['mempool']
diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index fca580ef..e714c5a5 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -107,7 +107,7 @@ rte_pktmbuf_init(struct rte_mempool *mp,
}
/* Helper to create a mbuf pool with given mempool ops name*/
-struct rte_mempool * __rte_experimental
+struct rte_mempool *
rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n,
unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
int socket_id, const char *ops_name)
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 8e6b4d29..9ce5d76d 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -729,6 +729,24 @@ rte_mbuf_to_baddr(struct rte_mbuf *md)
}
/**
+ * Return the starting address of the private data area embedded in
+ * the given mbuf.
+ *
+ * Note that no check is made to ensure that a private data area
+ * actually exists in the supplied mbuf.
+ *
+ * @param m
+ * The pointer to the mbuf.
+ * @return
+ * The starting address of the private data area of the given mbuf.
+ */
+static inline void * __rte_experimental
+rte_mbuf_to_priv(struct rte_mbuf *m)
+{
+ return RTE_PTR_ADD(m, sizeof(struct rte_mbuf));
+}
+
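
A minimal sketch of the private-area accessor; struct pkt_meta is hypothetical, and the mbuf pool must have been created with priv_size >= sizeof(struct pkt_meta):

    struct pkt_meta {           /* hypothetical per-packet metadata */
        uint64_t rx_tsc;
    };

    static inline void
    tag_mbuf(struct rte_mbuf *m)
    {
        struct pkt_meta *meta = rte_mbuf_to_priv(m);

        meta->rx_tsc = rte_rdtsc(); /* from rte_cycles.h */
    }
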
+/**
* Returns TRUE if given mbuf is cloned by mbuf indirection, or FALSE
* otherwise.
*
@@ -1146,7 +1164,7 @@ rte_pktmbuf_pool_create(const char *name, unsigned n,
* - EEXIST - a memzone with the same name already exists
* - ENOMEM - no appropriate memory area found in which to create memzone
*/
-struct rte_mempool * __rte_experimental
+struct rte_mempool *
rte_pktmbuf_pool_create_by_ops(const char *name, unsigned int n,
unsigned int cache_size, uint16_t priv_size, uint16_t data_room_size,
int socket_id, const char *ops_name);
diff --git a/lib/librte_mbuf/rte_mbuf_pool_ops.c b/lib/librte_mbuf/rte_mbuf_pool_ops.c
index a1d4699f..5722976f 100644
--- a/lib/librte_mbuf/rte_mbuf_pool_ops.c
+++ b/lib/librte_mbuf/rte_mbuf_pool_ops.c
@@ -9,7 +9,7 @@
#include <rte_errno.h>
#include <rte_mbuf_pool_ops.h>
-int __rte_experimental
+int
rte_mbuf_set_platform_mempool_ops(const char *ops_name)
{
const struct rte_memzone *mz;
@@ -35,7 +35,7 @@ rte_mbuf_set_platform_mempool_ops(const char *ops_name)
return -EEXIST;
}
-const char * __rte_experimental
+const char *
rte_mbuf_platform_mempool_ops(void)
{
const struct rte_memzone *mz;
@@ -46,7 +46,7 @@ rte_mbuf_platform_mempool_ops(void)
return mz->addr;
}
-int __rte_experimental
+int
rte_mbuf_set_user_mempool_ops(const char *ops_name)
{
const struct rte_memzone *mz;
@@ -67,7 +67,7 @@ rte_mbuf_set_user_mempool_ops(const char *ops_name)
}
-const char * __rte_experimental
+const char *
rte_mbuf_user_mempool_ops(void)
{
const struct rte_memzone *mz;
@@ -79,7 +79,7 @@ rte_mbuf_user_mempool_ops(void)
}
/* Return mbuf pool ops name */
-const char * __rte_experimental
+const char *
rte_mbuf_best_mempool_ops(void)
{
/* User defined mempool ops takes the priority */
diff --git a/lib/librte_mbuf/rte_mbuf_pool_ops.h b/lib/librte_mbuf/rte_mbuf_pool_ops.h
index ebf5bf0f..7ed95a49 100644
--- a/lib/librte_mbuf/rte_mbuf_pool_ops.h
+++ b/lib/librte_mbuf/rte_mbuf_pool_ops.h
@@ -12,9 +12,6 @@
* These APIs are for configuring the mbuf pool ops names to be largely used by
* rte_pktmbuf_pool_create(). However, this can also be used to set and inquire
* the best mempool ops available.
- *
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
*/
#include <rte_compat.h>
@@ -34,7 +31,7 @@ extern "C" {
* - On success, zero.
* - On failure, a negative value.
*/
-int __rte_experimental
+int
rte_mbuf_set_platform_mempool_ops(const char *ops_name);
/**
@@ -46,7 +43,7 @@ rte_mbuf_set_platform_mempool_ops(const char *ops_name);
* - On success, platform pool ops name.
* - On failure, NULL.
*/
-const char * __rte_experimental
+const char *
rte_mbuf_platform_mempool_ops(void);
/**
@@ -60,7 +57,7 @@ rte_mbuf_platform_mempool_ops(void);
* - On success, zero.
* - On failure, a negative value.
*/
-int __rte_experimental
+int
rte_mbuf_set_user_mempool_ops(const char *ops_name);
/**
@@ -72,7 +69,7 @@ rte_mbuf_set_user_mempool_ops(const char *ops_name);
 * - On success, user pool ops name.
* - On failure, NULL.
*/
-const char * __rte_experimental
+const char *
rte_mbuf_user_mempool_ops(void);
/**
@@ -87,7 +84,7 @@ rte_mbuf_user_mempool_ops(void);
* @return
 * Returns the preferred mbuf pool ops name.
*/
-const char * __rte_experimental
+const char *
rte_mbuf_best_mempool_ops(void);
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h
index 79ea3142..01acc66e 100644
--- a/lib/librte_mbuf/rte_mbuf_ptype.h
+++ b/lib/librte_mbuf/rte_mbuf_ptype.h
@@ -653,9 +653,9 @@ extern "C" {
#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4)
/**
- * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by
- * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can
- * determine if it is an IPV4 packet.
+ * Check if the (outer) L3 header is IPv6. To avoid comparing IPv6 types one by
+ * one, bit 6 is selected to be used for IPv6 only. Then checking bit 6 can
+ * determine if it is an IPv6 packet.
*/
#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6)
diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map
index 1bb9538d..cae68db8 100644
--- a/lib/librte_mbuf/rte_mbuf_version.map
+++ b/lib/librte_mbuf/rte_mbuf_version.map
@@ -35,7 +35,7 @@ DPDK_16.11 {
} DPDK_2.1;
-EXPERIMENTAL {
+DPDK_18.08 {
global:
rte_mbuf_best_mempool_ops;
@@ -44,4 +44,4 @@ EXPERIMENTAL {
rte_mbuf_set_user_mempool_ops;
rte_mbuf_user_mempool_ops;
rte_pktmbuf_pool_create_by_ops;
-};
+} DPDK_16.11;
diff --git a/lib/librte_member/rte_member.c b/lib/librte_member/rte_member.c
index e147dd1f..702c01d3 100644
--- a/lib/librte_member/rte_member.c
+++ b/lib/librte_member/rte_member.c
@@ -297,10 +297,7 @@ rte_member_reset(const struct rte_member_setsum *setsum)
}
}
-RTE_INIT(librte_member_init_log);
-
-static void
-librte_member_init_log(void)
+RTE_INIT(librte_member_init_log)
{
librte_member_logtype = rte_log_register("lib.member");
if (librte_member_logtype >= 0)
diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile
index e3c32b14..20bf63fb 100644
--- a/lib/librte_mempool/Makefile
+++ b/lib/librte_mempool/Makefile
@@ -7,15 +7,12 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_mempool.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
-# Allow deprecated symbol to use deprecated rte_mempool_populate_iova_tab()
-# from earlier deprecated rte_mempool_populate_phys_tab()
-CFLAGS += -Wno-deprecated-declarations
CFLAGS += -DALLOW_EXPERIMENTAL_API
LDLIBS += -lrte_eal -lrte_ring
EXPORT_MAP := rte_mempool_version.map
-LIBABIVER := 4
+LIBABIVER := 5
# memseg walk is not yet part of stable API
CFLAGS += -DALLOW_EXPERIMENTAL_API
diff --git a/lib/librte_mempool/meson.build b/lib/librte_mempool/meson.build
index d507e551..38d7ae89 100644
--- a/lib/librte_mempool/meson.build
+++ b/lib/librte_mempool/meson.build
@@ -5,17 +5,13 @@ allow_experimental_apis = true
extra_flags = []
-# Allow deprecated symbol to use deprecated rte_mempool_populate_iova_tab()
-# from earlier deprecated rte_mempool_populate_phys_tab()
-extra_flags += '-Wno-deprecated-declarations'
-
foreach flag: extra_flags
if cc.has_argument(flag)
cflags += flag
endif
endforeach
-version = 4
+version = 5
sources = files('rte_mempool.c', 'rte_mempool_ops.c',
'rte_mempool_ops_default.c')
headers = files('rte_mempool.h')
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 8c8b9f80..03e6b5f7 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -225,93 +225,6 @@ rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
return sz->total_size;
}
-
-/*
- * Internal function to calculate required memory chunk size shared
- * by default implementation of the corresponding callback and
- * deprecated external function.
- */
-size_t
-rte_mempool_calc_mem_size_helper(uint32_t elt_num, size_t total_elt_sz,
- uint32_t pg_shift)
-{
- size_t obj_per_page, pg_num, pg_sz;
-
- if (total_elt_sz == 0)
- return 0;
-
- if (pg_shift == 0)
- return total_elt_sz * elt_num;
-
- pg_sz = (size_t)1 << pg_shift;
- obj_per_page = pg_sz / total_elt_sz;
- if (obj_per_page == 0)
- return RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * elt_num;
-
- pg_num = (elt_num + obj_per_page - 1) / obj_per_page;
- return pg_num << pg_shift;
-}
-
-/*
- * Calculate maximum amount of memory required to store given number of objects.
- */
-size_t
-rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz, uint32_t pg_shift,
- __rte_unused unsigned int flags)
-{
- return rte_mempool_calc_mem_size_helper(elt_num, total_elt_sz,
- pg_shift);
-}
-
-/*
- * Calculate how much memory would be actually required with the
- * given memory footprint to store required number of elements.
- */
-ssize_t
-rte_mempool_xmem_usage(__rte_unused void *vaddr, uint32_t elt_num,
- size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
- uint32_t pg_shift, __rte_unused unsigned int flags)
-{
- uint32_t elt_cnt = 0;
- rte_iova_t start, end;
- uint32_t iova_idx;
- size_t pg_sz = (size_t)1 << pg_shift;
-
- /* if iova is NULL, assume contiguous memory */
- if (iova == NULL) {
- start = 0;
- end = pg_sz * pg_num;
- iova_idx = pg_num;
- } else {
- start = iova[0];
- end = iova[0] + pg_sz;
- iova_idx = 1;
- }
- while (elt_cnt < elt_num) {
-
- if (end - start >= total_elt_sz) {
- /* enough contiguous memory, add an object */
- start += total_elt_sz;
- elt_cnt++;
- } else if (iova_idx < pg_num) {
- /* no room to store one obj, add a page */
- if (end == iova[iova_idx]) {
- end += pg_sz;
- } else {
- start = iova[iova_idx];
- end = iova[iova_idx] + pg_sz;
- }
- iova_idx++;
-
- } else {
- /* no more page, return how many elements fit */
- return -(size_t)elt_cnt;
- }
- }
-
- return (size_t)iova_idx << pg_shift;
-}
-
/* free a memchunk allocated with rte_memzone_reserve() */
static void
rte_mempool_memchunk_mz_free(__rte_unused struct rte_mempool_memhdr *memhdr,
@@ -423,63 +336,6 @@ fail:
return ret;
}
-int
-rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
- phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
- void *opaque)
-{
- return rte_mempool_populate_iova(mp, vaddr, paddr, len, free_cb, opaque);
-}
-
-/* Add objects in the pool, using a table of physical pages. Return the
- * number of objects added, or a negative value on error.
- */
-int
-rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque)
-{
- uint32_t i, n;
- int ret, cnt = 0;
- size_t pg_sz = (size_t)1 << pg_shift;
-
- /* mempool must not be populated */
- if (mp->nb_mem_chunks != 0)
- return -EEXIST;
-
- if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG)
- return rte_mempool_populate_iova(mp, vaddr, RTE_BAD_IOVA,
- pg_num * pg_sz, free_cb, opaque);
-
- for (i = 0; i < pg_num && mp->populated_size < mp->size; i += n) {
-
- /* populate with the largest group of contiguous pages */
- for (n = 1; (i + n) < pg_num &&
- iova[i + n - 1] + pg_sz == iova[i + n]; n++)
- ;
-
- ret = rte_mempool_populate_iova(mp, vaddr + i * pg_sz,
- iova[i], n * pg_sz, free_cb, opaque);
- if (ret < 0) {
- rte_mempool_free_memchunks(mp);
- return ret;
- }
- /* no need to call the free callback for next chunks */
- free_cb = NULL;
- cnt += ret;
- }
- return cnt;
-}
-
-int
-rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
- const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque)
-{
- return rte_mempool_populate_iova_tab(mp, vaddr, paddr, pg_num, pg_shift,
- free_cb, opaque);
-}
-
/* Populate the mempool with a virtual area. Return the number of
* objects added, or a negative value on error.
*/
@@ -916,6 +772,12 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
+ /* asked for zero items */
+ if (n == 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
/* asked cache too big */
if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE ||
CALC_CACHE_FLUSHTHRESH(cache_size) > n) {
@@ -1065,66 +927,6 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
return NULL;
}
-/*
- * Create the mempool over already allocated chunk of memory.
- * That external memory buffer can consists of physically disjoint pages.
- * Setting vaddr to NULL, makes mempool to fallback to rte_mempool_create()
- * behavior.
- */
-struct rte_mempool *
-rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
- unsigned cache_size, unsigned private_data_size,
- rte_mempool_ctor_t *mp_init, void *mp_init_arg,
- rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
- int socket_id, unsigned flags, void *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift)
-{
- struct rte_mempool *mp = NULL;
- int ret;
-
- /* no virtual address supplied, use rte_mempool_create() */
- if (vaddr == NULL)
- return rte_mempool_create(name, n, elt_size, cache_size,
- private_data_size, mp_init, mp_init_arg,
- obj_init, obj_init_arg, socket_id, flags);
-
- /* check that we have both VA and PA */
- if (iova == NULL) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- /* Check that pg_shift parameter is valid. */
- if (pg_shift > MEMPOOL_PG_SHIFT_MAX) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- mp = rte_mempool_create_empty(name, n, elt_size, cache_size,
- private_data_size, socket_id, flags);
- if (mp == NULL)
- return NULL;
-
- /* call the mempool priv initializer */
- if (mp_init)
- mp_init(mp, mp_init_arg);
-
- ret = rte_mempool_populate_iova_tab(mp, vaddr, iova, pg_num, pg_shift,
- NULL, NULL);
- if (ret < 0 || ret != (int)mp->size)
- goto fail;
-
- /* call the object initializers */
- if (obj_init)
- rte_mempool_obj_iter(mp, obj_init, obj_init_arg);
-
- return mp;
-
- fail:
- rte_mempool_free(mp);
- return NULL;
-}
-
/* Return the number of entries in the mempool */
unsigned int
rte_mempool_avail_count(const struct rte_mempool *mp)
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 1f59553b..7c9cd9a2 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -488,28 +488,6 @@ ssize_t rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
size_t *min_chunk_size, size_t *align);
/**
- * @internal Helper function to calculate memory size required to store
- * specified number of objects in assumption that the memory buffer will
- * be aligned at page boundary.
- *
- * Note that if object size is bigger than page size, then it assumes
- * that pages are grouped in subsets of physically continuous pages big
- * enough to store at least one object.
- *
- * @param elt_num
- * Number of elements.
- * @param total_elt_sz
- * The size of each element, including header and trailer, as returned
- * by rte_mempool_calc_obj_size().
- * @param pg_shift
- * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
- * @return
- * Required memory size aligned at page boundary.
- */
-size_t rte_mempool_calc_mem_size_helper(uint32_t elt_num, size_t total_elt_sz,
- uint32_t pg_shift);
-
-/**
* Function to be called for each populated object.
*
* @param[in] mp
@@ -974,74 +952,6 @@ rte_mempool_create(const char *name, unsigned n, unsigned elt_size,
int socket_id, unsigned flags);
/**
- * @deprecated
- * Create a new mempool named *name* in memory.
- *
- * The pool contains n elements of elt_size. Its size is set to n.
- * This function uses ``memzone_reserve()`` to allocate the mempool header
- * (and the objects if vaddr is NULL).
- * Depending on the input parameters, mempool elements can be either allocated
- * together with the mempool header, or an externally provided memory buffer
- * could be used to store mempool objects. In later case, that external
- * memory buffer can consist of set of disjoint physical pages.
- *
- * @param name
- * The name of the mempool.
- * @param n
- * The number of elements in the mempool. The optimum size (in terms of
- * memory usage) for a mempool is when n is a power of two minus one:
- * n = (2^q - 1).
- * @param elt_size
- * The size of each element.
- * @param cache_size
- * Size of the cache. See rte_mempool_create() for details.
- * @param private_data_size
- * The size of the private data appended after the mempool
- * structure. This is useful for storing some private data after the
- * mempool structure, as is done for rte_mbuf_pool for example.
- * @param mp_init
- * A function pointer that is called for initialization of the pool,
- * before object initialization. The user can initialize the private
- * data in this function if needed. This parameter can be NULL if
- * not needed.
- * @param mp_init_arg
- * An opaque pointer to data that can be used in the mempool
- * constructor function.
- * @param obj_init
- * A function called for each object at initialization of the pool.
- * See rte_mempool_create() for details.
- * @param obj_init_arg
- * An opaque pointer passed to the object constructor function.
- * @param socket_id
- * The *socket_id* argument is the socket identifier in the case of
- * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA
- * constraint for the reserved zone.
- * @param flags
- * Flags controlling the behavior of the mempool. See
- * rte_mempool_create() for details.
- * @param vaddr
- * Virtual address of the externally allocated memory buffer.
- * Will be used to store mempool objects.
- * @param iova
- * Array of IO addresses of the pages that comprises given memory buffer.
- * @param pg_num
- * Number of elements in the iova array.
- * @param pg_shift
- * LOG2 of the physical pages size.
- * @return
- * The pointer to the new allocated mempool, on success. NULL on error
- * with rte_errno set appropriately. See rte_mempool_create() for details.
- */
-__rte_deprecated
-struct rte_mempool *
-rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
- unsigned cache_size, unsigned private_data_size,
- rte_mempool_ctor_t *mp_init, void *mp_init_arg,
- rte_mempool_obj_cb_t *obj_init, void *obj_init_arg,
- int socket_id, unsigned flags, void *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift);
-
-/**
* Create an empty mempool
*
* The mempool is allocated and initialized, but it is not populated: no
@@ -1123,48 +1033,6 @@ int rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr,
rte_iova_t iova, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
void *opaque);
-__rte_deprecated
-int rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr,
- phys_addr_t paddr, size_t len, rte_mempool_memchunk_free_cb_t *free_cb,
- void *opaque);
-
-/**
- * @deprecated
- * Add physical memory for objects in the pool at init
- *
- * Add a virtually contiguous memory chunk in the pool where objects can
- * be instantiated. The IO addresses corresponding to the virtual
- * area are described in iova[], pg_num, pg_shift.
- *
- * @param mp
- * A pointer to the mempool structure.
- * @param vaddr
- * The virtual address of memory that should be used to store objects.
- * @param iova
- * An array of IO addresses of each page composing the virtual area.
- * @param pg_num
- * Number of elements in the iova array.
- * @param pg_shift
- * LOG2 of the physical pages size.
- * @param free_cb
- * The callback used to free this chunk when destroying the mempool.
- * @param opaque
- * An opaque argument passed to free_cb.
- * @return
- * The number of objects added on success.
- * On error, the chunks are not added in the memory list of the
- * mempool and a negative errno is returned.
- */
-__rte_deprecated
-int rte_mempool_populate_iova_tab(struct rte_mempool *mp, char *vaddr,
- const rte_iova_t iova[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque);
-
-__rte_deprecated
-int rte_mempool_populate_phys_tab(struct rte_mempool *mp, char *vaddr,
- const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift,
- rte_mempool_memchunk_free_cb_t *free_cb, void *opaque);
-
/**
* Add virtually contiguous memory for objects in the pool at init
*
@@ -1746,13 +1614,6 @@ rte_mempool_virt2iova(const void *elt)
return hdr->iova;
}
-__rte_deprecated
-static inline phys_addr_t
-rte_mempool_virt2phy(__rte_unused const struct rte_mempool *mp, const void *elt)
-{
- return rte_mempool_virt2iova(elt);
-}
-
/**
* Check the consistency of mempool objects.
*
@@ -1822,68 +1683,6 @@ uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags,
struct rte_mempool_objsz *sz);
/**
- * @deprecated
- * Get the size of memory required to store mempool elements.
- *
- * Calculate the maximum amount of memory required to store given number
- * of objects. Assume that the memory buffer will be aligned at page
- * boundary.
- *
- * Note that if object size is bigger than page size, then it assumes
- * that pages are grouped in subsets of physically continuous pages big
- * enough to store at least one object.
- *
- * @param elt_num
- * Number of elements.
- * @param total_elt_sz
- * The size of each element, including header and trailer, as returned
- * by rte_mempool_calc_obj_size().
- * @param pg_shift
- * LOG2 of the physical pages size. If set to 0, ignore page boundaries.
- * @param flags
- * The mempool flags.
- * @return
- * Required memory size aligned at page boundary.
- */
-__rte_deprecated
-size_t rte_mempool_xmem_size(uint32_t elt_num, size_t total_elt_sz,
- uint32_t pg_shift, unsigned int flags);
-
-/**
- * @deprecated
- * Get the size of memory required to store mempool elements.
- *
- * Calculate how much memory would be actually required with the given
- * memory footprint to store required number of objects.
- *
- * @param vaddr
- * Virtual address of the externally allocated memory buffer.
- * Will be used to store mempool objects.
- * @param elt_num
- * Number of elements.
- * @param total_elt_sz
- * The size of each element, including header and trailer, as returned
- * by rte_mempool_calc_obj_size().
- * @param iova
- * Array of IO addresses of the pages that comprises given memory buffer.
- * @param pg_num
- * Number of elements in the iova array.
- * @param pg_shift
- * LOG2 of the physical pages size.
- * @param flags
- * The mempool flags.
- * @return
- * On success, the number of bytes needed to store given number of
- * objects, aligned to the given page size. If the provided memory
- * buffer is too small, return a negative value whose absolute value
- * is the actual number of elements that can be stored in that buffer.
- */
-__rte_deprecated
-ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num,
- size_t total_elt_sz, const rte_iova_t iova[], uint32_t pg_num,
- uint32_t pg_shift, unsigned int flags);
-
-/**
* Walk list of all memory pools
*
* @param func
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
index fd63ca13..4e2bfc82 100644
--- a/lib/librte_mempool/rte_mempool_ops_default.c
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -12,12 +12,31 @@ rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
size_t *min_chunk_size, size_t *align)
{
size_t total_elt_sz;
+ size_t obj_per_page, pg_num, pg_sz;
size_t mem_size;
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
-
- mem_size = rte_mempool_calc_mem_size_helper(obj_num, total_elt_sz,
- pg_shift);
+ if (total_elt_sz == 0) {
+ mem_size = 0;
+ } else if (pg_shift == 0) {
+ mem_size = total_elt_sz * obj_num;
+ } else {
+ pg_sz = (size_t)1 << pg_shift;
+ obj_per_page = pg_sz / total_elt_sz;
+ if (obj_per_page == 0) {
+ /*
+ * Note that if object size is bigger than page size,
+ * then it is assumed that pages are grouped in subsets
+ * of physically contiguous pages big enough to store
+ * at least one object.
+ */
+ mem_size =
+ RTE_ALIGN_CEIL(total_elt_sz, pg_sz) * obj_num;
+ } else {
+ pg_num = (obj_num + obj_per_page - 1) / obj_per_page;
+ mem_size = pg_num << pg_shift;
+ }
+ }
*min_chunk_size = RTE_MAX((size_t)1 << pg_shift, total_elt_sz);
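
A worked example of the inlined computation above, with illustrative figures rather than anything taken from the patch:

    /* pg_shift = 12 (4 KiB pages), total_elt_sz = 2500, obj_num = 1000:
     *   obj_per_page = 4096 / 2500        = 1
     *   pg_num       = (1000 + 1 - 1) / 1 = 1000
     *   mem_size     = 1000 << 12         = 4096000 bytes
     * i.e. one page per object, since two elements never fit in a page.
     */
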
diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map
index 7091b954..17cbca46 100644
--- a/lib/librte_mempool/rte_mempool_version.map
+++ b/lib/librte_mempool/rte_mempool_version.map
@@ -8,9 +8,6 @@ DPDK_2.0 {
rte_mempool_list_dump;
rte_mempool_lookup;
rte_mempool_walk;
- rte_mempool_xmem_create;
- rte_mempool_xmem_size;
- rte_mempool_xmem_usage;
local: *;
};
@@ -34,8 +31,6 @@ DPDK_16.07 {
rte_mempool_ops_table;
rte_mempool_populate_anon;
rte_mempool_populate_default;
- rte_mempool_populate_phys;
- rte_mempool_populate_phys_tab;
rte_mempool_populate_virt;
rte_mempool_register_ops;
rte_mempool_set_ops_byname;
@@ -46,7 +41,6 @@ DPDK_17.11 {
global:
rte_mempool_populate_iova;
- rte_mempool_populate_iova_tab;
} DPDK_16.07;
diff --git a/lib/librte_meter/rte_meter.c b/lib/librte_meter/rte_meter.c
index 59af5ef2..473f69ab 100644
--- a/lib/librte_meter/rte_meter.c
+++ b/lib/librte_meter/rte_meter.c
@@ -30,7 +30,7 @@ rte_meter_get_tb_params(uint64_t hz, uint64_t rate, uint64_t *tb_period, uint64_
}
}
-int __rte_experimental
+int
rte_meter_srtcm_profile_config(struct rte_meter_srtcm_profile *p,
struct rte_meter_srtcm_params *params)
{
@@ -68,7 +68,7 @@ rte_meter_srtcm_config(struct rte_meter_srtcm *m,
return 0;
}
-int __rte_experimental
+int
rte_meter_trtcm_profile_config(struct rte_meter_trtcm_profile *p,
struct rte_meter_trtcm_params *params)
{
diff --git a/lib/librte_meter/rte_meter.h b/lib/librte_meter/rte_meter.h
index 03d80566..58a05158 100644
--- a/lib/librte_meter/rte_meter.h
+++ b/lib/librte_meter/rte_meter.h
@@ -20,7 +20,6 @@ extern "C" {
***/
#include <stdint.h>
-#include <rte_compat.h>
/*
* Application Programmer's Interface (API)
@@ -82,7 +81,7 @@ struct rte_meter_trtcm;
* @return
* 0 upon success, error code otherwise
*/
-int __rte_experimental
+int
rte_meter_srtcm_profile_config(struct rte_meter_srtcm_profile *p,
struct rte_meter_srtcm_params *params);
@@ -96,7 +95,7 @@ rte_meter_srtcm_profile_config(struct rte_meter_srtcm_profile *p,
* @return
* 0 upon success, error code otherwise
*/
-int __rte_experimental
+int
rte_meter_trtcm_profile_config(struct rte_meter_trtcm_profile *p,
struct rte_meter_trtcm_params *params);
diff --git a/lib/librte_meter/rte_meter_version.map b/lib/librte_meter/rte_meter_version.map
index 9215d4cb..cb79f0c2 100644
--- a/lib/librte_meter/rte_meter_version.map
+++ b/lib/librte_meter/rte_meter_version.map
@@ -11,7 +11,7 @@ DPDK_2.0 {
local: *;
};
-EXPERIMENTAL {
+DPDK_18.08 {
global:
rte_meter_srtcm_profile_config;
diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c
index 258f0582..99a96b65 100644
--- a/lib/librte_metrics/rte_metrics.c
+++ b/lib/librte_metrics/rte_metrics.c
@@ -96,6 +96,9 @@ rte_metrics_reg_names(const char * const *names, uint16_t cnt_names)
/* Some sanity checks */
if (cnt_names < 1 || names == NULL)
return -EINVAL;
+ for (idx_name = 0; idx_name < cnt_names; idx_name++)
+ if (names[idx_name] == NULL)
+ return -EINVAL;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
if (memzone == NULL)
@@ -159,6 +162,11 @@ rte_metrics_update_values(int port_id,
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
+
+ if (key >= stats->cnt_stats) {
+ rte_spinlock_unlock(&stats->lock);
+ return -EINVAL;
+ }
idx_metric = key;
cnt_setsize = 1;
while (idx_metric < stats->cnt_stats) {
@@ -200,9 +208,8 @@ rte_metrics_get_names(struct rte_metric_name *names,
int return_value;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
- /* If not allocated, fail silently */
if (memzone == NULL)
- return 0;
+ return -EIO;
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
@@ -238,9 +245,9 @@ rte_metrics_get_values(int port_id,
return -EINVAL;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
- /* If not allocated, fail silently */
if (memzone == NULL)
- return 0;
+ return -EIO;
+
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h
index 72dc2456..f2a8904a 100644
--- a/lib/librte_net/rte_ip.h
+++ b/lib/librte_net/rte_ip.h
@@ -108,25 +108,25 @@ __rte_raw_cksum(const void *buf, size_t len, uint32_t sum)
/* workaround gcc strict-aliasing warning */
uintptr_t ptr = (uintptr_t)buf;
typedef uint16_t __attribute__((__may_alias__)) u16_p;
- const u16_p *u16 = (const u16_p *)ptr;
-
- while (len >= (sizeof(*u16) * 4)) {
- sum += u16[0];
- sum += u16[1];
- sum += u16[2];
- sum += u16[3];
- len -= sizeof(*u16) * 4;
- u16 += 4;
+ const u16_p *u16_buf = (const u16_p *)ptr;
+
+ while (len >= (sizeof(*u16_buf) * 4)) {
+ sum += u16_buf[0];
+ sum += u16_buf[1];
+ sum += u16_buf[2];
+ sum += u16_buf[3];
+ len -= sizeof(*u16_buf) * 4;
+ u16_buf += 4;
}
- while (len >= sizeof(*u16)) {
- sum += *u16;
- len -= sizeof(*u16);
- u16 += 1;
+ while (len >= sizeof(*u16_buf)) {
+ sum += *u16_buf;
+ len -= sizeof(*u16_buf);
+ u16_buf += 1;
}
/* if length is in odd bytes */
if (len == 1)
- sum += *((const uint8_t *)u16);
+ sum += *((const uint8_t *)u16_buf);
return sum;
}
diff --git a/lib/librte_power/channel_commands.h b/lib/librte_power/channel_commands.h
index 5e8b4ab5..ee638eef 100644
--- a/lib/librte_power/channel_commands.h
+++ b/lib/librte_power/channel_commands.h
@@ -48,7 +48,8 @@ enum workload {HIGH, MEDIUM, LOW};
enum policy_to_use {
TRAFFIC,
TIME,
- WORKLOAD
+ WORKLOAD,
+ BRANCH_RATIO
};
struct traffic {
diff --git a/lib/librte_power/power_acpi_cpufreq.c b/lib/librte_power/power_acpi_cpufreq.c
index bce933e9..cd5978d5 100644
--- a/lib/librte_power/power_acpi_cpufreq.c
+++ b/lib/librte_power/power_acpi_cpufreq.c
@@ -623,3 +623,24 @@ power_acpi_disable_turbo(unsigned int lcore_id)
return 0;
}
+
+int power_acpi_get_capabilities(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps)
+{
+ struct rte_power_info *pi;
+
+ if (lcore_id >= RTE_MAX_LCORE) {
+ RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
+ return -1;
+ }
+ if (caps == NULL) {
+ RTE_LOG(ERR, POWER, "Invalid argument\n");
+ return -1;
+ }
+
+ pi = &lcore_power_info[lcore_id];
+ caps->capabilities = 0;
+ caps->turbo = !!(pi->turbo_available);
+
+ return 0;
+}
diff --git a/lib/librte_power/power_acpi_cpufreq.h b/lib/librte_power/power_acpi_cpufreq.h
index edeeb27a..1af74160 100644
--- a/lib/librte_power/power_acpi_cpufreq.h
+++ b/lib/librte_power/power_acpi_cpufreq.h
@@ -14,6 +14,7 @@
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_string_fns.h>
+#include "rte_power.h"
#ifdef __cplusplus
extern "C" {
@@ -196,6 +197,21 @@ int power_acpi_enable_turbo(unsigned int lcore_id);
*/
int power_acpi_disable_turbo(unsigned int lcore_id);
+/**
+ * Returns power capabilities for a specific lcore.
+ *
+ * @param lcore_id
+ * lcore id.
+ * @param caps
+ * pointer to rte_power_core_capabilities object.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int power_acpi_get_capabilities(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_power/power_kvm_vm.c b/lib/librte_power/power_kvm_vm.c
index 38e9066f..20659b72 100644
--- a/lib/librte_power/power_kvm_vm.c
+++ b/lib/librte_power/power_kvm_vm.c
@@ -124,3 +124,11 @@ power_kvm_vm_disable_turbo(unsigned int lcore_id)
{
return send_msg(lcore_id, CPU_POWER_DISABLE_TURBO);
}
+
+struct rte_power_core_capabilities;
+int power_kvm_vm_get_capabilities(__rte_unused unsigned int lcore_id,
+ __rte_unused struct rte_power_core_capabilities *caps)
+{
+ RTE_LOG(ERR, POWER, "rte_power_get_capabilities is not implemented for Virtual Machine Power Management\n");
+ return -ENOTSUP;
+}
diff --git a/lib/librte_power/power_kvm_vm.h b/lib/librte_power/power_kvm_vm.h
index 446d6997..94d4aa12 100644
--- a/lib/librte_power/power_kvm_vm.h
+++ b/lib/librte_power/power_kvm_vm.h
@@ -14,6 +14,7 @@
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_string_fns.h>
+#include "rte_power.h"
#ifdef __cplusplus
extern "C" {
@@ -177,6 +178,22 @@ int power_kvm_vm_enable_turbo(unsigned int lcore_id);
* - Negative on error.
*/
int power_kvm_vm_disable_turbo(unsigned int lcore_id);
+
+/**
+ * Returns power capabilities for a specific lcore.
+ *
+ * @param lcore_id
+ * lcore id.
+ * @param caps
+ * pointer to rte_power_core_capabilities object.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int power_kvm_vm_get_capabilities(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c
index 6c8fb403..208b7919 100644
--- a/lib/librte_power/rte_power.c
+++ b/lib/librte_power/rte_power.c
@@ -24,6 +24,7 @@ rte_power_freq_change_t rte_power_freq_min = NULL;
rte_power_freq_change_t rte_power_turbo_status;
rte_power_freq_change_t rte_power_freq_enable_turbo;
rte_power_freq_change_t rte_power_freq_disable_turbo;
+rte_power_get_capabilities_t rte_power_get_capabilities;
int
rte_power_set_env(enum power_management_env env)
@@ -42,6 +43,7 @@ rte_power_set_env(enum power_management_env env)
rte_power_turbo_status = power_acpi_turbo_status;
rte_power_freq_enable_turbo = power_acpi_enable_turbo;
rte_power_freq_disable_turbo = power_acpi_disable_turbo;
+ rte_power_get_capabilities = power_acpi_get_capabilities;
} else if (env == PM_ENV_KVM_VM) {
rte_power_freqs = power_kvm_vm_freqs;
rte_power_get_freq = power_kvm_vm_get_freq;
@@ -53,6 +55,7 @@ rte_power_set_env(enum power_management_env env)
rte_power_turbo_status = power_kvm_vm_turbo_status;
rte_power_freq_enable_turbo = power_kvm_vm_enable_turbo;
rte_power_freq_disable_turbo = power_kvm_vm_disable_turbo;
+ rte_power_get_capabilities = power_kvm_vm_get_capabilities;
} else {
RTE_LOG(ERR, POWER, "Invalid Power Management Environment(%d) set\n",
env);
diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h
index b4b7357b..d70bc0b3 100644
--- a/lib/librte_power/rte_power.h
+++ b/lib/librte_power/rte_power.h
@@ -247,6 +247,38 @@ extern rte_power_freq_change_t rte_power_freq_enable_turbo;
*/
extern rte_power_freq_change_t rte_power_freq_disable_turbo;
+/**
+ * Power capabilities summary.
+ */
+struct rte_power_core_capabilities {
+ RTE_STD_C11
+ union {
+ uint64_t capabilities;
+ RTE_STD_C11
+ struct {
+ uint64_t turbo:1; /**< Turbo can be enabled. */
+ };
+ };
+};
+
+/**
+ * Returns power capabilities for a specific lcore.
+ * Function pointer definition. Review each environment's
+ * specific documentation for usage.
+ *
+ * @param lcore_id
+ * lcore id.
+ * @param caps
+ * pointer to rte_power_core_capabilities object.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+typedef int (*rte_power_get_capabilities_t)(unsigned int lcore_id,
+ struct rte_power_core_capabilities *caps);
+
+extern rte_power_get_capabilities_t rte_power_get_capabilities;
#ifdef __cplusplus
}
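The query above dispatches through a per-environment function pointer bound by
rte_power_set_env(). A minimal usage sketch, assuming rte_power_init() has
already run for each lcore; the print loop itself is illustrative only:

#include <stdio.h>
#include <rte_lcore.h>
#include <rte_power.h>

static void
print_turbo_caps(void)
{
	struct rte_power_core_capabilities caps;
	unsigned int lcore_id;

	RTE_LCORE_FOREACH(lcore_id) {
		/* Dispatches to the ACPI or KVM handler chosen at init. */
		if (rte_power_get_capabilities(lcore_id, &caps) < 0)
			continue;
		printf("lcore %u: turbo %savailable\n", lcore_id,
				caps.turbo ? "" : "not ");
	}
}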
diff --git a/lib/librte_power/rte_power_version.map b/lib/librte_power/rte_power_version.map
index 96dc42ec..dd587dfb 100644
--- a/lib/librte_power/rte_power_version.map
+++ b/lib/librte_power/rte_power_version.map
@@ -25,4 +25,11 @@ DPDK_17.11 {
rte_power_freq_enable_turbo;
rte_power_turbo_status;
-} DPDK_2.0;
\ No newline at end of file
+} DPDK_2.0;
+
+DPDK_18.08 {
+ global:
+
+ rte_power_get_capabilities;
+
+} DPDK_17.11;
diff --git a/lib/librte_rawdev/Makefile b/lib/librte_rawdev/Makefile
index b9105b06..addb288d 100644
--- a/lib/librte_rawdev/Makefile
+++ b/lib/librte_rawdev/Makefile
@@ -10,7 +10,6 @@ LIB = librte_rawdev.a
LIBABIVER := 1
# build flags
-CFLAGS += -DALLOW_EXPERIMENTAL_API
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)
LDLIBS += -lrte_eal
diff --git a/lib/librte_rawdev/meson.build b/lib/librte_rawdev/meson.build
index dcd37ad4..a20fbdc0 100644
--- a/lib/librte_rawdev/meson.build
+++ b/lib/librte_rawdev/meson.build
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright(c) 2018 Intel Corporation
-allow_experimental_apis = true
sources = files('rte_rawdev.c')
headers = files('rte_rawdev.h', 'rte_rawdev_pmd.h')
diff --git a/lib/librte_rawdev/rte_rawdev.c b/lib/librte_rawdev/rte_rawdev.c
index 284e6aec..62b6b97e 100644
--- a/lib/librte_rawdev/rte_rawdev.c
+++ b/lib/librte_rawdev/rte_rawdev.c
@@ -46,13 +46,13 @@ static struct rte_rawdev_global rawdev_globals = {
struct rte_rawdev_global *rte_rawdev_globals = &rawdev_globals;
/* Raw device, northbound API implementation */
-uint8_t __rte_experimental
+uint8_t
rte_rawdev_count(void)
{
return rte_rawdev_globals->nb_devs;
}
-uint16_t __rte_experimental
+uint16_t
rte_rawdev_get_dev_id(const char *name)
{
uint16_t i;
@@ -69,7 +69,7 @@ rte_rawdev_get_dev_id(const char *name)
return -ENODEV;
}
-int __rte_experimental
+int
rte_rawdev_socket_id(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -80,7 +80,7 @@ rte_rawdev_socket_id(uint16_t dev_id)
return dev->socket_id;
}
-int __rte_experimental
+int
rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info)
{
struct rte_rawdev *rawdev;
@@ -102,7 +102,7 @@ rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info)
return 0;
}
-int __rte_experimental
+int
rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf)
{
struct rte_rawdev *dev;
@@ -131,7 +131,7 @@ rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf)
return diag;
}
-int __rte_experimental
+int
rte_rawdev_queue_conf_get(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf)
@@ -146,7 +146,7 @@ rte_rawdev_queue_conf_get(uint16_t dev_id,
return 0;
}
-int __rte_experimental
+int
rte_rawdev_queue_setup(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf)
@@ -160,7 +160,7 @@ rte_rawdev_queue_setup(uint16_t dev_id,
return (*dev->dev_ops->queue_setup)(dev, queue_id, queue_conf);
}
-int __rte_experimental
+int
rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id)
{
struct rte_rawdev *dev;
@@ -172,7 +172,19 @@ rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id)
return (*dev->dev_ops->queue_release)(dev, queue_id);
}
-int __rte_experimental
+uint16_t
+rte_rawdev_queue_count(uint16_t dev_id)
+{
+ struct rte_rawdev *dev;
+
+ RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+ dev = &rte_rawdevs[dev_id];
+
+ RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_count, -ENOTSUP);
+ return (*dev->dev_ops->queue_count)(dev);
+}
+
+int
rte_rawdev_get_attr(uint16_t dev_id,
const char *attr_name,
uint64_t *attr_value)
@@ -186,7 +198,7 @@ rte_rawdev_get_attr(uint16_t dev_id,
return (*dev->dev_ops->attr_get)(dev, attr_name, attr_value);
}
-int __rte_experimental
+int
rte_rawdev_set_attr(uint16_t dev_id,
const char *attr_name,
const uint64_t attr_value)
@@ -200,7 +212,7 @@ rte_rawdev_set_attr(uint16_t dev_id,
return (*dev->dev_ops->attr_set)(dev, attr_name, attr_value);
}
-int __rte_experimental
+int
rte_rawdev_enqueue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -215,7 +227,7 @@ rte_rawdev_enqueue_buffers(uint16_t dev_id,
return (*dev->dev_ops->enqueue_bufs)(dev, buffers, count, context);
}
-int __rte_experimental
+int
rte_rawdev_dequeue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -230,7 +242,7 @@ rte_rawdev_dequeue_buffers(uint16_t dev_id,
return (*dev->dev_ops->dequeue_bufs)(dev, buffers, count, context);
}
-int __rte_experimental
+int
rte_rawdev_dump(uint16_t dev_id, FILE *f)
{
struct rte_rawdev *dev;
@@ -251,7 +263,7 @@ xstats_get_count(uint16_t dev_id)
return (*dev->dev_ops->xstats_get_names)(dev, NULL, 0);
}
-int __rte_experimental
+int
rte_rawdev_xstats_names_get(uint16_t dev_id,
struct rte_rawdev_xstats_name *xstats_names,
unsigned int size)
@@ -274,7 +286,7 @@ rte_rawdev_xstats_names_get(uint16_t dev_id,
}
/* retrieve rawdev extended statistics */
-int __rte_experimental
+int
rte_rawdev_xstats_get(uint16_t dev_id,
const unsigned int ids[],
uint64_t values[],
@@ -287,7 +299,7 @@ rte_rawdev_xstats_get(uint16_t dev_id,
return (*dev->dev_ops->xstats_get)(dev, ids, values, n);
}
-uint64_t __rte_experimental
+uint64_t
rte_rawdev_xstats_by_name_get(uint16_t dev_id,
const char *name,
unsigned int *id)
@@ -306,7 +318,7 @@ rte_rawdev_xstats_by_name_get(uint16_t dev_id,
return (*dev->dev_ops->xstats_get_by_name)(dev, name, id);
}
-int __rte_experimental
+int
rte_rawdev_xstats_reset(uint16_t dev_id,
const uint32_t ids[], uint32_t nb_ids)
{
@@ -317,7 +329,7 @@ rte_rawdev_xstats_reset(uint16_t dev_id,
return (*dev->dev_ops->xstats_reset)(dev, ids, nb_ids);
}
-int __rte_experimental
+int
rte_rawdev_firmware_status_get(uint16_t dev_id, rte_rawdev_obj_t status_info)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -327,7 +339,7 @@ rte_rawdev_firmware_status_get(uint16_t dev_id, rte_rawdev_obj_t status_info)
return (*dev->dev_ops->firmware_status_get)(dev, status_info);
}
-int __rte_experimental
+int
rte_rawdev_firmware_version_get(uint16_t dev_id, rte_rawdev_obj_t version_info)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -337,7 +349,7 @@ rte_rawdev_firmware_version_get(uint16_t dev_id, rte_rawdev_obj_t version_info)
return (*dev->dev_ops->firmware_version_get)(dev, version_info);
}
-int __rte_experimental
+int
rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -350,7 +362,7 @@ rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image)
return (*dev->dev_ops->firmware_load)(dev, firmware_image);
}
-int __rte_experimental
+int
rte_rawdev_firmware_unload(uint16_t dev_id)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -360,7 +372,7 @@ rte_rawdev_firmware_unload(uint16_t dev_id)
return (*dev->dev_ops->firmware_unload)(dev);
}
-int __rte_experimental
+int
rte_rawdev_selftest(uint16_t dev_id)
{
RTE_RAWDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
@@ -370,7 +382,7 @@ rte_rawdev_selftest(uint16_t dev_id)
return (*dev->dev_ops->dev_selftest)();
}
-int __rte_experimental
+int
rte_rawdev_start(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -397,7 +409,7 @@ rte_rawdev_start(uint16_t dev_id)
return 0;
}
-void __rte_experimental
+void
rte_rawdev_stop(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -419,7 +431,7 @@ rte_rawdev_stop(uint16_t dev_id)
dev->started = 0;
}
-int __rte_experimental
+int
rte_rawdev_close(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -438,7 +450,7 @@ rte_rawdev_close(uint16_t dev_id)
return (*dev->dev_ops->dev_close)(dev);
}
-int __rte_experimental
+int
rte_rawdev_reset(uint16_t dev_id)
{
struct rte_rawdev *dev;
@@ -465,7 +477,7 @@ rte_rawdev_find_free_device_index(void)
return RTE_RAWDEV_MAX_DEVS;
}
-struct rte_rawdev * __rte_experimental
+struct rte_rawdev *
rte_rawdev_pmd_allocate(const char *name, size_t dev_priv_size, int socket_id)
{
struct rte_rawdev *rawdev;
@@ -506,7 +518,7 @@ rte_rawdev_pmd_allocate(const char *name, size_t dev_priv_size, int socket_id)
return rawdev;
}
-int __rte_experimental
+int
rte_rawdev_pmd_release(struct rte_rawdev *rawdev)
{
int ret;
@@ -532,10 +544,7 @@ rte_rawdev_pmd_release(struct rte_rawdev *rawdev)
return 0;
}
-RTE_INIT(librawdev_init_log);
-
-static void
-librawdev_init_log(void)
+RTE_INIT(librawdev_init_log)
{
librawdev_logtype = rte_log_register("lib.rawdev");
if (librawdev_logtype >= 0)
diff --git a/lib/librte_rawdev/rte_rawdev.h b/lib/librte_rawdev/rte_rawdev.h
index 2e14919b..684bfdb8 100644
--- a/lib/librte_rawdev/rte_rawdev.h
+++ b/lib/librte_rawdev/rte_rawdev.h
@@ -35,7 +35,7 @@ typedef void *rte_rawdev_obj_t;
* @return
* The total number of usable raw devices.
*/
-uint8_t __rte_experimental
+uint8_t
rte_rawdev_count(void);
/**
@@ -48,7 +48,7 @@ rte_rawdev_count(void);
* Returns raw device identifier on success.
* - <0: Failure to find named raw device.
*/
-uint16_t __rte_experimental
+uint16_t
rte_rawdev_get_dev_id(const char *name);
/**
@@ -61,7 +61,7 @@ rte_rawdev_get_dev_id(const char *name);
* a default of zero if the socket could not be determined.
* -(-EINVAL) dev_id value is out of range.
*/
-int __rte_experimental
+int
rte_rawdev_socket_id(uint16_t dev_id);
/**
@@ -84,7 +84,7 @@ struct rte_rawdev_info;
* - <0: Error code returned by the driver info get function.
*
*/
-int __rte_experimental
+int
rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info);
/**
@@ -111,7 +111,7 @@ rte_rawdev_info_get(uint16_t dev_id, struct rte_rawdev_info *dev_info);
* - 0: Success, device configured.
* - <0: Error code returned by the driver configuration function.
*/
-int __rte_experimental
+int
rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf);
@@ -137,7 +137,7 @@ rte_rawdev_configure(uint16_t dev_id, struct rte_rawdev_info *dev_conf);
* @see rte_raw_queue_setup()
*
*/
-int __rte_experimental
+int
rte_rawdev_queue_conf_get(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf);
@@ -160,7 +160,7 @@ rte_rawdev_queue_conf_get(uint16_t dev_id,
* - 0: Success, raw queue correctly set up.
* - <0: raw queue configuration failed
*/
-int __rte_experimental
+int
rte_rawdev_queue_setup(uint16_t dev_id,
uint16_t queue_id,
rte_rawdev_obj_t queue_conf);
@@ -180,8 +180,9 @@ rte_rawdev_queue_setup(uint16_t dev_id,
* - 0: Success, raw queue released.
* - <0: raw queue configuration failed
*/
-int __rte_experimental
+int
rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id);
+
/**
* Get the number of raw queues on a specific raw device
*
@@ -190,7 +191,7 @@ rte_rawdev_queue_release(uint16_t dev_id, uint16_t queue_id);
* @return
* - The number of configured raw queues
*/
-uint16_t __rte_experimental
+uint16_t
rte_rawdev_queue_count(uint16_t dev_id);
/**
@@ -208,7 +209,7 @@ rte_rawdev_queue_count(uint16_t dev_id);
* - 0: Success, device started.
* < 0: Failure
*/
-int __rte_experimental
+int
rte_rawdev_start(uint16_t dev_id);
/**
@@ -218,7 +219,7 @@ rte_rawdev_start(uint16_t dev_id);
* @param dev_id
* Raw device identifier.
*/
-void __rte_experimental
+void
rte_rawdev_stop(uint16_t dev_id);
/**
@@ -232,7 +233,7 @@ rte_rawdev_stop(uint16_t dev_id);
* - <0 on failure to close device
* - (-EAGAIN) if device is busy
*/
-int __rte_experimental
+int
rte_rawdev_close(uint16_t dev_id);
/**
@@ -246,7 +247,7 @@ rte_rawdev_close(uint16_t dev_id);
* 0 for successful reset,
* !0 for failure in resetting
*/
-int __rte_experimental
+int
rte_rawdev_reset(uint16_t dev_id);
#define RTE_RAWDEV_NAME_MAX_LEN (64)
@@ -316,7 +317,7 @@ struct rte_rawdev_buf {
* - 0: on success
* - <0: on failure.
*/
-int __rte_experimental
+int
rte_rawdev_dump(uint16_t dev_id, FILE *f);
/**
@@ -338,7 +339,7 @@ rte_rawdev_dump(uint16_t dev_id, FILE *f);
* 0 for success
* !0 Error; attr_value remains untouched in case of error.
*/
-int __rte_experimental
+int
rte_rawdev_get_attr(uint16_t dev_id,
const char *attr_name,
uint64_t *attr_value);
@@ -357,7 +358,7 @@ rte_rawdev_get_attr(uint16_t dev_id,
* 0 for success
* !0 Error
*/
-int __rte_experimental
+int
rte_rawdev_set_attr(uint16_t dev_id,
const char *attr_name,
const uint64_t attr_value);
@@ -383,7 +384,7 @@ rte_rawdev_set_attr(uint16_t dev_id,
* Whether a partial enqueue counts as failure or success is defined
* between the application and the driver implementation.
*/
-int __rte_experimental
+int
rte_rawdev_enqueue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -414,7 +415,7 @@ rte_rawdev_enqueue_buffers(uint16_t dev_id,
* Whether a partial dequeue counts as failure or success is defined
* between the application and the driver implementation.
*/
-int __rte_experimental
+int
rte_rawdev_dequeue_buffers(uint16_t dev_id,
struct rte_rawdev_buf **buffers,
unsigned int count,
@@ -454,7 +455,7 @@ struct rte_rawdev_xstats_name {
* -ENODEV for invalid *dev_id*
* -ENOTSUP if the device doesn't support this function.
*/
-int __rte_experimental
+int
rte_rawdev_xstats_names_get(uint16_t dev_id,
struct rte_rawdev_xstats_name *xstats_names,
unsigned int size);
@@ -478,7 +479,7 @@ rte_rawdev_xstats_names_get(uint16_t dev_id,
* -ENODEV for invalid *dev_id*
* -ENOTSUP if the device doesn't support this function.
*/
-int __rte_experimental
+int
rte_rawdev_xstats_get(uint16_t dev_id,
const unsigned int ids[],
uint64_t values[],
@@ -500,7 +501,7 @@ rte_rawdev_xstats_get(uint16_t dev_id,
* - positive value or zero: the stat value
* - negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
*/
-uint64_t __rte_experimental
+uint64_t
rte_rawdev_xstats_by_name_get(uint16_t dev_id,
const char *name,
unsigned int *id);
@@ -520,7 +521,7 @@ rte_rawdev_xstats_by_name_get(uint16_t dev_id,
* - zero: successfully reset the statistics to zero
* - negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
*/
-int __rte_experimental
+int
rte_rawdev_xstats_reset(uint16_t dev_id,
const uint32_t ids[],
uint32_t nb_ids);
@@ -539,7 +540,7 @@ rte_rawdev_xstats_reset(uint16_t dev_id,
* 0 for success,
* !0 for failure, `status_info` argument state is undefined
*/
-int __rte_experimental
+int
rte_rawdev_firmware_status_get(uint16_t dev_id,
rte_rawdev_obj_t status_info);
@@ -557,7 +558,7 @@ rte_rawdev_firmware_status_get(uint16_t dev_id,
* 0 for success,
* !0 for failure, `version_info` argument state is undefined
*/
-int __rte_experimental
+int
rte_rawdev_firmware_version_get(uint16_t dev_id,
rte_rawdev_obj_t version_info);
@@ -574,7 +575,7 @@ rte_rawdev_firmware_version_get(uint16_t dev_id,
* 0 for successful load
* !0 for failure to load the provided image, or if the image is invalid.
*/
-int __rte_experimental
+int
rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image);
/**
@@ -586,7 +587,7 @@ rte_rawdev_firmware_load(uint16_t dev_id, rte_rawdev_obj_t firmware_image);
* 0 for successful unload
* !0 for failure in unloading
*/
-int __rte_experimental
+int
rte_rawdev_firmware_unload(uint16_t dev_id);
/**
@@ -599,7 +600,7 @@ rte_rawdev_firmware_unload(uint16_t dev_id);
* - -ENOTSUP if the device doesn't support selftest
* - other values < 0 on failure.
*/
-int __rte_experimental
+int
rte_rawdev_selftest(uint16_t dev_id);
#ifdef __cplusplus
diff --git a/lib/librte_rawdev/rte_rawdev_pmd.h b/lib/librte_rawdev/rte_rawdev_pmd.h
index 408adf0f..bb9bbc35 100644
--- a/lib/librte_rawdev/rte_rawdev_pmd.h
+++ b/lib/librte_rawdev/rte_rawdev_pmd.h
@@ -251,6 +251,24 @@ typedef int (*rawdev_queue_release_t)(struct rte_rawdev *dev,
uint16_t queue_id);
/**
+ * Get the number of queues configured on this device.
+ *
+ * The queue count could also be fetched from the device configuration, but
+ * that assumes the driver-managed configuration exposes this information.
+ *
+ * This function returns the supported queue count independently, which is
+ * useful when an iterator over the queues needs to be implemented.
+ *
+ * @param dev
+ * Raw device pointer
+ * @return
+ * Number of queues; 0 is assumed to be a valid response.
+ *
+ */
+typedef uint16_t (*rawdev_queue_count_t)(struct rte_rawdev *dev);
+
+/**
* Enqueue an array of raw buffers to the device.
*
* Buffer being used is opaque - it can be obtained from mempool or from
@@ -506,6 +524,8 @@ struct rte_rawdev_ops {
rawdev_queue_setup_t queue_setup;
/**< Release a raw queue. */
rawdev_queue_release_t queue_release;
+ /**< Get the number of queues attached to the device */
+ rawdev_queue_count_t queue_count;
/**< Enqueue an array of raw buffers to device. */
rawdev_enqueue_bufs_t enqueue_bufs;
@@ -556,7 +576,7 @@ struct rte_rawdev_ops {
* @return
* - Slot in the rte_dev_devices array for a new device;
*/
-struct rte_rawdev * __rte_experimental
+struct rte_rawdev *
rte_rawdev_pmd_allocate(const char *name, size_t dev_private_size,
int socket_id);
@@ -568,7 +588,7 @@ rte_rawdev_pmd_allocate(const char *name, size_t dev_private_size,
* @return
* - 0 on success, negative on error
*/
-int __rte_experimental
+int
rte_rawdev_pmd_release(struct rte_rawdev *rawdev);
/**
@@ -585,7 +605,7 @@ rte_rawdev_pmd_release(struct rte_rawdev *rawdev);
* - Raw device pointer if device is successfully created.
* - NULL if device cannot be created.
*/
-struct rte_rawdev * __rte_experimental
+struct rte_rawdev *
rte_rawdev_pmd_init(const char *name, size_t dev_private_size,
int socket_id);
@@ -597,7 +617,7 @@ rte_rawdev_pmd_init(const char *name, size_t dev_private_size,
* @return
* - 0 on success, negative on error
*/
-int __rte_experimental
+int
rte_rawdev_pmd_uninit(const char *name);
#ifdef __cplusplus
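The new queue_count op slots into rte_rawdev_ops next to the existing queue
ops. A driver-side sketch, assuming a hypothetical skeleton_rawdev private
struct whose nb_queues field is maintained by the driver's queue_setup op:

#include <rte_rawdev_pmd.h>

struct skeleton_rawdev {
	uint16_t nb_queues;	/* updated by the queue_setup op */
};

static uint16_t
skeleton_queue_count(struct rte_rawdev *dev)
{
	struct skeleton_rawdev *skel = dev->dev_private;

	return skel->nb_queues;	/* 0 is a valid answer */
}

static const struct rte_rawdev_ops skeleton_ops = {
	.queue_count = skeleton_queue_count,
	/* ...remaining ops elided... */
};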
diff --git a/lib/librte_rawdev/rte_rawdev_version.map b/lib/librte_rawdev/rte_rawdev_version.map
index af4465e2..b61dbff1 100644
--- a/lib/librte_rawdev/rte_rawdev_version.map
+++ b/lib/librte_rawdev/rte_rawdev_version.map
@@ -1,4 +1,4 @@
-EXPERIMENTAL {
+DPDK_18.08 {
global:
rte_rawdev_close;
@@ -16,6 +16,7 @@ EXPERIMENTAL {
rte_rawdev_pmd_allocate;
rte_rawdev_pmd_release;
rte_rawdev_queue_conf_get;
+ rte_rawdev_queue_count;
rte_rawdev_queue_setup;
rte_rawdev_queue_release;
rte_rawdev_reset;
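On the application side the symbol now lives in the stable DPDK_18.08 node. A
short usage sketch; the queue configuration object is driver-defined, so the
skel_queue_conf type below is a placeholder assumption:

#include <rte_rawdev.h>

struct skel_queue_conf { uint32_t depth; };	/* illustrative only */

static void
walk_queues(uint16_t dev_id)
{
	struct skel_queue_conf conf;
	uint16_t q, nb_queues = rte_rawdev_queue_count(dev_id);

	for (q = 0; q < nb_queues; q++)
		rte_rawdev_queue_conf_get(dev_id, q, &conf);
}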
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index 12458225..7a731d07 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -26,8 +26,9 @@
* - Bulk dequeue.
* - Bulk enqueue.
*
- * Note: the ring implementation is not preemptable. A lcore must not
- * be interrupted by another task that uses the same ring.
+ * Note: the ring implementation is not preemptible. Refer to Programmer's
+ * guide/Environment Abstraction Layer/Multiple pthread/Known Issues/rte_ring
+ * for more information.
*
*/
@@ -382,7 +383,7 @@ __rte_ring_do_dequeue(struct rte_ring *r, void **obj_table,
uint32_t cons_head, cons_next;
uint32_t entries;
- n = __rte_ring_move_cons_head(r, is_sc, n, behavior,
+ n = __rte_ring_move_cons_head(r, (int)is_sc, n, behavior,
&cons_head, &cons_next, &entries);
if (n == 0)
goto end;
diff --git a/lib/librte_ring/rte_ring_c11_mem.h b/lib/librte_ring/rte_ring_c11_mem.h
index cb3f82b1..94df3c4a 100644
--- a/lib/librte_ring/rte_ring_c11_mem.h
+++ b/lib/librte_ring/rte_ring_c11_mem.h
@@ -66,14 +66,14 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp,
*old_head = __atomic_load_n(&r->prod.head,
__ATOMIC_ACQUIRE);
- const uint32_t cons_tail = r->cons.tail;
+
/*
* The subtraction is done between two unsigned 32bits value
* (the result is always modulo 32 bits even if we have
* *old_head > cons_tail). So 'free_entries' is always between 0
* and capacity (which is < size).
*/
- *free_entries = (capacity + cons_tail - *old_head);
+ *free_entries = (capacity + r->cons.tail - *old_head);
/* check that we have enough room in ring */
if (unlikely(n > *free_entries))
@@ -133,13 +133,13 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
n = max;
*old_head = __atomic_load_n(&r->cons.head,
__ATOMIC_ACQUIRE);
- const uint32_t prod_tail = r->prod.tail;
+
/* The subtraction is done between two unsigned 32bits value
* (the result is always modulo 32 bits even if we have
* cons_head > prod_tail). So 'entries' is always between 0
* and size(ring)-1.
*/
- *entries = (prod_tail - *old_head);
+ *entries = (r->prod.tail - *old_head);
/* Set the actual entries for dequeue */
if (n > *entries)
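The comments in these hunks lean on modulo-2^32 arithmetic: the difference of
two uint32_t values is well defined even across a wrap, so free_entries and
entries always land in [0, capacity]. A standalone worked example of the
wrapped case:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t capacity = 1024;
	uint32_t cons_tail = UINT32_MAX - 2;	/* tail just before wrap */
	uint32_t old_head = 5;			/* head already wrapped */

	/* head is 8 slots ahead of tail modulo 2^32 */
	uint32_t free_entries = capacity + cons_tail - old_head;

	printf("free_entries = %u\n", free_entries);	/* prints 1016 */
	return 0;
}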
diff --git a/lib/librte_security/rte_security.c b/lib/librte_security/rte_security.c
index 1e559c99..1954960a 100644
--- a/lib/librte_security/rte_security.c
+++ b/lib/librte_security/rte_security.c
@@ -1,34 +1,6 @@
-/*-
- * BSD LICENSE
- *
- * Copyright 2017 NXP.
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of NXP nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 NXP.
+ * Copyright(c) 2017 Intel Corporation.
*/
#include <rte_malloc.h>
@@ -91,7 +63,6 @@ rte_security_session_destroy(struct rte_security_ctx *instance,
struct rte_security_session *sess)
{
int ret;
- struct rte_mempool *mp = rte_mempool_from_obj(sess);
RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_destroy, -ENOTSUP);
@@ -100,7 +71,7 @@ rte_security_session_destroy(struct rte_security_ctx *instance,
ret = instance->ops->session_destroy(instance->device, sess);
if (!ret)
- rte_mempool_put(mp, (void *)sess);
+ rte_mempool_put(rte_mempool_from_obj(sess), (void *)sess);
return ret;
}
diff --git a/lib/librte_security/rte_security.h b/lib/librte_security/rte_security.h
index afa2861f..b0d1b97e 100644
--- a/lib/librte_security/rte_security.h
+++ b/lib/librte_security/rte_security.h
@@ -1,34 +1,6 @@
-/*-
- * BSD LICENSE
- *
- * Copyright 2017 NXP.
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of NXP nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 NXP.
+ * Copyright(c) 2017 Intel Corporation.
*/
#ifndef _RTE_SECURITY_H_
diff --git a/lib/librte_security/rte_security_driver.h b/lib/librte_security/rte_security_driver.h
index 0583f889..42f42ffe 100644
--- a/lib/librte_security/rte_security_driver.h
+++ b/lib/librte_security/rte_security_driver.h
@@ -1,34 +1,6 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- * Copyright 2017 NXP.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 NXP.
+ * Copyright(c) 2017 Intel Corporation.
*/
#ifndef _RTE_SECURITY_DRIVER_H_
diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
index c11ebcaa..c6354fef 100644
--- a/lib/librte_vhost/iotlb.c
+++ b/lib/librte_vhost/iotlb.c
@@ -303,6 +303,13 @@ out:
return vva;
}
+void
+vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
+{
+ vhost_user_iotlb_cache_remove_all(vq);
+ vhost_user_iotlb_pending_remove_all(vq);
+}
+
int
vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
{
@@ -315,8 +322,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
* The cache has already been initialized,
* just drop all cached and pending entries.
*/
- vhost_user_iotlb_cache_remove_all(vq);
- vhost_user_iotlb_pending_remove_all(vq);
+ vhost_user_iotlb_flush_all(vq);
}
#ifdef RTE_LIBRTE_VHOST_NUMA
diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
index e7083e37..60b9e4c5 100644
--- a/lib/librte_vhost/iotlb.h
+++ b/lib/librte_vhost/iotlb.h
@@ -73,7 +73,7 @@ void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, uint64_t iova,
uint8_t perm);
void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova,
uint64_t size, uint8_t perm);
-
+void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq);
int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index);
#endif /* _VHOST_IOTLB_H_ */
diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
index 7f0cb9bc..b02673d4 100644
--- a/lib/librte_vhost/rte_vhost.h
+++ b/lib/librte_vhost/rte_vhost.h
@@ -58,6 +58,14 @@ extern "C" {
#define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
#endif
+#ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD
+#define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
+#endif
+
+#ifndef VHOST_USER_PROTOCOL_F_HOST_NOTIFIER
+#define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
+#endif
+
/** Indicate whether protocol features negotiation is supported. */
#ifndef VHOST_USER_F_PROTOCOL_FEATURES
#define VHOST_USER_F_PROTOCOL_FEATURES 30
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 0399c37b..d6303174 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -853,6 +853,12 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
vsocket->features = VIRTIO_NET_SUPPORTED_FEATURES;
+ /* Dequeue zero copy can't ensure descriptors are returned in order */
+ if (vsocket->dequeue_zero_copy) {
+ vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER);
+ vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER);
+ }
+
if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
vsocket->supported_features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index afded495..3c9be10a 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -93,9 +93,12 @@ cleanup_device(struct virtio_net *dev, int destroy)
}
void
-free_vq(struct vhost_virtqueue *vq)
+free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
- rte_free(vq->shadow_used_ring);
+ if (vq_is_packed(dev))
+ rte_free(vq->shadow_used_packed);
+ else
+ rte_free(vq->shadow_used_split);
rte_free(vq->batch_copy_elems);
rte_mempool_free(vq->iotlb_pool);
rte_free(vq);
@@ -110,19 +113,16 @@ free_device(struct virtio_net *dev)
uint32_t i;
for (i = 0; i < dev->nr_vring; i++)
- free_vq(dev->virtqueue[i]);
+ free_vq(dev, dev->virtqueue[i]);
rte_free(dev);
}
-int
-vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+static int
+vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
uint64_t req_size, size;
- if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
- goto out;
-
req_size = sizeof(struct vring_desc) * vq->size;
size = req_size;
vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
@@ -153,6 +153,55 @@ vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
if (!vq->used || size != req_size)
return -1;
+ return 0;
+}
+
+static int
+vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ uint64_t req_size, size;
+
+ req_size = sizeof(struct vring_packed_desc) * vq->size;
+ size = req_size;
+ vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
+ &size, VHOST_ACCESS_RW);
+ if (!vq->desc_packed || size != req_size)
+ return -1;
+
+ req_size = sizeof(struct vring_packed_desc_event);
+ size = req_size;
+ vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
+ &size, VHOST_ACCESS_RW);
+ if (!vq->driver_event || size != req_size)
+ return -1;
+
+ req_size = sizeof(struct vring_packed_desc_event);
+ size = req_size;
+ vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
+ &size, VHOST_ACCESS_RW);
+ if (!vq->device_event || size != req_size)
+ return -1;
+
+ return 0;
+}
+
+int
+vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+
+ if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
+ goto out;
+
+ if (vq_is_packed(dev)) {
+ if (vring_translate_packed(dev, vq) < 0)
+ return -1;
+ } else {
+ if (vring_translate_split(dev, vq) < 0)
+ return -1;
+ }
out:
vq->access_ok = 1;
@@ -234,6 +283,9 @@ alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
dev->virtqueue[vring_idx] = vq;
init_vring_queue(dev, vring_idx);
rte_spinlock_init(&vq->access_lock);
+ vq->avail_wrap_counter = 1;
+ vq->used_wrap_counter = 1;
+ vq->signalled_used_valid = false;
dev->nr_vring += 1;
@@ -268,21 +320,21 @@ vhost_new_device(void)
struct virtio_net *dev;
int i;
- dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
- if (dev == NULL) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "Failed to allocate memory for new dev.\n");
- return -1;
- }
-
for (i = 0; i < MAX_VHOST_DEVICE; i++) {
if (vhost_devices[i] == NULL)
break;
}
+
if (i == MAX_VHOST_DEVICE) {
RTE_LOG(ERR, VHOST_CONFIG,
"Failed to find a free slot for new device.\n");
- rte_free(dev);
+ return -1;
+ }
+
+ dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
+ if (dev == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to allocate memory for new dev.\n");
return -1;
}
@@ -291,10 +343,27 @@ vhost_new_device(void)
dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
dev->slave_req_fd = -1;
dev->vdpa_dev_id = -1;
+ rte_spinlock_init(&dev->slave_req_lock);
return i;
}
+void
+vhost_destroy_device_notify(struct virtio_net *dev)
+{
+ struct rte_vdpa_device *vdpa_dev;
+ int did;
+
+ if (dev->flags & VIRTIO_DEV_RUNNING) {
+ did = dev->vdpa_dev_id;
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (vdpa_dev && vdpa_dev->ops->dev_close)
+ vdpa_dev->ops->dev_close(dev->vid);
+ dev->flags &= ~VIRTIO_DEV_RUNNING;
+ dev->notify_ops->destroy_device(dev->vid);
+ }
+}
+
/*
* Invoked when the vhost-user connection is broken (when
* the virtio device is being detached).
@@ -303,20 +372,11 @@ void
vhost_destroy_device(int vid)
{
struct virtio_net *dev = get_device(vid);
- struct rte_vdpa_device *vdpa_dev;
- int did = -1;
if (dev == NULL)
return;
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- did = dev->vdpa_dev_id;
- vdpa_dev = rte_vdpa_get_device(did);
- if (vdpa_dev && vdpa_dev->ops->dev_close)
- vdpa_dev->ops->dev_close(dev->vid);
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- dev->notify_ops->destroy_device(vid);
- }
+ vhost_destroy_device_notify(dev);
cleanup_device(dev, 1);
free_device(dev);
@@ -346,6 +406,8 @@ vhost_detach_vdpa_device(int vid)
if (dev == NULL)
return;
+ vhost_user_host_notifier_ctrl(vid, false);
+
dev->vdpa_dev_id = -1;
}
@@ -558,7 +620,11 @@ rte_vhost_vring_call(int vid, uint16_t vring_idx)
if (!vq)
return -1;
- vhost_vring_call(dev, vq);
+ if (vq_is_packed(dev))
+ vhost_vring_call_packed(dev, vq);
+ else
+ vhost_vring_call_split(dev, vq);
+
return 0;
}
@@ -579,19 +645,52 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
}
+static inline void
+vhost_enable_notify_split(struct vhost_virtqueue *vq, int enable)
+{
+ if (enable)
+ vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
+ else
+ vq->used->flags |= VRING_USED_F_NO_NOTIFY;
+}
+
+static inline void
+vhost_enable_notify_packed(struct virtio_net *dev,
+ struct vhost_virtqueue *vq, int enable)
+{
+ uint16_t flags;
+
+ if (!enable)
+ vq->device_event->flags = VRING_EVENT_F_DISABLE;
+
+ flags = VRING_EVENT_F_ENABLE;
+ if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
+ flags = VRING_EVENT_F_DESC;
+ vq->device_event->off_wrap = vq->last_avail_idx |
+ vq->avail_wrap_counter << 15;
+ }
+
+ rte_smp_wmb();
+
+ vq->device_event->flags = flags;
+}
+
int
rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
{
struct virtio_net *dev = get_device(vid);
+ struct vhost_virtqueue *vq;
if (!dev)
return -1;
- if (enable)
- dev->virtqueue[queue_id]->used->flags &=
- ~VRING_USED_F_NO_NOTIFY;
+ vq = dev->virtqueue[queue_id];
+
+ if (vq_is_packed(dev))
+ vhost_enable_notify_packed(dev, vq, enable);
else
- dev->virtqueue[queue_id]->used->flags |= VRING_USED_F_NO_NOTIFY;
+ vhost_enable_notify_split(vq, enable);
+
return 0;
}
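vhost_enable_notify_packed() packs two fields into the 16-bit off_wrap word:
the low 15 bits carry the event descriptor index and bit 15 carries the wrap
counter, hence the << 15 above. A standalone encode/decode sketch of that
layout:

#include <stdbool.h>
#include <stdint.h>

static uint16_t
encode_off_wrap(uint16_t idx, bool wrap)
{
	/* idx is below the ring size (<= 32768), so it fits in 15 bits */
	return (uint16_t)((idx & 0x7fff) | ((uint16_t)wrap << 15));
}

static void
decode_off_wrap(uint16_t off_wrap, uint16_t *idx, bool *wrap)
{
	*idx = off_wrap & 0x7fff;
	*wrap = off_wrap >> 15;
}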
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 58c425a5..760a09c0 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -43,6 +43,7 @@
* from vring to do scatter RX.
*/
struct buf_vector {
+ uint64_t buf_iova;
uint64_t buf_addr;
uint32_t buf_len;
uint32_t desc_idx;
@@ -55,6 +56,7 @@ struct buf_vector {
struct zcopy_mbuf {
struct rte_mbuf *mbuf;
uint32_t desc_idx;
+ uint16_t desc_count;
uint16_t in_use;
TAILQ_ENTRY(zcopy_mbuf) next;
@@ -79,19 +81,35 @@ struct log_cache_entry {
unsigned long val;
};
+struct vring_used_elem_packed {
+ uint16_t id;
+ uint32_t len;
+ uint32_t count;
+};
+
/**
* Structure contains variables relevant to RX/TX virtqueues.
*/
struct vhost_virtqueue {
- struct vring_desc *desc;
- struct vring_avail *avail;
- struct vring_used *used;
+ union {
+ struct vring_desc *desc;
+ struct vring_packed_desc *desc_packed;
+ };
+ union {
+ struct vring_avail *avail;
+ struct vring_packed_desc_event *driver_event;
+ };
+ union {
+ struct vring_used *used;
+ struct vring_packed_desc_event *device_event;
+ };
uint32_t size;
uint16_t last_avail_idx;
uint16_t last_used_idx;
/* Last used index we notify to front end. */
uint16_t signalled_used;
+ bool signalled_used_valid;
#define VIRTIO_INVALID_EVENTFD (-1)
#define VIRTIO_UNINITIALIZED_EVENTFD (-2)
@@ -115,12 +133,17 @@ struct vhost_virtqueue {
struct zcopy_mbuf *zmbufs;
struct zcopy_mbuf_list zmbuf_list;
- struct vring_used_elem *shadow_used_ring;
+ union {
+ struct vring_used_elem *shadow_used_split;
+ struct vring_used_elem_packed *shadow_used_packed;
+ };
uint16_t shadow_used_idx;
struct vhost_vring_addr ring_addrs;
struct batch_copy_elem *batch_copy_elems;
uint16_t batch_copy_nb_elems;
+ bool used_wrap_counter;
+ bool avail_wrap_counter;
struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];
uint16_t log_cache_nb_elem;
@@ -191,6 +214,42 @@ struct vhost_msg {
#define VIRTIO_F_VERSION_1 32
#endif
+/* Declare packed ring related bits for older kernels */
+#ifndef VIRTIO_F_RING_PACKED
+
+#define VIRTIO_F_RING_PACKED 34
+
+#define VRING_DESC_F_NEXT 1
+#define VRING_DESC_F_WRITE 2
+#define VRING_DESC_F_INDIRECT 4
+
+#define VRING_DESC_F_AVAIL (1ULL << 7)
+#define VRING_DESC_F_USED (1ULL << 15)
+
+struct vring_packed_desc {
+ uint64_t addr;
+ uint32_t len;
+ uint16_t id;
+ uint16_t flags;
+};
+
+#define VRING_EVENT_F_ENABLE 0x0
+#define VRING_EVENT_F_DISABLE 0x1
+#define VRING_EVENT_F_DESC 0x2
+
+struct vring_packed_desc_event {
+ uint16_t off_wrap;
+ uint16_t flags;
+};
+#endif
+
+/*
+ * Available and used descs are in the same order
+ */
+#ifndef VIRTIO_F_IN_ORDER
+#define VIRTIO_F_IN_ORDER 35
+#endif
+
/* Features supported by this builtin vhost-user net driver. */
#define VIRTIO_NET_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \
(1ULL << VIRTIO_F_ANY_LAYOUT) | \
@@ -214,7 +273,8 @@ struct vhost_msg {
(1ULL << VIRTIO_NET_F_GUEST_ECN) | \
(1ULL << VIRTIO_RING_F_INDIRECT_DESC) | \
(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
- (1ULL << VIRTIO_NET_F_MTU) | \
+ (1ULL << VIRTIO_NET_F_MTU) | \
+ (1ULL << VIRTIO_F_IN_ORDER) | \
(1ULL << VIRTIO_F_IOMMU_PLATFORM))
@@ -301,6 +361,7 @@ struct virtio_net {
struct guest_page *guest_pages;
int slave_req_fd;
+ rte_spinlock_t slave_req_lock;
/*
* Device id to identify a specific backend device.
@@ -314,6 +375,19 @@ struct virtio_net {
struct vhost_user_extern_ops extern_ops;
} __rte_cache_aligned;
+static __rte_always_inline bool
+vq_is_packed(struct virtio_net *dev)
+{
+ return dev->features & (1ull << VIRTIO_F_RING_PACKED);
+}
+
+static inline bool
+desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
+{
+ return wrap_counter == !!(desc->flags & VRING_DESC_F_AVAIL) &&
+ wrap_counter != !!(desc->flags & VRING_DESC_F_USED);
+}
+
#define VHOST_LOG_PAGE 4096
/*
@@ -428,6 +502,7 @@ vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
vq->log_cache[i].offset = offset;
vq->log_cache[i].val = (1UL << bit_nr);
+ vq->log_cache_nb_elem++;
}
static __rte_always_inline void
@@ -535,9 +610,10 @@ int vhost_new_device(void);
void cleanup_device(struct virtio_net *dev, int destroy);
void reset_device(struct virtio_net *dev);
void vhost_destroy_device(int);
+void vhost_destroy_device_notify(struct virtio_net *dev);
void cleanup_vq(struct vhost_virtqueue *vq, int destroy);
-void free_vq(struct vhost_virtqueue *vq);
+void free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq);
int alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx);
@@ -588,10 +664,10 @@ vhost_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
}
static __rte_always_inline void
-vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq)
+vhost_vring_call_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
/* Flush used->idx update before we read avail->flags. */
- rte_mb();
+ rte_smp_mb();
/* Don't kick guest if we don't reach index specified by guest. */
if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
@@ -615,4 +691,55 @@ vhost_vring_call(struct virtio_net *dev, struct vhost_virtqueue *vq)
}
}
+static __rte_always_inline void
+vhost_vring_call_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ uint16_t old, new, off, off_wrap;
+ bool signalled_used_valid, kick = false;
+
+ /* Flush used desc update. */
+ rte_smp_mb();
+
+ if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
+ if (vq->driver_event->flags !=
+ VRING_EVENT_F_DISABLE)
+ kick = true;
+ goto kick;
+ }
+
+ old = vq->signalled_used;
+ new = vq->last_used_idx;
+ vq->signalled_used = new;
+ signalled_used_valid = vq->signalled_used_valid;
+ vq->signalled_used_valid = true;
+
+ if (vq->driver_event->flags != VRING_EVENT_F_DESC) {
+ if (vq->driver_event->flags != VRING_EVENT_F_DISABLE)
+ kick = true;
+ goto kick;
+ }
+
+ if (unlikely(!signalled_used_valid)) {
+ kick = true;
+ goto kick;
+ }
+
+ rte_smp_rmb();
+
+ off_wrap = vq->driver_event->off_wrap;
+ off = off_wrap & ~(1 << 15);
+
+ if (new <= old)
+ old -= vq->size;
+
+ if (vq->used_wrap_counter != off_wrap >> 15)
+ off -= vq->size;
+
+ if (vhost_need_event(off, new, old))
+ kick = true;
+kick:
+ if (kick)
+ eventfd_write(vq->callfd, (eventfd_t)1);
+}
+
#endif /* _VHOST_NET_CDEV_H_ */
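The packed-ring paths above hinge on desc_is_avail(): a descriptor belongs to
the device when its AVAIL flag matches the ring's wrap counter while its USED
flag does not, which is why alloc_vring_queue() now starts both wrap counters
at 1. A minimal standalone check mirroring that predicate:

#include <stdbool.h>
#include <stdint.h>

#define F_AVAIL (1 << 7)
#define F_USED  (1 << 15)

static bool
is_avail(uint16_t flags, bool wrap)
{
	return wrap == !!(flags & F_AVAIL) && wrap != !!(flags & F_USED);
}

/*
 * First lap (wrap == true): the driver sets AVAIL and leaves USED clear,
 * so is_avail() returns true. Once the device writes the descriptor back
 * with AVAIL and USED both set for this lap, it reads as used again.
 */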
diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c
index f1650738..57341ef8 100644
--- a/lib/librte_vhost/vhost_crypto.c
+++ b/lib/librte_vhost/vhost_crypto.c
@@ -940,8 +940,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto,
struct vhost_virtqueue *vq, struct rte_crypto_op *op,
struct vring_desc *head, uint16_t desc_idx)
{
- struct vhost_crypto_data_req *vc_req = RTE_PTR_ADD(op->sym->m_src,
- sizeof(struct rte_mbuf));
+ struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(op->sym->m_src);
struct rte_cryptodev_sym_session *session;
struct virtio_crypto_op_data_req *req, tmp_req;
struct virtio_crypto_inhdr *inhdr;
@@ -1062,8 +1061,7 @@ vhost_crypto_finalize_one_request(struct rte_crypto_op *op,
{
struct rte_mbuf *m_src = op->sym->m_src;
struct rte_mbuf *m_dst = op->sym->m_dst;
- struct vhost_crypto_data_req *vc_req = RTE_PTR_ADD(m_src,
- sizeof(struct rte_mbuf));
+ struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(m_src);
uint16_t desc_idx;
int ret = 0;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 947290fc..a2d4c9ff 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -135,17 +135,7 @@ vhost_user_set_owner(void)
static int
vhost_user_reset_owner(struct virtio_net *dev)
{
- struct rte_vdpa_device *vdpa_dev;
- int did = -1;
-
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- did = dev->vdpa_dev_id;
- vdpa_dev = rte_vdpa_get_device(did);
- if (vdpa_dev && vdpa_dev->ops->dev_close)
- vdpa_dev->ops->dev_close(dev->vid);
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- dev->notify_ops->destroy_device(dev->vid);
- }
+ vhost_destroy_device_notify(dev);
cleanup_device(dev, 0);
reset_device(dev);
@@ -243,7 +233,7 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features)
dev->virtqueue[dev->nr_vring] = NULL;
cleanup_vq(vq, 1);
- free_vq(vq);
+ free_vq(dev, vq);
}
}
@@ -292,13 +282,26 @@ vhost_user_set_vring_num(struct virtio_net *dev,
TAILQ_INIT(&vq->zmbuf_list);
}
- vq->shadow_used_ring = rte_malloc(NULL,
+ if (vq_is_packed(dev)) {
+ vq->shadow_used_packed = rte_malloc(NULL,
+ vq->size *
+ sizeof(struct vring_used_elem_packed),
+ RTE_CACHE_LINE_SIZE);
+ if (!vq->shadow_used_packed) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to allocate memory for shadow used ring.\n");
+ return -1;
+ }
+
+ } else {
+ vq->shadow_used_split = rte_malloc(NULL,
vq->size * sizeof(struct vring_used_elem),
RTE_CACHE_LINE_SIZE);
- if (!vq->shadow_used_ring) {
- RTE_LOG(ERR, VHOST_CONFIG,
- "failed to allocate memory for shadow used ring.\n");
- return -1;
+ if (!vq->shadow_used_split) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "failed to allocate memory for shadow used ring.\n");
+ return -1;
+ }
}
vq->batch_copy_elems = rte_malloc(NULL,
@@ -325,7 +328,8 @@ numa_realloc(struct virtio_net *dev, int index)
struct virtio_net *old_dev;
struct vhost_virtqueue *old_vq, *vq;
struct zcopy_mbuf *new_zmbuf;
- struct vring_used_elem *new_shadow_used_ring;
+ struct vring_used_elem *new_shadow_used_split;
+ struct vring_used_elem_packed *new_shadow_used_packed;
struct batch_copy_elem *new_batch_copy_elems;
int ret;
@@ -360,13 +364,26 @@ numa_realloc(struct virtio_net *dev, int index)
vq->zmbufs = new_zmbuf;
}
- new_shadow_used_ring = rte_malloc_socket(NULL,
- vq->size * sizeof(struct vring_used_elem),
- RTE_CACHE_LINE_SIZE,
- newnode);
- if (new_shadow_used_ring) {
- rte_free(vq->shadow_used_ring);
- vq->shadow_used_ring = new_shadow_used_ring;
+ if (vq_is_packed(dev)) {
+ new_shadow_used_packed = rte_malloc_socket(NULL,
+ vq->size *
+ sizeof(struct vring_used_elem_packed),
+ RTE_CACHE_LINE_SIZE,
+ newnode);
+ if (new_shadow_used_packed) {
+ rte_free(vq->shadow_used_packed);
+ vq->shadow_used_packed = new_shadow_used_packed;
+ }
+ } else {
+ new_shadow_used_split = rte_malloc_socket(NULL,
+ vq->size *
+ sizeof(struct vring_used_elem),
+ RTE_CACHE_LINE_SIZE,
+ newnode);
+ if (new_shadow_used_split) {
+ rte_free(vq->shadow_used_split);
+ vq->shadow_used_split = new_shadow_used_split;
+ }
}
new_batch_copy_elems = rte_malloc_socket(NULL,
@@ -477,6 +494,51 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
struct vhost_vring_addr *addr = &vq->ring_addrs;
uint64_t len;
+ if (vq_is_packed(dev)) {
+ len = sizeof(struct vring_packed_desc) * vq->size;
+ vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
+ ring_addr_to_vva(dev, vq, addr->desc_user_addr, &len);
+ vq->log_guest_addr = 0;
+ if (vq->desc_packed == NULL ||
+ len != sizeof(struct vring_packed_desc) *
+ vq->size) {
+ RTE_LOG(DEBUG, VHOST_CONFIG,
+ "(%d) failed to map desc_packed ring.\n",
+ dev->vid);
+ return dev;
+ }
+
+ dev = numa_realloc(dev, vq_index);
+ vq = dev->virtqueue[vq_index];
+ addr = &vq->ring_addrs;
+
+ len = sizeof(struct vring_packed_desc_event);
+ vq->driver_event = (struct vring_packed_desc_event *)
+ (uintptr_t)ring_addr_to_vva(dev,
+ vq, addr->avail_user_addr, &len);
+ if (vq->driver_event == NULL ||
+ len != sizeof(struct vring_packed_desc_event)) {
+ RTE_LOG(DEBUG, VHOST_CONFIG,
+ "(%d) failed to find driver area address.\n",
+ dev->vid);
+ return dev;
+ }
+
+ len = sizeof(struct vring_packed_desc_event);
+ vq->device_event = (struct vring_packed_desc_event *)
+ (uintptr_t)ring_addr_to_vva(dev,
+ vq, addr->used_user_addr, &len);
+ if (vq->device_event == NULL ||
+ len != sizeof(struct vring_packed_desc_event)) {
+ RTE_LOG(DEBUG, VHOST_CONFIG,
+ "(%d) failed to find device area address.\n",
+ dev->vid);
+ return dev;
+ }
+
+ return dev;
+ }
+
/* The addresses are converted from QEMU virtual to Vhost virtual. */
if (vq->desc && vq->avail && vq->used)
return dev;
@@ -751,6 +813,11 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
dev->mem = NULL;
}
+ /* Flush IOTLB cache as previous HVAs are now invalid */
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ for (i = 0; i < dev->nr_vring; i++)
+ vhost_user_iotlb_flush_all(dev->virtqueue[i]);
+
dev->nr_guest_pages = 0;
if (!dev->guest_pages) {
dev->max_guest_pages = 8;
@@ -885,10 +952,20 @@ err_mmap:
return -1;
}
-static int
-vq_is_ready(struct vhost_virtqueue *vq)
+static bool
+vq_is_ready(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
- return vq && vq->desc && vq->avail && vq->used &&
+ bool rings_ok;
+
+ if (!vq)
+ return false;
+
+ if (vq_is_packed(dev))
+ rings_ok = !!vq->desc_packed;
+ else
+ rings_ok = vq->desc && vq->avail && vq->used;
+
+ return rings_ok &&
vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD &&
vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD;
}
@@ -905,7 +982,7 @@ virtio_is_ready(struct virtio_net *dev)
for (i = 0; i < dev->nr_vring; i++) {
vq = dev->virtqueue[i];
- if (!vq_is_ready(vq))
+ if (!vq_is_ready(dev, vq))
return 0;
}
@@ -996,18 +1073,9 @@ vhost_user_get_vring_base(struct virtio_net *dev,
VhostUserMsg *msg)
{
struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];
- struct rte_vdpa_device *vdpa_dev;
- int did = -1;
/* We have to stop the queue (virtio) if it is running. */
- if (dev->flags & VIRTIO_DEV_RUNNING) {
- did = dev->vdpa_dev_id;
- vdpa_dev = rte_vdpa_get_device(did);
- if (vdpa_dev && vdpa_dev->ops->dev_close)
- vdpa_dev->ops->dev_close(dev->vid);
- dev->flags &= ~VIRTIO_DEV_RUNNING;
- dev->notify_ops->destroy_device(dev->vid);
- }
+ vhost_destroy_device_notify(dev);
dev->flags &= ~VIRTIO_DEV_READY;
dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED;
@@ -1035,8 +1103,13 @@ vhost_user_get_vring_base(struct virtio_net *dev,
if (dev->dequeue_zero_copy)
free_zmbufs(vq);
- rte_free(vq->shadow_used_ring);
- vq->shadow_used_ring = NULL;
+ if (vq_is_packed(dev)) {
+ rte_free(vq->shadow_used_packed);
+ vq->shadow_used_packed = NULL;
+ } else {
+ rte_free(vq->shadow_used_split);
+ vq->shadow_used_split = NULL;
+ }
rte_free(vq->batch_copy_elems);
vq->batch_copy_elems = NULL;
@@ -1384,6 +1457,22 @@ send_vhost_reply(int sockfd, struct VhostUserMsg *msg)
return send_vhost_message(sockfd, msg, NULL, 0);
}
+static int
+send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg,
+ int *fds, int fd_num)
+{
+ int ret;
+
+ if (msg->flags & VHOST_USER_NEED_REPLY)
+ rte_spinlock_lock(&dev->slave_req_lock);
+
+ ret = send_vhost_message(dev->slave_req_fd, msg, fds, fd_num);
+ if (ret < 0 && (msg->flags & VHOST_USER_NEED_REPLY))
+ rte_spinlock_unlock(&dev->slave_req_lock);
+
+ return ret;
+}
+
/*
* Allocate a queue pair if it hasn't been allocated yet
*/
@@ -1705,11 +1794,45 @@ skip_to_reply:
if (vdpa_dev->ops->dev_conf)
vdpa_dev->ops->dev_conf(dev->vid);
dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED;
+ if (vhost_user_host_notifier_ctrl(dev->vid, true) != 0) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "(%d) software relay is used for vDPA, performance may be low.\n",
+ dev->vid);
+ }
}
return 0;
}
+static int process_slave_message_reply(struct virtio_net *dev,
+ const VhostUserMsg *msg)
+{
+ VhostUserMsg msg_reply;
+ int ret;
+
+ if ((msg->flags & VHOST_USER_NEED_REPLY) == 0)
+ return 0;
+
+ if (read_vhost_message(dev->slave_req_fd, &msg_reply) < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (msg_reply.request.slave != msg->request.slave) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Received unexpected msg type (%u), expected %u\n",
+ msg_reply.request.slave, msg->request.slave);
+ ret = -1;
+ goto out;
+ }
+
+ ret = msg_reply.payload.u64 ? -1 : 0;
+
+out:
+ rte_spinlock_unlock(&dev->slave_req_lock);
+ return ret;
+}
+
int
vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
{
@@ -1735,3 +1858,101 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
return 0;
}
+
+static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev,
+ int index, int fd,
+ uint64_t offset,
+ uint64_t size)
+{
+ int *fdp = NULL;
+ size_t fd_num = 0;
+ int ret;
+ struct VhostUserMsg msg = {
+ .request.slave = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG,
+ .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY,
+ .size = sizeof(msg.payload.area),
+ .payload.area = {
+ .u64 = index & VHOST_USER_VRING_IDX_MASK,
+ .size = size,
+ .offset = offset,
+ },
+ };
+
+ if (fd < 0)
+ msg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
+ else {
+ fdp = &fd;
+ fd_num = 1;
+ }
+
+ ret = send_vhost_slave_message(dev, &msg, fdp, fd_num);
+ if (ret < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to set host notifier (%d)\n", ret);
+ return ret;
+ }
+
+ return process_slave_message_reply(dev, &msg);
+}
+
+int vhost_user_host_notifier_ctrl(int vid, bool enable)
+{
+ struct virtio_net *dev;
+ struct rte_vdpa_device *vdpa_dev;
+ int vfio_device_fd, did, ret = 0;
+ uint64_t offset, size;
+ unsigned int i;
+
+ dev = get_device(vid);
+ if (!dev)
+ return -ENODEV;
+
+ did = dev->vdpa_dev_id;
+ if (did < 0)
+ return -EINVAL;
+
+ if (!(dev->features & (1ULL << VIRTIO_F_VERSION_1)) ||
+ !(dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES)) ||
+ !(dev->protocol_features &
+ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ)) ||
+ !(dev->protocol_features &
+ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD)) ||
+ !(dev->protocol_features &
+ (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER)))
+ return -ENOTSUP;
+
+ vdpa_dev = rte_vdpa_get_device(did);
+ if (!vdpa_dev)
+ return -ENODEV;
+
+ RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_vfio_device_fd, -ENOTSUP);
+ RTE_FUNC_PTR_OR_ERR_RET(vdpa_dev->ops->get_notify_area, -ENOTSUP);
+
+ vfio_device_fd = vdpa_dev->ops->get_vfio_device_fd(vid);
+ if (vfio_device_fd < 0)
+ return -ENOTSUP;
+
+ if (enable) {
+ for (i = 0; i < dev->nr_vring; i++) {
+ if (vdpa_dev->ops->get_notify_area(vid, i, &offset,
+ &size) < 0) {
+ ret = -ENOTSUP;
+ goto disable;
+ }
+
+ if (vhost_user_slave_set_vring_host_notifier(dev, i,
+ vfio_device_fd, offset, size) < 0) {
+ ret = -EFAULT;
+ goto disable;
+ }
+ }
+ } else {
+disable:
+ for (i = 0; i < dev->nr_vring; i++) {
+ vhost_user_slave_set_vring_host_notifier(dev, i, -1,
+ 0, 0);
+ }
+ }
+
+ return ret;
+}
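vhost_user_host_notifier_ctrl() can only hand a notify area to the master if
the vDPA driver implements the get_vfio_device_fd and get_notify_area ops it
calls above. A hedged driver-side sketch; the static fd, the fixed BAR offset,
and the page-per-queue layout are illustrative assumptions, not a real PMD:

#include <stdint.h>
#include <unistd.h>
#include <rte_common.h>
#include <rte_vdpa.h>

static int vfio_dev_fd = -1;		/* set when the PMD opens VFIO */
#define NOTIFY_BAR_OFFSET 0x3000	/* illustrative doorbell offset */

static int
my_get_vfio_device_fd(int vid __rte_unused)
{
	return vfio_dev_fd;
}

static int
my_get_notify_area(int vid __rte_unused, int qid,
		uint64_t *offset, uint64_t *size)
{
	/* assume one doorbell page per queue, mmap-able by the master */
	*offset = NOTIFY_BAR_OFFSET + (uint64_t)qid * getpagesize();
	*size = getpagesize();
	return 0;
}

static struct rte_vdpa_dev_ops my_vdpa_ops = {
	.get_vfio_device_fd = my_get_vfio_device_fd,
	.get_notify_area = my_get_notify_area,
	/* ...remaining ops elided... */
};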
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index 1ad5cf46..42166adf 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -20,7 +20,9 @@
(1ULL << VHOST_USER_PROTOCOL_F_REPLY_ACK) | \
(1ULL << VHOST_USER_PROTOCOL_F_NET_MTU) | \
(1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
- (1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION))
+ (1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER))
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
@@ -54,6 +56,7 @@ typedef enum VhostUserRequest {
typedef enum VhostUserSlaveRequest {
VHOST_USER_SLAVE_NONE = 0,
VHOST_USER_SLAVE_IOTLB_MSG = 1,
+ VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
VHOST_USER_SLAVE_MAX
} VhostUserSlaveRequest;
@@ -99,6 +102,12 @@ typedef struct VhostUserCryptoSessionParam {
uint8_t auth_key_buf[VHOST_USER_CRYPTO_MAX_HMAC_KEY_LENGTH];
} VhostUserCryptoSessionParam;
+typedef struct VhostUserVringArea {
+ uint64_t u64;
+ uint64_t size;
+ uint64_t offset;
+} VhostUserVringArea;
+
typedef struct VhostUserMsg {
union {
uint32_t master; /* a VhostUserRequest value */
@@ -120,6 +129,7 @@ typedef struct VhostUserMsg {
VhostUserLog log;
struct vhost_iotlb_msg iotlb;
VhostUserCryptoSessionParam crypto_session;
+ VhostUserVringArea area;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
} __attribute((packed)) VhostUserMsg;
@@ -133,6 +143,7 @@ typedef struct VhostUserMsg {
/* vhost_user.c */
int vhost_user_msg_handler(int vid, int fd);
int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm);
+int vhost_user_host_notifier_ctrl(int vid, bool enable);
/* socket.c */
int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 76ec5f08..99c7afc8 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -25,22 +25,27 @@
#define MAX_BATCH_LEN 256
+static __rte_always_inline bool
+rxvq_is_mergeable(struct virtio_net *dev)
+{
+ return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
+}
+
static bool
is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
{
return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
}
-static __rte_always_inline struct vring_desc *
+static __rte_always_inline void *
alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct vring_desc *desc)
+ uint64_t desc_addr, uint64_t desc_len)
{
- struct vring_desc *idesc;
+ void *idesc;
uint64_t src, dst;
- uint64_t len, remain = desc->len;
- uint64_t desc_addr = desc->addr;
+ uint64_t len, remain = desc_len;
- idesc = rte_malloc(__func__, desc->len, 0);
+ idesc = rte_malloc(__func__, desc_len, 0);
if (unlikely(!idesc))
return 0;
@@ -66,17 +71,18 @@ alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
static __rte_always_inline void
-free_ind_table(struct vring_desc *idesc)
+free_ind_table(void *idesc)
{
rte_free(idesc);
}
static __rte_always_inline void
-do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint16_t to, uint16_t from, uint16_t size)
+do_flush_shadow_used_ring_split(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ uint16_t to, uint16_t from, uint16_t size)
{
rte_memcpy(&vq->used->ring[to],
- &vq->shadow_used_ring[from],
+ &vq->shadow_used_split[from],
size * sizeof(struct vring_used_elem));
vhost_log_cache_used_vring(dev, vq,
offsetof(struct vring_used, ring[to]),
@@ -84,22 +90,22 @@ do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
static __rte_always_inline void
-flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
+flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
if (used_idx + vq->shadow_used_idx <= vq->size) {
- do_flush_shadow_used_ring(dev, vq, used_idx, 0,
+ do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
vq->shadow_used_idx);
} else {
uint16_t size;
/* update used ring interval [used_idx, vq->size] */
size = vq->size - used_idx;
- do_flush_shadow_used_ring(dev, vq, used_idx, 0, size);
+ do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);
/* update the left half used ring interval [0, left_size] */
- do_flush_shadow_used_ring(dev, vq, 0, size,
+ do_flush_shadow_used_ring_split(dev, vq, 0, size,
vq->shadow_used_idx - size);
}
vq->last_used_idx += vq->shadow_used_idx;
@@ -109,18 +115,84 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
vhost_log_cache_sync(dev, vq);
*(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;
+ vq->shadow_used_idx = 0;
vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
sizeof(vq->used->idx));
}
static __rte_always_inline void
-update_shadow_used_ring(struct vhost_virtqueue *vq,
+update_shadow_used_ring_split(struct vhost_virtqueue *vq,
uint16_t desc_idx, uint16_t len)
{
uint16_t i = vq->shadow_used_idx++;
- vq->shadow_used_ring[i].id = desc_idx;
- vq->shadow_used_ring[i].len = len;
+ vq->shadow_used_split[i].id = desc_idx;
+ vq->shadow_used_split[i].len = len;
+}
+
+static __rte_always_inline void
+flush_shadow_used_ring_packed(struct virtio_net *dev,
+ struct vhost_virtqueue *vq)
+{
+ int i;
+ uint16_t used_idx = vq->last_used_idx;
+
+ /* Split loop in two to save memory barriers */
+ for (i = 0; i < vq->shadow_used_idx; i++) {
+ vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;
+ vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;
+
+ used_idx += vq->shadow_used_packed[i].count;
+ if (used_idx >= vq->size)
+ used_idx -= vq->size;
+ }
+
+ rte_smp_wmb();
+
+ for (i = 0; i < vq->shadow_used_idx; i++) {
+ uint16_t flags;
+
+ if (vq->shadow_used_packed[i].len)
+ flags = VRING_DESC_F_WRITE;
+ else
+ flags = 0;
+
+ if (vq->used_wrap_counter) {
+ flags |= VRING_DESC_F_USED;
+ flags |= VRING_DESC_F_AVAIL;
+ } else {
+ flags &= ~VRING_DESC_F_USED;
+ flags &= ~VRING_DESC_F_AVAIL;
+ }
+
+ vq->desc_packed[vq->last_used_idx].flags = flags;
+
+ vhost_log_cache_used_vring(dev, vq,
+ vq->last_used_idx *
+ sizeof(struct vring_packed_desc),
+ sizeof(struct vring_packed_desc));
+
+ vq->last_used_idx += vq->shadow_used_packed[i].count;
+ if (vq->last_used_idx >= vq->size) {
+ vq->used_wrap_counter ^= 1;
+ vq->last_used_idx -= vq->size;
+ }
+ }
+
+ rte_smp_wmb();
+ vq->shadow_used_idx = 0;
+ vhost_log_cache_sync(dev, vq);
+}
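
The flags written in the second loop follow the packed-ring ownership convention: a descriptor counts as available when its AVAIL bit matches the current wrap counter and its USED bit does not. A self-contained sketch of that test, with bit positions taken from the virtio 1.1 spec (the library's own helper may differ in name):

#include <stdbool.h>
#include <stdint.h>

#define EXAMPLE_DESC_F_AVAIL	(1 << 7)
#define EXAMPLE_DESC_F_USED	(1 << 15)

/* Available to the device: AVAIL agrees with the wrap counter,
 * USED disagrees with it. */
static inline bool
example_desc_is_avail(uint16_t flags, bool wrap_counter)
{
	bool avail = !!(flags & EXAMPLE_DESC_F_AVAIL);
	bool used = !!(flags & EXAMPLE_DESC_F_USED);

	return avail == wrap_counter && used != wrap_counter;
}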
+
+static __rte_always_inline void
+update_shadow_used_ring_packed(struct vhost_virtqueue *vq,
+ uint16_t desc_idx, uint16_t len, uint16_t count)
+{
+ uint16_t i = vq->shadow_used_idx++;
+
+ vq->shadow_used_packed[i].id = desc_idx;
+ vq->shadow_used_packed[i].len = len;
+ vq->shadow_used_packed[i].count = count;
}
static inline void
@@ -135,6 +207,8 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len);
PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
}
+
+ vq->batch_copy_nb_elems = 0;
}
static inline void
@@ -146,6 +220,8 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq)
for (i = 0; i < count; i++)
rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
+
+ vq->batch_copy_nb_elems = 0;
}
/* avoid the write operation when possible, to lessen cache issues */
@@ -154,7 +230,7 @@ do_data_copy_dequeue(struct vhost_virtqueue *vq)
(var) = (val); \
} while (0)
-static void
+static __rte_always_inline void
virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
{
uint64_t csum_l4 = m_buf->ol_flags & PKT_TX_L4_MASK;
@@ -216,324 +292,47 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
}
static __rte_always_inline int
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct vring_desc *descs, struct rte_mbuf *m,
- uint16_t desc_idx, uint32_t size)
+map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct buf_vector *buf_vec, uint16_t *vec_idx,
+ uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
{
- uint32_t desc_avail, desc_offset;
- uint32_t mbuf_avail, mbuf_offset;
- uint32_t cpy_len;
- uint64_t desc_chunck_len;
- struct vring_desc *desc;
- uint64_t desc_addr, desc_gaddr;
- /* A counter to avoid desc dead loop chain */
- uint16_t nr_desc = 1;
- struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
- uint16_t copy_nb = vq->batch_copy_nb_elems;
- int error = 0;
+ uint16_t vec_id = *vec_idx;
- desc = &descs[desc_idx];
- desc_chunck_len = desc->len;
- desc_gaddr = desc->addr;
- desc_addr = vhost_iova_to_vva(dev, vq, desc_gaddr,
- &desc_chunck_len, VHOST_ACCESS_RW);
- /*
- * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
- * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
- * otherwise stores offset on the stack instead of in a register.
- */
- if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr) {
- error = -1;
- goto out;
- }
+ while (desc_len) {
+ uint64_t desc_addr;
+ uint64_t desc_chunck_len = desc_len;
- rte_prefetch0((void *)(uintptr_t)desc_addr);
-
- if (likely(desc_chunck_len >= dev->vhost_hlen)) {
- virtio_enqueue_offload(m,
- (struct virtio_net_hdr *)(uintptr_t)desc_addr);
- PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
- vhost_log_cache_write(dev, vq, desc_gaddr, dev->vhost_hlen);
- } else {
- struct virtio_net_hdr vnet_hdr;
- uint64_t remain = dev->vhost_hlen;
- uint64_t len;
- uint64_t src = (uint64_t)(uintptr_t)&vnet_hdr, dst;
- uint64_t guest_addr = desc_gaddr;
-
- virtio_enqueue_offload(m, &vnet_hdr);
-
- while (remain) {
- len = remain;
- dst = vhost_iova_to_vva(dev, vq, guest_addr,
- &len, VHOST_ACCESS_RW);
- if (unlikely(!dst || !len)) {
- error = -1;
- goto out;
- }
-
- rte_memcpy((void *)(uintptr_t)dst,
- (void *)(uintptr_t)src, len);
-
- PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0);
- vhost_log_cache_write(dev, vq, guest_addr, len);
- remain -= len;
- guest_addr += len;
- src += len;
- }
- }
+ if (unlikely(vec_id >= BUF_VECTOR_MAX))
+ return -1;
- desc_avail = desc->len - dev->vhost_hlen;
- if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
- desc_chunck_len = desc_avail;
- desc_gaddr = desc->addr + dev->vhost_hlen;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
+ desc_addr = vhost_iova_to_vva(dev, vq,
+ desc_iova,
&desc_chunck_len,
- VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
-
- desc_offset = 0;
- } else {
- desc_offset = dev->vhost_hlen;
- desc_chunck_len -= dev->vhost_hlen;
- }
-
- mbuf_avail = rte_pktmbuf_data_len(m);
- mbuf_offset = 0;
- while (mbuf_avail != 0 || m->next != NULL) {
- /* done with current mbuf, fetch next */
- if (mbuf_avail == 0) {
- m = m->next;
-
- mbuf_offset = 0;
- mbuf_avail = rte_pktmbuf_data_len(m);
- }
-
- /* done with current desc buf, fetch next */
- if (desc_avail == 0) {
- if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
- /* Room in vring buffer is not enough */
- error = -1;
- goto out;
- }
- if (unlikely(desc->next >= size || ++nr_desc > size)) {
- error = -1;
- goto out;
- }
-
- desc = &descs[desc->next];
- desc_chunck_len = desc->len;
- desc_gaddr = desc->addr;
- desc_addr = vhost_iova_to_vva(dev, vq, desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
-
- desc_offset = 0;
- desc_avail = desc->len;
- } else if (unlikely(desc_chunck_len == 0)) {
- desc_chunck_len = desc_avail;
- desc_gaddr += desc_offset;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
- &desc_chunck_len, VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
- desc_offset = 0;
- }
-
- cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
- if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
- rte_memcpy((void *)((uintptr_t)(desc_addr +
- desc_offset)),
- rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
- cpy_len);
- vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
- cpy_len);
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
- cpy_len, 0);
- } else {
- batch_copy[copy_nb].dst =
- (void *)((uintptr_t)(desc_addr + desc_offset));
- batch_copy[copy_nb].src =
- rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
- batch_copy[copy_nb].log_addr = desc_gaddr + desc_offset;
- batch_copy[copy_nb].len = cpy_len;
- copy_nb++;
- }
-
- mbuf_avail -= cpy_len;
- mbuf_offset += cpy_len;
- desc_avail -= cpy_len;
- desc_offset += cpy_len;
- desc_chunck_len -= cpy_len;
- }
-
-out:
- vq->batch_copy_nb_elems = copy_nb;
-
- return error;
-}
-
-/**
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that are successfully
- * added to the RX queue. This function works when the mbuf is scattered, but
- * it doesn't support the mergeable feature.
- */
-static __rte_always_inline uint32_t
-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
- struct rte_mbuf **pkts, uint32_t count)
-{
- struct vhost_virtqueue *vq;
- uint16_t avail_idx, free_entries, start_idx;
- uint16_t desc_indexes[MAX_PKT_BURST];
- struct vring_desc *descs;
- uint16_t used_idx;
- uint32_t i, sz;
-
- VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
- RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
- dev->vid, __func__, queue_id);
- return 0;
- }
-
- vq = dev->virtqueue[queue_id];
-
- rte_spinlock_lock(&vq->access_lock);
-
- if (unlikely(vq->enabled == 0))
- goto out_access_unlock;
-
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_lock(vq);
-
- if (unlikely(vq->access_ok == 0)) {
- if (unlikely(vring_translate(dev, vq) < 0)) {
- count = 0;
- goto out;
- }
- }
-
- avail_idx = *((volatile uint16_t *)&vq->avail->idx);
- start_idx = vq->last_used_idx;
- free_entries = avail_idx - start_idx;
- count = RTE_MIN(count, free_entries);
- count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
- if (count == 0)
- goto out;
-
- VHOST_LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
- dev->vid, start_idx, start_idx + count);
-
- vq->batch_copy_nb_elems = 0;
-
- /* Retrieve all of the desc indexes first to avoid caching issues. */
- rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]);
- for (i = 0; i < count; i++) {
- used_idx = (start_idx + i) & (vq->size - 1);
- desc_indexes[i] = vq->avail->ring[used_idx];
- vq->used->ring[used_idx].id = desc_indexes[i];
- vq->used->ring[used_idx].len = pkts[i]->pkt_len +
- dev->vhost_hlen;
- vhost_log_cache_used_vring(dev, vq,
- offsetof(struct vring_used, ring[used_idx]),
- sizeof(vq->used->ring[used_idx]));
- }
-
- rte_prefetch0(&vq->desc[desc_indexes[0]]);
- for (i = 0; i < count; i++) {
- struct vring_desc *idesc = NULL;
- uint16_t desc_idx = desc_indexes[i];
- int err;
-
- if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) {
- uint64_t dlen = vq->desc[desc_idx].len;
- descs = (struct vring_desc *)(uintptr_t)
- vhost_iova_to_vva(dev,
- vq, vq->desc[desc_idx].addr,
- &dlen, VHOST_ACCESS_RO);
- if (unlikely(!descs)) {
- count = i;
- break;
- }
-
- if (unlikely(dlen < vq->desc[desc_idx].len)) {
- /*
- * The indirect desc table is not contiguous
- * in process VA space, we have to copy it.
- */
- idesc = alloc_copy_ind_table(dev, vq,
- &vq->desc[desc_idx]);
- if (unlikely(!idesc))
- break;
-
- descs = idesc;
- }
-
- desc_idx = 0;
- sz = vq->desc[desc_idx].len / sizeof(*descs);
- } else {
- descs = vq->desc;
- sz = vq->size;
- }
-
- err = copy_mbuf_to_desc(dev, vq, descs, pkts[i], desc_idx, sz);
- if (unlikely(err)) {
- count = i;
- free_ind_table(idesc);
- break;
- }
+ perm);
+ if (unlikely(!desc_addr))
+ return -1;
- if (i + 1 < count)
- rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
+ buf_vec[vec_id].buf_iova = desc_iova;
+ buf_vec[vec_id].buf_addr = desc_addr;
+ buf_vec[vec_id].buf_len = desc_chunck_len;
- if (unlikely(!!idesc))
- free_ind_table(idesc);
+ desc_len -= desc_chunck_len;
+ desc_iova += desc_chunck_len;
+ vec_id++;
}
+ *vec_idx = vec_id;
- do_data_copy_enqueue(dev, vq);
-
- rte_smp_wmb();
-
- vhost_log_cache_sync(dev, vq);
-
- *(volatile uint16_t *)&vq->used->idx += count;
- vq->last_used_idx += count;
- vhost_log_used_vring(dev, vq,
- offsetof(struct vring_used, idx),
- sizeof(vq->used->idx));
-
- vhost_vring_call(dev, vq);
-out:
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_unlock(vq);
-
-out_access_unlock:
- rte_spinlock_unlock(&vq->access_lock);
-
- return count;
+ return 0;
}
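
Note that the loop above may emit several buf_vec entries for a single guest descriptor: vhost_iova_to_vva() can shrink desc_chunck_len to the largest host-contiguous mapping, so the remainder is translated in further iterations. An illustrative caller, with hypothetical IOVA and length values:

/* Sketch only; relies on the types and helpers in this file. */
static int
example_translate_one_desc(struct virtio_net *dev,
			   struct vhost_virtqueue *vq,
			   struct buf_vector *vec)
{
	uint16_t nvec = 0;

	/* 0x10000/8192 are made-up guest IOVA and length values. */
	if (map_one_desc(dev, vq, vec, &nvec, 0x10000, 8192,
			 VHOST_ACCESS_RO) < 0)
		return -1;

	/* nvec >= 1; each vec[i] is contiguous in host VA space. */
	return nvec;
}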
static __rte_always_inline int
-fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint32_t avail_idx, uint32_t *vec_idx,
+fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint32_t avail_idx, uint16_t *vec_idx,
struct buf_vector *buf_vec, uint16_t *desc_chain_head,
- uint16_t *desc_chain_len)
+ uint16_t *desc_chain_len, uint8_t perm)
{
uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
- uint32_t vec_id = *vec_idx;
+ uint16_t vec_id = *vec_idx;
uint32_t len = 0;
uint64_t dlen;
struct vring_desc *descs = vq->desc;
@@ -555,7 +354,8 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
* The indirect desc table is not contiguous
* in the process VA space, so we have to copy it.
*/
- idesc = alloc_copy_ind_table(dev, vq, &vq->desc[idx]);
+ idesc = alloc_copy_ind_table(dev, vq,
+ vq->desc[idx].addr, vq->desc[idx].len);
if (unlikely(!idesc))
return -1;
@@ -566,16 +366,19 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
}
while (1) {
- if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) {
+ if (unlikely(idx >= vq->size)) {
free_ind_table(idesc);
return -1;
}
len += descs[idx].len;
- buf_vec[vec_id].buf_addr = descs[idx].addr;
- buf_vec[vec_id].buf_len = descs[idx].len;
- buf_vec[vec_id].desc_idx = idx;
- vec_id++;
+
+ if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+ descs[idx].addr, descs[idx].len,
+ perm))) {
+ free_ind_table(idesc);
+ return -1;
+ }
if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
break;
@@ -596,13 +399,14 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
* Returns -1 on fail, 0 on success
*/
static inline int
-reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
+reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint32_t size, struct buf_vector *buf_vec,
- uint16_t *num_buffers, uint16_t avail_head)
+ uint16_t *num_buffers, uint16_t avail_head,
+ uint16_t *nr_vec)
{
uint16_t cur_idx;
- uint32_t vec_idx = 0;
- uint16_t tries = 0;
+ uint16_t vec_idx = 0;
+ uint16_t max_tries, tries = 0;
uint16_t head_idx = 0;
uint16_t len = 0;
@@ -610,49 +414,223 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
*num_buffers = 0;
cur_idx = vq->last_avail_idx;
+ if (rxvq_is_mergeable(dev))
+ max_tries = vq->size - 1;
+ else
+ max_tries = 1;
+
while (size > 0) {
if (unlikely(cur_idx == avail_head))
return -1;
+ /*
+ * If we have tried all available ring items and still
+ * can't get enough buffers, something abnormal has
+ * happened.
+ */
+ if (unlikely(++tries > max_tries))
+ return -1;
- if (unlikely(fill_vec_buf(dev, vq, cur_idx, &vec_idx, buf_vec,
- &head_idx, &len) < 0))
+ if (unlikely(fill_vec_buf_split(dev, vq, cur_idx,
+ &vec_idx, buf_vec,
+ &head_idx, &len,
+ VHOST_ACCESS_RW) < 0))
return -1;
len = RTE_MIN(len, size);
- update_shadow_used_ring(vq, head_idx, len);
+ update_shadow_used_ring_split(vq, head_idx, len);
size -= len;
cur_idx++;
- tries++;
*num_buffers += 1;
+ }
+
+ *nr_vec = vec_idx;
+
+ return 0;
+}
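
The max_tries split reflects the feature difference: with VIRTIO_NET_F_MRG_RXBUF one packet may be scattered over several descriptor chains, with the chain count reported back through the virtio-net header, while without it everything must fit into a single chain. For reference, a sketch of the mergeable header layout as described by the virtio spec (the real struct ships with the virtio headers):

#include <stdint.h>

struct example_virtio_net_hdr_mrg_rxbuf {
	uint8_t flags;
	uint8_t gso_type;
	uint16_t hdr_len;
	uint16_t gso_size;
	uint16_t csum_start;
	uint16_t csum_offset;
	uint16_t num_buffers;	/* receives *num_buffers on mergeable RX */
};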
+
+static __rte_always_inline int
+fill_vec_buf_packed_indirect(struct virtio_net *dev,
+ struct vhost_virtqueue *vq,
+ struct vring_packed_desc *desc, uint16_t *vec_idx,
+ struct buf_vector *buf_vec, uint16_t *len, uint8_t perm)
+{
+ uint16_t i;
+ uint32_t nr_descs;
+ uint16_t vec_id = *vec_idx;
+ uint64_t dlen;
+ struct vring_packed_desc *descs, *idescs = NULL;
+
+ dlen = desc->len;
+ descs = (struct vring_packed_desc *)(uintptr_t)
+ vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO);
+ if (unlikely(!descs))
+ return -1;
+
+ if (unlikely(dlen < desc->len)) {
+ /*
+ * The indirect desc table is not contiguous
+ * in the process VA space, so we have to copy it.
+ */
+ idescs = alloc_copy_ind_table(dev, vq, desc->addr, desc->len);
+ if (unlikely(!idescs))
+ return -1;
+
+ descs = idescs;
+ }
+
+ nr_descs = desc->len / sizeof(struct vring_packed_desc);
+ if (unlikely(nr_descs >= vq->size)) {
+ free_ind_table(idescs);
+ return -1;
+ }
+
+ for (i = 0; i < nr_descs; i++) {
+ if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
+ free_ind_table(idescs);
+ return -1;
+ }
+
+ *len += descs[i].len;
+ if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+ descs[i].addr, descs[i].len,
+ perm)))
+ return -1;
+ }
+ *vec_idx = vec_id;
+
+ if (unlikely(!!idescs))
+ free_ind_table(idescs);
+
+ return 0;
+}
+
+static __rte_always_inline int
+fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint16_t avail_idx, uint16_t *desc_count,
+ struct buf_vector *buf_vec, uint16_t *vec_idx,
+ uint16_t *buf_id, uint16_t *len, uint8_t perm)
+{
+ bool wrap_counter = vq->avail_wrap_counter;
+ struct vring_packed_desc *descs = vq->desc_packed;
+ uint16_t vec_id = *vec_idx;
+
+ if (avail_idx < vq->last_avail_idx)
+ wrap_counter ^= 1;
+
+ if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
+ return -1;
+
+ *desc_count = 0;
+
+ while (1) {
+ if (unlikely(vec_id >= BUF_VECTOR_MAX))
+ return -1;
+
+ *desc_count += 1;
+ *buf_id = descs[avail_idx].id;
+
+ if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) {
+ if (unlikely(fill_vec_buf_packed_indirect(dev, vq,
+ &descs[avail_idx],
+ &vec_id, buf_vec,
+ len, perm) < 0))
+ return -1;
+ } else {
+ *len += descs[avail_idx].len;
+
+ if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
+ descs[avail_idx].addr,
+ descs[avail_idx].len,
+ perm)))
+ return -1;
+ }
+
+ if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0)
+ break;
+
+ if (++avail_idx >= vq->size) {
+ avail_idx -= vq->size;
+ wrap_counter ^= 1;
+ }
+ }
+
+ *vec_idx = vec_id;
+
+ return 0;
+}
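
One subtlety above: the caller may pass an avail_idx that already wrapped past the ring end relative to last_avail_idx, in which case the effective wrap counter for the availability test must be flipped. A minimal sketch of that adjustment:

#include <stdbool.h>
#include <stdint.h>

/* Indices stay in [0, ring_size); laps are tracked by a 1-bit wrap
 * counter, so an index numerically behind last_avail_idx implies one
 * extra lap. */
static inline bool
example_effective_wrap(uint16_t avail_idx, uint16_t last_avail_idx,
		       bool wrap_counter)
{
	return avail_idx < last_avail_idx ? !wrap_counter : wrap_counter;
}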
+
+/*
+ * Returns -1 on fail, 0 on success
+ */
+static inline int
+reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint32_t size, struct buf_vector *buf_vec,
+ uint16_t *nr_vec, uint16_t *num_buffers,
+ uint16_t *nr_descs)
+{
+ uint16_t avail_idx;
+ uint16_t vec_idx = 0;
+ uint16_t max_tries, tries = 0;
+
+ uint16_t buf_id = 0;
+ uint16_t len = 0;
+ uint16_t desc_count;
+
+ *num_buffers = 0;
+ avail_idx = vq->last_avail_idx;
+ if (rxvq_is_mergeable(dev))
+ max_tries = vq->size - 1;
+ else
+ max_tries = 1;
+
+ while (size > 0) {
/*
* If we have tried all available ring items and still
* can't get enough buffers, something abnormal has
* happened.
*/
- if (unlikely(tries >= vq->size))
+ if (unlikely(++tries > max_tries))
return -1;
+
+ if (unlikely(fill_vec_buf_packed(dev, vq,
+ avail_idx, &desc_count,
+ buf_vec, &vec_idx,
+ &buf_id, &len,
+ VHOST_ACCESS_RO) < 0))
+ return -1;
+
+ len = RTE_MIN(len, size);
+ update_shadow_used_ring_packed(vq, buf_id, len, desc_count);
+ size -= len;
+
+ avail_idx += desc_count;
+ if (avail_idx >= vq->size)
+ avail_idx -= vq->size;
+
+ *nr_descs += desc_count;
+ *num_buffers += 1;
}
+ *nr_vec = vec_idx;
+
return 0;
}
static __rte_always_inline int
-copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
+copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf *m, struct buf_vector *buf_vec,
- uint16_t num_buffers)
+ uint16_t nr_vec, uint16_t num_buffers)
{
uint32_t vec_idx = 0;
- uint64_t desc_addr, desc_gaddr;
uint32_t mbuf_offset, mbuf_avail;
- uint32_t desc_offset, desc_avail;
+ uint32_t buf_offset, buf_avail;
+ uint64_t buf_addr, buf_iova, buf_len;
uint32_t cpy_len;
- uint64_t desc_chunck_len;
- uint64_t hdr_addr, hdr_phys_addr;
+ uint64_t hdr_addr;
struct rte_mbuf *hdr_mbuf;
struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
- uint16_t copy_nb = vq->batch_copy_nb_elems;
int error = 0;
if (unlikely(m == NULL)) {
@@ -660,82 +638,61 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
goto out;
}
- desc_chunck_len = buf_vec[vec_idx].buf_len;
- desc_gaddr = buf_vec[vec_idx].buf_addr;
- desc_addr = vhost_iova_to_vva(dev, vq,
- desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RW);
- if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr) {
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+
+ if (nr_vec > 1)
+ rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr);
+
+ if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
error = -1;
goto out;
}
hdr_mbuf = m;
- hdr_addr = desc_addr;
- if (unlikely(desc_chunck_len < dev->vhost_hlen))
+ hdr_addr = buf_addr;
+ if (unlikely(buf_len < dev->vhost_hlen))
hdr = &tmp_hdr;
else
hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
- hdr_phys_addr = desc_gaddr;
- rte_prefetch0((void *)(uintptr_t)hdr_addr);
VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
dev->vid, num_buffers);
- desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
- if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
- desc_chunck_len = desc_avail;
- desc_gaddr += dev->vhost_hlen;
- desc_addr = vhost_iova_to_vva(dev, vq,
- desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
-
- desc_offset = 0;
+ if (unlikely(buf_len < dev->vhost_hlen)) {
+ buf_offset = dev->vhost_hlen - buf_len;
+ vec_idx++;
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+ buf_avail = buf_len - buf_offset;
} else {
- desc_offset = dev->vhost_hlen;
- desc_chunck_len -= dev->vhost_hlen;
+ buf_offset = dev->vhost_hlen;
+ buf_avail = buf_len - dev->vhost_hlen;
}
-
mbuf_avail = rte_pktmbuf_data_len(m);
mbuf_offset = 0;
while (mbuf_avail != 0 || m->next != NULL) {
- /* done with current desc buf, get the next one */
- if (desc_avail == 0) {
+ /* done with current buf, get the next one */
+ if (buf_avail == 0) {
vec_idx++;
- desc_chunck_len = buf_vec[vec_idx].buf_len;
- desc_gaddr = buf_vec[vec_idx].buf_addr;
- desc_addr =
- vhost_iova_to_vva(dev, vq,
- desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
+ if (unlikely(vec_idx >= nr_vec)) {
error = -1;
goto out;
}
- /* Prefetch buffer address. */
- rte_prefetch0((void *)(uintptr_t)desc_addr);
- desc_offset = 0;
- desc_avail = buf_vec[vec_idx].buf_len;
- } else if (unlikely(desc_chunck_len == 0)) {
- desc_chunck_len = desc_avail;
- desc_gaddr += desc_offset;
- desc_addr = vhost_iova_to_vva(dev, vq,
- desc_gaddr,
- &desc_chunck_len, VHOST_ACCESS_RW);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
- desc_offset = 0;
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+
+ /* Prefetch next buffer address. */
+ if (vec_idx + 1 < nr_vec)
+ rte_prefetch0((void *)(uintptr_t)
+ buf_vec[vec_idx + 1].buf_addr);
+ buf_offset = 0;
+ buf_avail = buf_len;
}
/* done with current mbuf, get the next one */
@@ -748,24 +705,21 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
if (hdr_addr) {
virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
- ASSIGN_UNLESS_EQUAL(hdr->num_buffers, num_buffers);
+ if (rxvq_is_mergeable(dev))
+ ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
+ num_buffers);
if (unlikely(hdr == &tmp_hdr)) {
uint64_t len;
uint64_t remain = dev->vhost_hlen;
uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
- uint64_t guest_addr = hdr_phys_addr;
+ uint64_t iova = buf_vec[0].buf_iova;
+ uint16_t hdr_vec_idx = 0;
while (remain) {
- len = remain;
- dst = vhost_iova_to_vva(dev, vq,
- guest_addr, &len,
- VHOST_ACCESS_RW);
- if (unlikely(!dst || !len)) {
- error = -1;
- goto out;
- }
-
+ len = RTE_MIN(remain,
+ buf_vec[hdr_vec_idx].buf_len);
+ dst = buf_vec[hdr_vec_idx].buf_addr;
rte_memcpy((void *)(uintptr_t)dst,
(void *)(uintptr_t)src,
len);
@@ -773,103 +727,125 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
PRINT_PACKET(dev, (uintptr_t)dst,
(uint32_t)len, 0);
vhost_log_cache_write(dev, vq,
- guest_addr, len);
+ iova, len);
remain -= len;
- guest_addr += len;
+ iova += len;
src += len;
+ hdr_vec_idx++;
}
} else {
PRINT_PACKET(dev, (uintptr_t)hdr_addr,
dev->vhost_hlen, 0);
- vhost_log_cache_write(dev, vq, hdr_phys_addr,
+ vhost_log_cache_write(dev, vq,
+ buf_vec[0].buf_iova,
dev->vhost_hlen);
}
hdr_addr = 0;
}
- cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
+ cpy_len = RTE_MIN(buf_avail, mbuf_avail);
- if (likely(cpy_len > MAX_BATCH_LEN || copy_nb >= vq->size)) {
- rte_memcpy((void *)((uintptr_t)(desc_addr +
- desc_offset)),
+ if (likely(cpy_len > MAX_BATCH_LEN ||
+ vq->batch_copy_nb_elems >= vq->size)) {
+ rte_memcpy((void *)((uintptr_t)(buf_addr + buf_offset)),
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
cpy_len);
- vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
+ vhost_log_cache_write(dev, vq, buf_iova + buf_offset,
cpy_len);
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+ PRINT_PACKET(dev, (uintptr_t)(buf_addr + buf_offset),
cpy_len, 0);
} else {
- batch_copy[copy_nb].dst =
- (void *)((uintptr_t)(desc_addr + desc_offset));
- batch_copy[copy_nb].src =
+ batch_copy[vq->batch_copy_nb_elems].dst =
+ (void *)((uintptr_t)(buf_addr + buf_offset));
+ batch_copy[vq->batch_copy_nb_elems].src =
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
- batch_copy[copy_nb].log_addr = desc_gaddr + desc_offset;
- batch_copy[copy_nb].len = cpy_len;
- copy_nb++;
+ batch_copy[vq->batch_copy_nb_elems].log_addr =
+ buf_iova + buf_offset;
+ batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
+ vq->batch_copy_nb_elems++;
}
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
- desc_avail -= cpy_len;
- desc_offset += cpy_len;
- desc_chunck_len -= cpy_len;
+ buf_avail -= cpy_len;
+ buf_offset += cpy_len;
}
out:
- vq->batch_copy_nb_elems = copy_nb;
return error;
}
static __rte_always_inline uint32_t
-virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
+virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mbuf **pkts, uint32_t count)
{
- struct vhost_virtqueue *vq;
uint32_t pkt_idx = 0;
uint16_t num_buffers;
struct buf_vector buf_vec[BUF_VECTOR_MAX];
uint16_t avail_head;
- VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
- RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
- dev->vid, __func__, queue_id);
- return 0;
- }
+ rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+ avail_head = *((volatile uint16_t *)&vq->avail->idx);
- vq = dev->virtqueue[queue_id];
+ for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
+ uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
+ uint16_t nr_vec = 0;
- rte_spinlock_lock(&vq->access_lock);
+ if (unlikely(reserve_avail_buf_split(dev, vq,
+ pkt_len, buf_vec, &num_buffers,
+ avail_head, &nr_vec) < 0)) {
+ VHOST_LOG_DEBUG(VHOST_DATA,
+ "(%d) failed to get enough desc from vring\n",
+ dev->vid);
+ vq->shadow_used_idx -= num_buffers;
+ break;
+ }
- if (unlikely(vq->enabled == 0))
- goto out_access_unlock;
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_lock(vq);
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
+ dev->vid, vq->last_avail_idx,
+ vq->last_avail_idx + num_buffers);
- if (unlikely(vq->access_ok == 0))
- if (unlikely(vring_translate(dev, vq) < 0))
- goto out;
+ if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
+ buf_vec, nr_vec,
+ num_buffers) < 0) {
+ vq->shadow_used_idx -= num_buffers;
+ break;
+ }
- count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
- if (count == 0)
- goto out;
+ vq->last_avail_idx += num_buffers;
+ }
- vq->batch_copy_nb_elems = 0;
+ do_data_copy_enqueue(dev, vq);
- rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
+ if (likely(vq->shadow_used_idx)) {
+ flush_shadow_used_ring_split(dev, vq);
+ vhost_vring_call_split(dev, vq);
+ }
+
+ return pkt_idx;
+}
+
+static __rte_always_inline uint32_t
+virtio_dev_rx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct rte_mbuf **pkts, uint32_t count)
+{
+ uint32_t pkt_idx = 0;
+ uint16_t num_buffers;
+ struct buf_vector buf_vec[BUF_VECTOR_MAX];
- vq->shadow_used_idx = 0;
- avail_head = *((volatile uint16_t *)&vq->avail->idx);
for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
+ uint16_t nr_vec = 0;
+ uint16_t nr_descs = 0;
- if (unlikely(reserve_avail_buf_mergeable(dev, vq,
- pkt_len, buf_vec, &num_buffers,
- avail_head) < 0)) {
+ if (unlikely(reserve_avail_buf_packed(dev, vq,
+ pkt_len, buf_vec, &nr_vec,
+ &num_buffers, &nr_descs) < 0)) {
VHOST_LOG_DEBUG(VHOST_DATA,
"(%d) failed to get enough desc from vring\n",
dev->vid);
@@ -877,26 +853,72 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
break;
}
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
dev->vid, vq->last_avail_idx,
vq->last_avail_idx + num_buffers);
- if (copy_mbuf_to_desc_mergeable(dev, vq, pkts[pkt_idx],
- buf_vec, num_buffers) < 0) {
+ if (copy_mbuf_to_desc(dev, vq, pkts[pkt_idx],
+ buf_vec, nr_vec,
+ num_buffers) < 0) {
vq->shadow_used_idx -= num_buffers;
break;
}
- vq->last_avail_idx += num_buffers;
+ vq->last_avail_idx += nr_descs;
+ if (vq->last_avail_idx >= vq->size) {
+ vq->last_avail_idx -= vq->size;
+ vq->avail_wrap_counter ^= 1;
+ }
}
do_data_copy_enqueue(dev, vq);
if (likely(vq->shadow_used_idx)) {
- flush_shadow_used_ring(dev, vq);
- vhost_vring_call(dev, vq);
+ flush_shadow_used_ring_packed(dev, vq);
+ vhost_vring_call_packed(dev, vq);
}
+ return pkt_idx;
+}
+
+static __rte_always_inline uint32_t
+virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t count)
+{
+ struct vhost_virtqueue *vq;
+
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
+ RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+ dev->vid, __func__, queue_id);
+ return 0;
+ }
+
+ vq = dev->virtqueue[queue_id];
+
+ rte_spinlock_lock(&vq->access_lock);
+
+ if (unlikely(vq->enabled == 0))
+ goto out_access_unlock;
+
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0))
+ if (unlikely(vring_translate(dev, vq) < 0))
+ goto out;
+
+ count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
+ if (count == 0)
+ goto out;
+
+ if (vq_is_packed(dev))
+ count = virtio_dev_rx_packed(dev, vq, pkts, count);
+ else
+ count = virtio_dev_rx_split(dev, vq, pkts, count);
+
out:
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
vhost_user_iotlb_rd_unlock(vq);
@@ -904,7 +926,7 @@ out:
out_access_unlock:
rte_spinlock_unlock(&vq->access_lock);
- return pkt_idx;
+ return count;
}
uint16_t
@@ -923,10 +945,7 @@ rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
return 0;
}
- if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
- return virtio_dev_merge_rx(dev, queue_id, pkts, count);
- else
- return virtio_dev_rx(dev, queue_id, pkts, count);
+ return virtio_dev_rx(dev, queue_id, pkts, count);
}
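
From an application's point of view the split/packed selection above is invisible; a forwarding loop just calls the burst API. A minimal sketch, assuming the usual ethdev and vhost setup was done elsewhere:

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_vhost.h>

#define EXAMPLE_BURST 32

/* Forward one burst from a physical port into virtqueue 0, the first
 * RX queue from the guest's point of view. */
static void
example_port_to_guest(uint16_t port, int vid)
{
	struct rte_mbuf *pkts[EXAMPLE_BURST];
	uint16_t nb_rx, nb_enq, i;

	nb_rx = rte_eth_rx_burst(port, 0, pkts, EXAMPLE_BURST);
	nb_enq = rte_vhost_enqueue_burst(vid, 0, pkts, nb_rx);
	for (i = nb_enq; i < nb_rx; i++)	/* drop what didn't fit */
		rte_pktmbuf_free(pkts[i]);
}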
static inline bool
@@ -1051,76 +1070,60 @@ put_zmbuf(struct zcopy_mbuf *zmbuf)
static __rte_always_inline int
copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
- struct vring_desc *descs, uint16_t max_desc,
- struct rte_mbuf *m, uint16_t desc_idx,
- struct rte_mempool *mbuf_pool)
+ struct buf_vector *buf_vec, uint16_t nr_vec,
+ struct rte_mbuf *m, struct rte_mempool *mbuf_pool)
{
- struct vring_desc *desc;
- uint64_t desc_addr, desc_gaddr;
- uint32_t desc_avail, desc_offset;
+ uint32_t buf_avail, buf_offset;
+ uint64_t buf_addr, buf_iova, buf_len;
uint32_t mbuf_avail, mbuf_offset;
uint32_t cpy_len;
- uint64_t desc_chunck_len;
struct rte_mbuf *cur = m, *prev = m;
struct virtio_net_hdr tmp_hdr;
struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid a dead loop in the desc chain */
- uint32_t nr_desc = 1;
+ uint16_t vec_idx = 0;
struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
- uint16_t copy_nb = vq->batch_copy_nb_elems;
int error = 0;
- desc = &descs[desc_idx];
- if (unlikely((desc->len < dev->vhost_hlen)) ||
- (desc->flags & VRING_DESC_F_INDIRECT)) {
- error = -1;
- goto out;
- }
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
- desc_chunck_len = desc->len;
- desc_gaddr = desc->addr;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
+ if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1)) {
error = -1;
goto out;
}
+ if (likely(nr_vec > 1))
+ rte_prefetch0((void *)(uintptr_t)buf_vec[1].buf_addr);
+
if (virtio_net_with_host_offload(dev)) {
- if (unlikely(desc_chunck_len < sizeof(struct virtio_net_hdr))) {
- uint64_t len = desc_chunck_len;
+ if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
+ uint64_t len;
uint64_t remain = sizeof(struct virtio_net_hdr);
- uint64_t src = desc_addr;
+ uint64_t src;
uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
- uint64_t guest_addr = desc_gaddr;
+ uint16_t hdr_vec_idx = 0;
/*
* No luck, the virtio-net header doesn't fit
* in a contiguous virtual area.
*/
while (remain) {
- len = remain;
- src = vhost_iova_to_vva(dev, vq,
- guest_addr, &len,
- VHOST_ACCESS_RO);
- if (unlikely(!src || !len)) {
- error = -1;
- goto out;
- }
-
+ len = RTE_MIN(remain,
+ buf_vec[hdr_vec_idx].buf_len);
+ src = buf_vec[hdr_vec_idx].buf_addr;
rte_memcpy((void *)(uintptr_t)dst,
(void *)(uintptr_t)src, len);
- guest_addr += len;
remain -= len;
dst += len;
+ hdr_vec_idx++;
}
hdr = &tmp_hdr;
} else {
- hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+ hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
rte_prefetch0(hdr);
}
}
@@ -1130,61 +1133,40 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
* for Tx: the first for storing the header, and others
* for storing the data.
*/
- if (likely((desc->len == dev->vhost_hlen) &&
- (desc->flags & VRING_DESC_F_NEXT) != 0)) {
- desc = &descs[desc->next];
- if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
- error = -1;
+ if (unlikely(buf_len < dev->vhost_hlen)) {
+ buf_offset = dev->vhost_hlen - buf_len;
+ vec_idx++;
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
+ buf_avail = buf_len - buf_offset;
+ } else if (buf_len == dev->vhost_hlen) {
+ if (unlikely(++vec_idx >= nr_vec))
goto out;
- }
-
- desc_chunck_len = desc->len;
- desc_gaddr = desc->addr;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
- desc_offset = 0;
- desc_avail = desc->len;
- nr_desc += 1;
+ buf_offset = 0;
+ buf_avail = buf_len;
} else {
- desc_avail = desc->len - dev->vhost_hlen;
-
- if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
- desc_chunck_len = desc_avail;
- desc_gaddr += dev->vhost_hlen;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
-
- desc_offset = 0;
- } else {
- desc_offset = dev->vhost_hlen;
- desc_chunck_len -= dev->vhost_hlen;
- }
+ buf_offset = dev->vhost_hlen;
+ buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
}
- rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+ rte_prefetch0((void *)(uintptr_t)
+ (buf_addr + buf_offset));
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
- (uint32_t)desc_chunck_len, 0);
+ PRINT_PACKET(dev,
+ (uintptr_t)(buf_addr + buf_offset),
+ (uint32_t)buf_avail, 0);
mbuf_offset = 0;
mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
while (1) {
uint64_t hpa;
- cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
+ cpy_len = RTE_MIN(buf_avail, mbuf_avail);
/*
* A desc buf might span two host physical pages that are
@@ -1192,11 +1174,11 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
* will be copied even though zero copy is enabled.
*/
if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
- desc_gaddr + desc_offset, cpy_len)))) {
+ buf_iova + buf_offset, cpy_len)))) {
cur->data_len = cpy_len;
cur->data_off = 0;
- cur->buf_addr = (void *)(uintptr_t)(desc_addr
- + desc_offset);
+ cur->buf_addr =
+ (void *)(uintptr_t)(buf_addr + buf_offset);
cur->buf_iova = hpa;
/*
@@ -1206,81 +1188,53 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
mbuf_avail = cpy_len;
} else {
if (likely(cpy_len > MAX_BATCH_LEN ||
- copy_nb >= vq->size ||
- (hdr && cur == m) ||
- desc->len != desc_chunck_len)) {
+ vq->batch_copy_nb_elems >= vq->size ||
+ (hdr && cur == m))) {
rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
mbuf_offset),
- (void *)((uintptr_t)(desc_addr +
- desc_offset)),
+ (void *)((uintptr_t)(buf_addr +
+ buf_offset)),
cpy_len);
} else {
- batch_copy[copy_nb].dst =
+ batch_copy[vq->batch_copy_nb_elems].dst =
rte_pktmbuf_mtod_offset(cur, void *,
mbuf_offset);
- batch_copy[copy_nb].src =
- (void *)((uintptr_t)(desc_addr +
- desc_offset));
- batch_copy[copy_nb].len = cpy_len;
- copy_nb++;
+ batch_copy[vq->batch_copy_nb_elems].src =
+ (void *)((uintptr_t)(buf_addr +
+ buf_offset));
+ batch_copy[vq->batch_copy_nb_elems].len =
+ cpy_len;
+ vq->batch_copy_nb_elems++;
}
}
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
- desc_avail -= cpy_len;
- desc_chunck_len -= cpy_len;
- desc_offset += cpy_len;
+ buf_avail -= cpy_len;
+ buf_offset += cpy_len;
- /* This desc reaches to its end, get the next one */
- if (desc_avail == 0) {
- if ((desc->flags & VRING_DESC_F_NEXT) == 0)
+ /* This buf reaches its end, get the next one */
+ if (buf_avail == 0) {
+ if (++vec_idx >= nr_vec)
break;
- if (unlikely(desc->next >= max_desc ||
- ++nr_desc > max_desc)) {
- error = -1;
- goto out;
- }
- desc = &descs[desc->next];
- if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
- error = -1;
- goto out;
- }
+ buf_addr = buf_vec[vec_idx].buf_addr;
+ buf_iova = buf_vec[vec_idx].buf_iova;
+ buf_len = buf_vec[vec_idx].buf_len;
- desc_chunck_len = desc->len;
- desc_gaddr = desc->addr;
- desc_addr = vhost_iova_to_vva(dev,
- vq, desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
+ /*
+ * Prefetch the desc n + 1 buffer while
+ * the desc n buffer is processed.
+ */
+ if (vec_idx + 1 < nr_vec)
+ rte_prefetch0((void *)(uintptr_t)
+ buf_vec[vec_idx + 1].buf_addr);
- rte_prefetch0((void *)(uintptr_t)desc_addr);
-
- desc_offset = 0;
- desc_avail = desc->len;
-
- PRINT_PACKET(dev, (uintptr_t)desc_addr,
- (uint32_t)desc_chunck_len, 0);
- } else if (unlikely(desc_chunck_len == 0)) {
- desc_chunck_len = desc_avail;
- desc_gaddr += desc_offset;
- desc_addr = vhost_iova_to_vva(dev, vq,
- desc_gaddr,
- &desc_chunck_len,
- VHOST_ACCESS_RO);
- if (unlikely(!desc_addr)) {
- error = -1;
- goto out;
- }
- desc_offset = 0;
+ buf_offset = 0;
+ buf_avail = buf_len;
- PRINT_PACKET(dev, (uintptr_t)desc_addr,
- (uint32_t)desc_chunck_len, 0);
+ PRINT_PACKET(dev, (uintptr_t)buf_addr,
+ (uint32_t)buf_avail, 0);
}
/*
@@ -1316,40 +1270,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
vhost_dequeue_offload(hdr, m);
out:
- vq->batch_copy_nb_elems = copy_nb;
return error;
}
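
For context on the zero-copy branch above: rather than copying, the mbuf is pointed directly at the guest buffer and the caller bumps its refcount so the descriptor stays outstanding until the mbuf is really consumed. A sketch of the attach step, with a hypothetical helper name:

#include <rte_mbuf.h>

/* Hypothetical helper mirroring the zero-copy branch: make the mbuf
 * reference guest memory directly instead of copying it. */
static void
example_attach_guest_buf(struct rte_mbuf *m, void *host_va,
			 rte_iova_t hpa, uint16_t len)
{
	m->buf_addr = host_va;	/* host virtual address of guest buf */
	m->buf_iova = hpa;	/* host physical address, usable for DMA */
	m->data_off = 0;
	m->data_len = len;
}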
-static __rte_always_inline void
-update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint32_t used_idx, uint32_t desc_idx)
-{
- vq->used->ring[used_idx].id = desc_idx;
- vq->used->ring[used_idx].len = 0;
- vhost_log_cache_used_vring(dev, vq,
- offsetof(struct vring_used, ring[used_idx]),
- sizeof(vq->used->ring[used_idx]));
-}
-
-static __rte_always_inline void
-update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
- uint32_t count)
-{
- if (unlikely(count == 0))
- return;
-
- rte_smp_wmb();
- rte_smp_rmb();
-
- vhost_log_cache_sync(dev, vq);
-
- vq->used->idx += count;
- vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
- sizeof(vq->used->idx));
- vhost_vring_call(dev, vq);
-}
-
static __rte_always_inline struct zcopy_mbuf *
get_zmbuf(struct vhost_virtqueue *vq)
{
@@ -1409,66 +1333,137 @@ restore_mbuf(struct rte_mbuf *m)
}
}
-uint16_t
-rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
+static __rte_always_inline uint16_t
+virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
- struct virtio_net *dev;
- struct rte_mbuf *rarp_mbuf = NULL;
- struct vhost_virtqueue *vq;
- uint32_t desc_indexes[MAX_PKT_BURST];
- uint32_t used_idx;
- uint32_t i = 0;
+ uint16_t i;
uint16_t free_entries;
- uint16_t avail_idx;
- dev = get_device(vid);
- if (!dev)
- return 0;
+ if (unlikely(dev->dequeue_zero_copy)) {
+ struct zcopy_mbuf *zmbuf, *next;
- if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
- RTE_LOG(ERR, VHOST_DATA,
- "(%d) %s: built-in vhost net backend is disabled.\n",
- dev->vid, __func__);
- return 0;
- }
+ for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
+ zmbuf != NULL; zmbuf = next) {
+ next = TAILQ_NEXT(zmbuf, next);
- if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
- RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
- dev->vid, __func__, queue_id);
- return 0;
+ if (mbuf_is_consumed(zmbuf->mbuf)) {
+ update_shadow_used_ring_split(vq,
+ zmbuf->desc_idx, 0);
+ TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+ restore_mbuf(zmbuf->mbuf);
+ rte_pktmbuf_free(zmbuf->mbuf);
+ put_zmbuf(zmbuf);
+ vq->nr_zmbuf -= 1;
+ }
+ }
+
+ flush_shadow_used_ring_split(dev, vq);
+ vhost_vring_call_split(dev, vq);
}
- vq = dev->virtqueue[queue_id];
+ rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
- if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
+ free_entries = *((volatile uint16_t *)&vq->avail->idx) -
+ vq->last_avail_idx;
+ if (free_entries == 0)
return 0;
- if (unlikely(vq->enabled == 0))
- goto out_access_unlock;
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- vq->batch_copy_nb_elems = 0;
+ count = RTE_MIN(count, MAX_PKT_BURST);
+ count = RTE_MIN(count, free_entries);
+ VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
+ dev->vid, count);
- if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
- vhost_user_iotlb_rd_lock(vq);
+ for (i = 0; i < count; i++) {
+ struct buf_vector buf_vec[BUF_VECTOR_MAX];
+ uint16_t head_idx, dummy_len;
+ uint16_t nr_vec = 0;
+ int err;
- if (unlikely(vq->access_ok == 0))
- if (unlikely(vring_translate(dev, vq) < 0))
- goto out;
+ if (unlikely(fill_vec_buf_split(dev, vq,
+ vq->last_avail_idx + i,
+ &nr_vec, buf_vec,
+ &head_idx, &dummy_len,
+ VHOST_ACCESS_RO) < 0))
+ break;
+
+ if (likely(dev->dequeue_zero_copy == 0))
+ update_shadow_used_ring_split(vq, head_idx, 0);
+
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
+
+ pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+ if (unlikely(pkts[i] == NULL)) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "Failed to allocate memory for mbuf.\n");
+ break;
+ }
+
+ err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
+ mbuf_pool);
+ if (unlikely(err)) {
+ rte_pktmbuf_free(pkts[i]);
+ break;
+ }
+
+ if (unlikely(dev->dequeue_zero_copy)) {
+ struct zcopy_mbuf *zmbuf;
+
+ zmbuf = get_zmbuf(vq);
+ if (!zmbuf) {
+ rte_pktmbuf_free(pkts[i]);
+ break;
+ }
+ zmbuf->mbuf = pkts[i];
+ zmbuf->desc_idx = head_idx;
+
+ /*
+ * Pin the mbuf; we will check later whether the mbuf
+ * has been freed (meaning we were the last user). If
+ * so, we can then update the used ring safely.
+ */
+ rte_mbuf_refcnt_update(pkts[i], 1);
+
+ vq->nr_zmbuf += 1;
+ TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
+ }
+ }
+ vq->last_avail_idx += i;
+
+ if (likely(dev->dequeue_zero_copy == 0)) {
+ do_data_copy_dequeue(vq);
+ if (unlikely(i < count))
+ vq->shadow_used_idx = i;
+ flush_shadow_used_ring_split(dev, vq);
+ vhost_vring_call_split(dev, vq);
+ }
+
+ return i;
+}
+
+static __rte_always_inline uint16_t
+virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+ uint16_t i;
+
+ rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
if (unlikely(dev->dequeue_zero_copy)) {
struct zcopy_mbuf *zmbuf, *next;
- int nr_updated = 0;
for (zmbuf = TAILQ_FIRST(&vq->zmbuf_list);
zmbuf != NULL; zmbuf = next) {
next = TAILQ_NEXT(zmbuf, next);
if (mbuf_is_consumed(zmbuf->mbuf)) {
- used_idx = vq->last_used_idx++ & (vq->size - 1);
- update_used_ring(dev, vq, used_idx,
- zmbuf->desc_idx);
- nr_updated += 1;
+ update_shadow_used_ring_packed(vq,
+ zmbuf->desc_idx,
+ 0,
+ zmbuf->desc_count);
TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
restore_mbuf(zmbuf->mbuf);
@@ -1478,122 +1473,46 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
}
}
- update_used_idx(dev, vq, nr_updated);
+ flush_shadow_used_ring_packed(dev, vq);
+ vhost_vring_call_packed(dev, vq);
}
- /*
- * Construct a RARP broadcast packet, and inject it to the "pkts"
- * array, to looks like that guest actually send such packet.
- *
- * Check user_send_rarp() for more information.
- *
- * broadcast_rarp shares a cacheline in the virtio_net structure
- * with some fields that are accessed during enqueue and
- * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
- * result in false sharing between enqueue and dequeue.
- *
- * Prevent unnecessary false sharing by reading broadcast_rarp first
- * and only performing cmpset if the read indicates it is likely to
- * be set.
- */
-
- if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
- rte_atomic16_cmpset((volatile uint16_t *)
- &dev->broadcast_rarp.cnt, 1, 0))) {
-
- rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
- if (rarp_mbuf == NULL) {
- RTE_LOG(ERR, VHOST_DATA,
- "Failed to make RARP packet.\n");
- return 0;
- }
- count -= 1;
- }
-
- free_entries = *((volatile uint16_t *)&vq->avail->idx) -
- vq->last_avail_idx;
- if (free_entries == 0)
- goto out;
-
VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
- /* Prefetch available and used ring */
- avail_idx = vq->last_avail_idx & (vq->size - 1);
- used_idx = vq->last_used_idx & (vq->size - 1);
- rte_prefetch0(&vq->avail->ring[avail_idx]);
- rte_prefetch0(&vq->used->ring[used_idx]);
-
count = RTE_MIN(count, MAX_PKT_BURST);
- count = RTE_MIN(count, free_entries);
VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n",
dev->vid, count);
- /* Retrieve all of the head indexes first to avoid caching issues. */
for (i = 0; i < count; i++) {
- avail_idx = (vq->last_avail_idx + i) & (vq->size - 1);
- used_idx = (vq->last_used_idx + i) & (vq->size - 1);
- desc_indexes[i] = vq->avail->ring[avail_idx];
-
- if (likely(dev->dequeue_zero_copy == 0))
- update_used_ring(dev, vq, used_idx, desc_indexes[i]);
- }
-
- /* Prefetch descriptor index. */
- rte_prefetch0(&vq->desc[desc_indexes[0]]);
- for (i = 0; i < count; i++) {
- struct vring_desc *desc, *idesc = NULL;
- uint16_t sz, idx;
- uint64_t dlen;
+ struct buf_vector buf_vec[BUF_VECTOR_MAX];
+ uint16_t buf_id, dummy_len;
+ uint16_t desc_count, nr_vec = 0;
int err;
- if (likely(i + 1 < count))
- rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
-
- if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
- dlen = vq->desc[desc_indexes[i]].len;
- desc = (struct vring_desc *)(uintptr_t)
- vhost_iova_to_vva(dev, vq,
- vq->desc[desc_indexes[i]].addr,
- &dlen,
- VHOST_ACCESS_RO);
- if (unlikely(!desc))
- break;
+ if (unlikely(fill_vec_buf_packed(dev, vq,
+ vq->last_avail_idx, &desc_count,
+ buf_vec, &nr_vec,
+ &buf_id, &dummy_len,
+ VHOST_ACCESS_RW) < 0))
+ break;
- if (unlikely(dlen < vq->desc[desc_indexes[i]].len)) {
- /*
- * The indirect desc table is not contiguous
- * in process VA space, we have to copy it.
- */
- idesc = alloc_copy_ind_table(dev, vq,
- &vq->desc[desc_indexes[i]]);
- if (unlikely(!idesc))
- break;
-
- desc = idesc;
- }
+ if (likely(dev->dequeue_zero_copy == 0))
+ update_shadow_used_ring_packed(vq, buf_id, 0,
+ desc_count);
- rte_prefetch0(desc);
- sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
- idx = 0;
- } else {
- desc = vq->desc;
- sz = vq->size;
- idx = desc_indexes[i];
- }
+ rte_prefetch0((void *)(uintptr_t)buf_vec[0].buf_addr);
pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
if (unlikely(pkts[i] == NULL)) {
RTE_LOG(ERR, VHOST_DATA,
"Failed to allocate memory for mbuf.\n");
- free_ind_table(idesc);
break;
}
- err = copy_desc_to_mbuf(dev, vq, desc, sz, pkts[i], idx,
- mbuf_pool);
+ err = copy_desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
+ mbuf_pool);
if (unlikely(err)) {
rte_pktmbuf_free(pkts[i]);
- free_ind_table(idesc);
break;
}
@@ -1603,11 +1522,11 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
zmbuf = get_zmbuf(vq);
if (!zmbuf) {
rte_pktmbuf_free(pkts[i]);
- free_ind_table(idesc);
break;
}
zmbuf->mbuf = pkts[i];
- zmbuf->desc_idx = desc_indexes[i];
+ zmbuf->desc_idx = buf_id;
+ zmbuf->desc_count = desc_count;
/*
* Pin lock the mbuf; we will check later to see
@@ -1621,17 +1540,102 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
}
- if (unlikely(!!idesc))
- free_ind_table(idesc);
+ vq->last_avail_idx += desc_count;
+ if (vq->last_avail_idx >= vq->size) {
+ vq->last_avail_idx -= vq->size;
+ vq->avail_wrap_counter ^= 1;
+ }
}
- vq->last_avail_idx += i;
if (likely(dev->dequeue_zero_copy == 0)) {
do_data_copy_dequeue(vq);
- vq->last_used_idx += i;
- update_used_idx(dev, vq, i);
+ if (unlikely(i < count))
+ vq->shadow_used_idx = i;
+ flush_shadow_used_ring_packed(dev, vq);
+ vhost_vring_call_packed(dev, vq);
+ }
+
+ return i;
+}
+
+uint16_t
+rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+ struct virtio_net *dev;
+ struct rte_mbuf *rarp_mbuf = NULL;
+ struct vhost_virtqueue *vq;
+
+ dev = get_device(vid);
+ if (!dev)
+ return 0;
+
+ if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "(%d) %s: built-in vhost net backend is disabled.\n",
+ dev->vid, __func__);
+ return 0;
+ }
+
+ if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
+ RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
+ dev->vid, __func__, queue_id);
+ return 0;
+ }
+
+ vq = dev->virtqueue[queue_id];
+
+ if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
+ return 0;
+
+ if (unlikely(vq->enabled == 0)) {
+ count = 0;
+ goto out_access_unlock;
}
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ vhost_user_iotlb_rd_lock(vq);
+
+ if (unlikely(vq->access_ok == 0))
+ if (unlikely(vring_translate(dev, vq) < 0)) {
+ count = 0;
+ goto out;
+ }
+
+ /*
+ * Construct a RARP broadcast packet, and inject it into the "pkts"
+ * array so it looks like the guest actually sent such a packet.
+ *
+ * Check user_send_rarp() for more information.
+ *
+ * broadcast_rarp shares a cacheline in the virtio_net structure
+ * with some fields that are accessed during enqueue and
+ * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
+ * result in false sharing between enqueue and dequeue.
+ *
+ * Prevent unnecessary false sharing by reading broadcast_rarp first
+ * and only performing cmpset if the read indicates it is likely to
+ * be set.
+ */
+ if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
+ rte_atomic16_cmpset((volatile uint16_t *)
+ &dev->broadcast_rarp.cnt, 1, 0))) {
+
+ rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
+ if (rarp_mbuf == NULL) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "Failed to make RARP packet.\n");
+ count = 0;
+ goto out;
+ }
+ count -= 1;
+ }
+
+ if (vq_is_packed(dev))
+ count = virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count);
+ else
+ count = virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count);
+
out:
if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
vhost_user_iotlb_rd_unlock(vq);
@@ -1644,10 +1648,10 @@ out_access_unlock:
* Inject it to the head of "pkts" array, so that switch's mac
* learning table will get updated first.
*/
- memmove(&pkts[1], pkts, i * sizeof(struct rte_mbuf *));
+ memmove(&pkts[1], pkts, count * sizeof(struct rte_mbuf *));
pkts[0] = rarp_mbuf;
- i += 1;
+ count += 1;
}
- return i;
+ return count;
}
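
The dequeue side mirrors the enqueue sketch shown earlier; virtqueue 1 is the guest's first TX queue. Again a hedged sketch that assumes port, device and mempool setup elsewhere:

#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_vhost.h>

#define EXAMPLE_BURST 32

/* Drain packets the guest transmitted and send them out of a
 * physical port, freeing whatever the port did not accept. */
static void
example_guest_to_port(int vid, uint16_t port, struct rte_mempool *mp)
{
	struct rte_mbuf *pkts[EXAMPLE_BURST];
	uint16_t nb, sent, i;

	nb = rte_vhost_dequeue_burst(vid, 1, mp, pkts, EXAMPLE_BURST);
	sent = rte_eth_tx_burst(port, 0, pkts, nb);
	for (i = sent; i < nb; i++)
		rte_pktmbuf_free(pkts[i]);
}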
diff --git a/lib/meson.build b/lib/meson.build
index 9d11571f..eb91f100 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -9,7 +9,8 @@
# given as a dep, no need to mention ring. This is especially true for the
# core libs which are widely reused, so their deps are kept to a minimum.
libraries = [ 'compat', # just a header, used for versioning
- 'eal', 'ring', 'mempool', 'mbuf', 'net', 'kvargs', 'ethdev', 'pci', # core
+ 'kvargs',
+ 'eal', 'ring', 'mempool', 'mbuf', 'net', 'ethdev', 'pci', # core
'metrics', # bitrate/latency stats depends on this
'hash', # efd depends on this
'timer', # eventdev depends on this
@@ -25,6 +26,10 @@ libraries = [ 'compat', # just a header, used for versioning
# flow_classify lib depends on pkt framework table lib
'flow_classify', 'bpf']
+default_cflags = machine_args
+if cc.has_argument('-Wno-format-truncation')
+ default_cflags += '-Wno-format-truncation'
+endif
foreach l:libraries
build = true
name = l
@@ -33,7 +38,7 @@ foreach l:libraries
sources = []
headers = []
includes = []
- cflags = machine_args
+ cflags = default_cflags
objs = [] # other object files to link against, used e.g. for
# instruction-set optimized versions of code
@@ -41,9 +46,12 @@ foreach l:libraries
# external package/library requirements
ext_deps = []
deps = ['eal'] # eal is standard dependency except for itself
- if l == 'eal'
+ if l == 'kvargs'
deps = []
endif
+ if l == 'eal'
+ deps = ['kvargs']
+ endif
dir_name = 'librte_' + l
subdir(dir_name)
@@ -63,6 +71,10 @@ foreach l:libraries
shared_deps = ext_deps
static_deps = ext_deps
foreach d:deps
+ if not is_variable('shared_rte_' + d)
+ error('Missing dependency ' + d +
+ ' for library ' + lib_name)
+ endif
shared_deps += [get_variable('shared_rte_' + d)]
static_deps += [get_variable('static_rte_' + d)]
endforeach